1124 files changed, 62471 insertions, 35237 deletions
diff --git a/Documentation/ABI/testing/sysfs-kernel-dmabuf-buffers b/Documentation/ABI/testing/sysfs-kernel-dmabuf-buffers new file mode 100644 index 000000000000..5d3bc997dc64 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-kernel-dmabuf-buffers @@ -0,0 +1,24 @@ +What: /sys/kernel/dmabuf/buffers +Date: May 2021 +KernelVersion: v5.13 +Contact: Hridya Valsaraju <hridya@google.com> +Description: The /sys/kernel/dmabuf/buffers directory contains a + snapshot of the internal state of every DMA-BUF. + /sys/kernel/dmabuf/buffers/<inode_number> will contain the + statistics for the DMA-BUF with the unique inode number + <inode_number> +Users: kernel memory tuning/debugging tools + +What: /sys/kernel/dmabuf/buffers/<inode_number>/exporter_name +Date: May 2021 +KernelVersion: v5.13 +Contact: Hridya Valsaraju <hridya@google.com> +Description: This file is read-only and contains the name of the exporter of + the DMA-BUF. + +What: /sys/kernel/dmabuf/buffers/<inode_number>/size +Date: May 2021 +KernelVersion: v5.13 +Contact: Hridya Valsaraju <hridya@google.com> +Description: This file is read-only and specifies the size of the DMA-BUF in + bytes. diff --git a/Documentation/devicetree/bindings/display/bridge/simple-bridge.yaml b/Documentation/devicetree/bindings/display/bridge/simple-bridge.yaml index 6c7b577fd471..43cf4df9811a 100644 --- a/Documentation/devicetree/bindings/display/bridge/simple-bridge.yaml +++ b/Documentation/devicetree/bindings/display/bridge/simple-bridge.yaml @@ -22,6 +22,9 @@ properties: - ti,ths8134a - ti,ths8134b - const: ti,ths8134 + - items: + - const: corpro,gm7123 + - const: adi,adv7123 - enum: - adi,adv7123 - dumb-vga-dac diff --git a/Documentation/devicetree/bindings/display/bridge/ti,sn65dsi86.yaml b/Documentation/devicetree/bindings/display/bridge/ti,sn65dsi86.yaml index 26932d2e86ab..1c2daf7c24cc 100644 --- a/Documentation/devicetree/bindings/display/bridge/ti,sn65dsi86.yaml +++ b/Documentation/devicetree/bindings/display/bridge/ti,sn65dsi86.yaml @@ -70,6 +70,9 @@ properties: const: 1 description: See ../../pwm/pwm.yaml for description of the cell formats. + aux-bus: + $ref: /schemas/display/dp-aux-bus.yaml# + ports: $ref: /schemas/graph.yaml#/properties/ports @@ -150,7 +153,6 @@ properties: required: - compatible - reg - - enable-gpios - vccio-supply - vpll-supply - vcca-supply @@ -201,11 +203,26 @@ examples: port@1 { reg = <1>; - endpoint { + sn65dsi86_out: endpoint { remote-endpoint = <&panel_in_edp>; }; }; }; + + aux-bus { + panel { + compatible = "boe,nv133fhm-n62"; + power-supply = <&pp3300_dx_edp>; + backlight = <&backlight>; + hpd-gpios = <&sn65dsi86_bridge 2 GPIO_ACTIVE_HIGH>; + + port { + panel_in_edp: endpoint { + remote-endpoint = <&sn65dsi86_out>; + }; + }; + }; + }; }; }; - | diff --git a/Documentation/devicetree/bindings/display/dp-aux-bus.yaml b/Documentation/devicetree/bindings/display/dp-aux-bus.yaml new file mode 100644 index 000000000000..5e4afe9f98fb --- /dev/null +++ b/Documentation/devicetree/bindings/display/dp-aux-bus.yaml @@ -0,0 +1,37 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/display/dp-aux-bus.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: DisplayPort AUX bus + +maintainers: + - Douglas Anderson <dianders@chromium.org> + +description: + DisplayPort controllers provide a control channel to the sinks that + are hooked up to them. This is the DP AUX bus. Over the DP AUX bus + we can query properties about a sink and also configure it. 
In + particular, DP sinks support DDC over DP AUX which allows tunneling + a standard I2C DDC connection over the AUX channel. + + To model this relationship, DP sinks should be placed as children + of the DP controller under the "aux-bus" node. + + At the moment, this binding only handles the eDP case. It is + possible it will be extended in the future to handle the DP case. + For DP, presumably a connector would be listed under the DP AUX + bus instead of a panel. + +properties: + $nodename: + const: "aux-bus" + + panel: + $ref: panel/panel-common.yaml# + +additionalProperties: false + +required: + - panel diff --git a/Documentation/devicetree/bindings/display/mediatek/mediatek,dsi.txt b/Documentation/devicetree/bindings/display/mediatek/mediatek,dsi.txt index 8238a86686be..d30428b9fb33 100644 --- a/Documentation/devicetree/bindings/display/mediatek/mediatek,dsi.txt +++ b/Documentation/devicetree/bindings/display/mediatek/mediatek,dsi.txt @@ -7,7 +7,7 @@ channel output. Required properties: - compatible: "mediatek,<chip>-dsi" -- the supported chips are mt2701, mt7623, mt8173 and mt8183. +- the supported chips are mt2701, mt7623, mt8167, mt8173 and mt8183. - reg: Physical base address and length of the controller's registers - interrupts: The interrupt signal from the function block. - clocks: device clocks diff --git a/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml b/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml index 76348b71f736..760eec6b0db1 100644 --- a/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml +++ b/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml @@ -64,6 +64,18 @@ properties: Indicates if the DSI controller is driving a panel which needs 2 DSI links. + assigned-clocks: + minItems: 2 + maxItems: 2 + description: | + Parents of "byte" and "pixel" for the given platform. + + assigned-clock-parents: + minItems: 2 + maxItems: 2 + description: | + The Byte clock and Pixel clock PLL outputs provided by a DSI PHY block. 
+ power-domains: maxItems: 1 @@ -119,6 +131,8 @@ required: - clock-names - phys - phy-names + - assigned-clocks + - assigned-clock-parents - power-domains - operating-points-v2 - ports @@ -159,6 +173,9 @@ examples: phys = <&dsi0_phy>; phy-names = "dsi"; + assigned-clocks = <&dispcc DISP_CC_MDSS_BYTE0_CLK_SRC>, <&dispcc DISP_CC_MDSS_PCLK0_CLK_SRC>; + assigned-clock-parents = <&dsi_phy 0>, <&dsi_phy 1>; + power-domains = <&rpmhpd SC7180_CX>; operating-points-v2 = <&dsi_opp_table>; diff --git a/Documentation/devicetree/bindings/display/msm/dsi-phy-7nm.yaml b/Documentation/devicetree/bindings/display/msm/dsi-phy-7nm.yaml new file mode 100644 index 000000000000..4265399bb154 --- /dev/null +++ b/Documentation/devicetree/bindings/display/msm/dsi-phy-7nm.yaml @@ -0,0 +1,72 @@ +# SPDX-License-Identifier: GPL-2.0-only or BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/display/msm/dsi-phy-7nm.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm Display DSI 7nm PHY + +maintainers: + - Jonathan Marek <jonathan@marek.ca> + +allOf: + - $ref: dsi-phy-common.yaml# + +properties: + compatible: + oneOf: + - const: qcom,dsi-phy-7nm + - const: qcom,dsi-phy-7nm-8150 + - const: qcom,sc7280-dsi-phy-7nm + + reg: + items: + - description: dsi phy register set + - description: dsi phy lane register set + - description: dsi pll register set + + reg-names: + items: + - const: dsi_phy + - const: dsi_phy_lane + - const: dsi_pll + + vdds-supply: + description: | + Connected to VDD_A_DSI_PLL_0P9 pin (or VDDA_DSI{0,1}_PLL_0P9 for sm8150) + + phy-type: + description: D-PHY (default) or C-PHY mode + enum: [ 10, 11 ] + default: 10 + +required: + - compatible + - reg + - reg-names + - vdds-supply + +unevaluatedProperties: false + +examples: + - | + #include <dt-bindings/clock/qcom,dispcc-sm8250.h> + #include <dt-bindings/clock/qcom,rpmh.h> + + dsi-phy@ae94400 { + compatible = "qcom,dsi-phy-7nm"; + reg = <0x0ae94400 0x200>, + <0x0ae94600 0x280>, + <0x0ae94900 0x260>; + reg-names = "dsi_phy", + "dsi_phy_lane", + "dsi_pll"; + + #clock-cells = <1>; + #phy-cells = <0>; + + vdds-supply = <&vreg_l5a_0p88>; + clocks = <&dispcc DISP_CC_MDSS_AHB_CLK>, + <&rpmhcc RPMH_CXO_CLK>; + clock-names = "iface", "ref"; + }; diff --git a/Documentation/devicetree/bindings/display/panel/ilitek,ili9341.yaml b/Documentation/devicetree/bindings/display/panel/ilitek,ili9341.yaml new file mode 100644 index 000000000000..2ed010f91e2d --- /dev/null +++ b/Documentation/devicetree/bindings/display/panel/ilitek,ili9341.yaml @@ -0,0 +1,78 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/display/panel/ilitek,ili9341.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Ilitek-9341 Display Panel + +maintainers: + - Dillon Min <dillon.minfei@gmail.com> + +description: | + Ilitek ILI9341 TFT panel driver with SPI control bus + This is a driver for 320x240 TFT panels, accepting a rgb input + streams with 16 bits or 18 bits. + +allOf: + - $ref: panel-common.yaml# + +properties: + compatible: + items: + - enum: + # ili9341 240*320 Color on stm32f429-disco board + - st,sf-tc240t-9370-t + - const: ilitek,ili9341 + + reg: true + + dc-gpios: + maxItems: 1 + description: Display data/command selection (D/CX) of this DBI panel + + spi-3wire: true + + spi-max-frequency: + const: 10000000 + + port: true + + vci-supply: + description: Analog voltage supply (2.5 .. 3.3V) + + vddi-supply: + description: Voltage supply for interface logic (1.65 .. 
3.3 V) + + vddi-led-supply: + description: Voltage supply for the LED driver (1.65 .. 3.3 V) + +additionalProperties: false + +required: + - compatible + - reg + - dc-gpios + - port + +examples: + - |+ + spi { + #address-cells = <1>; + #size-cells = <0>; + panel: display@0 { + compatible = "st,sf-tc240t-9370-t", + "ilitek,ili9341"; + reg = <0>; + spi-3wire; + spi-max-frequency = <10000000>; + dc-gpios = <&gpiod 13 0>; + port { + panel_in: endpoint { + remote-endpoint = <&display_out>; + }; + }; + }; + }; +... + diff --git a/Documentation/devicetree/bindings/display/panel/innolux,ej030na.yaml b/Documentation/devicetree/bindings/display/panel/innolux,ej030na.yaml new file mode 100644 index 000000000000..cda36c04e85c --- /dev/null +++ b/Documentation/devicetree/bindings/display/panel/innolux,ej030na.yaml @@ -0,0 +1,62 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/display/panel/innolux,ej030na.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Innolux EJ030NA 3.0" (320x480 pixels) 24-bit TFT LCD panel + +description: | + The panel must obey the rules for a SPI slave device as specified in + spi/spi-controller.yaml + +maintainers: + - Paul Cercueil <paul@crapouillou.net> + +allOf: + - $ref: panel-common.yaml# + +properties: + compatible: + const: innolux,ej030na + + backlight: true + port: true + power-supply: true + reg: true + reset-gpios: true + +required: + - compatible + - reg + - power-supply + - reset-gpios + +unevaluatedProperties: false + +examples: + - | + #include <dt-bindings/gpio/gpio.h> + + spi { + #address-cells = <1>; + #size-cells = <0>; + + panel@0 { + compatible = "innolux,ej030na"; + reg = <0>; + + spi-max-frequency = <10000000>; + + reset-gpios = <&gpe 4 GPIO_ACTIVE_LOW>; + power-supply = <&lcd_power>; + + backlight = <&backlight>; + + port { + panel_input: endpoint { + remote-endpoint = <&panel_output>; + }; + }; + }; + }; diff --git a/Documentation/devicetree/bindings/display/panel/panel-simple.yaml b/Documentation/devicetree/bindings/display/panel/panel-simple.yaml index b3797ba2698b..335776c45474 100644 --- a/Documentation/devicetree/bindings/display/panel/panel-simple.yaml +++ b/Documentation/devicetree/bindings/display/panel/panel-simple.yaml @@ -46,9 +46,13 @@ properties: # AU Optronics Corporation 11.6" HD (1366x768) color TFT-LCD panel - auo,b116xw03 # AU Optronics Corporation 13.3" FHD (1920x1080) color TFT-LCD panel + - auo,b133han05 + # AU Optronics Corporation 13.3" FHD (1920x1080) color TFT-LCD panel - auo,b133htn01 # AU Optronics Corporation 13.3" WXGA (1366x768) TFT LCD panel - auo,b133xtn01 + # AU Optronics Corporation 14.0" FHD (1920x1080) color TFT-LCD panel + - auo,b140han06 # AU Optronics Corporation 7.0" FHD (800 x 480) TFT LCD panel - auo,g070vvn01 # AU Optronics Corporation 10.1" (1280x800) color TFT LCD panel @@ -110,6 +114,9 @@ properties: # Emerging Display Technology Corp. 5.7" VGA TFT LCD panel - edt,et057090dhu - edt,et070080dh6 + # Emerging Display Technology Corp. 3.5" WVGA TFT LCD panel with + # capacitive multitouch + - edt,etm0350g0dh6 # Emerging Display Technology Corp. 480x272 TFT Display with capacitive touch - edt,etm043080dh6gp # Emerging Display Technology Corp. 480x272 TFT Display @@ -128,6 +135,11 @@ properties: # Emerging Display Technology Corp. WVGA TFT Display with capacitive touch - edt,etm0700g0dh6 - edt,etm0700g0edh6 + # Emerging Display Technology Corp. 
5.7" VGA TFT LCD panel with + # capacitive touch + - edt,etmv570g2dhu + # E Ink VB3300-KCA + - eink,vb3300-kca # Evervision Electronics Co. Ltd. VGG804821 5.0" WVGA TFT LCD Panel - evervision,vgg804821 # Foxlink Group 5" WVGA TFT LCD panel @@ -202,8 +214,14 @@ properties: - logictechno,lt161010-2nhr # Logic Technologies LT170410-2WHC 10.1" 1280x800 IPS TFT Cap Touch Mod. - logictechno,lt170410-2whc + # Logic Technologies LTTD800x480 L2RT 7" 800x480 TFT Resistive Touch Module + - logictechno,lttd800480070-l2rt + # Logic Technologies LTTD800480070-L6WH-RT 7” 800x480 TFT Resistive Touch Module + - logictechno,lttd800480070-l6wh-rt # Mitsubishi "AA070MC01 7.0" WVGA TFT LCD panel - mitsubishi,aa070mc01-ca1 + # Multi-Inno Technology Co.,Ltd MI1010AIT-1CP 10.1" 1280x800 LVDS IPS Cap Touch Mod. + - multi-inno,mi1010ait-1cp # NEC LCD Technologies, Ltd. 12.1" WXGA (1280x800) LVDS TFT LCD panel - nec,nl12880bc20-05 # NEC LCD Technologies,Ltd. WQVGA TFT LCD panel @@ -238,10 +256,14 @@ properties: - powertip,ph800480t013-idf02 # QiaoDian XianShi Corporation 4"3 TFT LCD panel - qiaodian,qd43003c0-40 + # Shenzhen QiShenglong Industrialist Co., Ltd. Gopher 2b 4.3" 480(RGB)x272 TFT LCD panel + - qishenglong,gopher2b-lcd # Rocktech Displays Ltd. RK101II01D-CT 10.1" TFT 1280x800 - rocktech,rk101ii01d-ct # Rocktech Display Ltd. RK070ER9427 800(RGB)x480 TFT LCD panel - rocktech,rk070er9427 + # Samsung 13.3" FHD (1920x1080 pixels) eDP AMOLED panel + - samsung,atna33xc20 # Samsung 12.2" (2560x1600 pixels) TFT LCD panel - samsung,lsn122dl01-c01 # Samsung Electronics 10.1" WSVGA TFT LCD panel @@ -298,6 +320,8 @@ properties: enable-gpios: true port: true power-supply: true + no-hpd: true + hpd-gpios: true additionalProperties: false diff --git a/Documentation/devicetree/bindings/display/panel/samsung,lms380kf01.yaml b/Documentation/devicetree/bindings/display/panel/samsung,lms380kf01.yaml new file mode 100644 index 000000000000..251f0c7115aa --- /dev/null +++ b/Documentation/devicetree/bindings/display/panel/samsung,lms380kf01.yaml @@ -0,0 +1,99 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/display/panel/samsung,lms380kf01.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Samsung LMS380KF01 display panel + +description: The LMS380KF01 is a 480x800 DPI display panel from Samsung Mobile + Displays (SMD) utilizing the WideChips WS2401 display controller. It can be + used with internal or external backlight control. + The panel must obey the rules for a SPI slave device as specified in + spi/spi-controller.yaml + +maintainers: + - Linus Walleij <linus.walleij@linaro.org> + +allOf: + - $ref: panel-common.yaml# + +properties: + compatible: + const: samsung,lms380kf01 + + reg: true + + interrupts: + description: provides an optional ESD (electrostatic discharge) + interrupt that signals abnormalities in the display hardware. + This can also be raised for other reasons like erroneous + configuration. 
+ maxItems: 1 + + reset-gpios: true + + vci-supply: + description: regulator that supplies the VCI analog voltage + usually around 3.0 V + + vccio-supply: + description: regulator that supplies the VCCIO voltage usually + around 1.8 V + + backlight: true + + spi-cpha: true + + spi-cpol: true + + spi-max-frequency: + maximum: 1200000 + + port: true + +required: + - compatible + - reg + - spi-cpha + - spi-cpol + - port + +additionalProperties: false + +examples: + - | + #include <dt-bindings/gpio/gpio.h> + #include <dt-bindings/interrupt-controller/irq.h> + + spi { + compatible = "spi-gpio"; + sck-gpios = <&gpio 0 GPIO_ACTIVE_HIGH>; + miso-gpios = <&gpio 1 GPIO_ACTIVE_HIGH>; + mosi-gpios = <&gpio 2 GPIO_ACTIVE_HIGH>; + cs-gpios = <&gpio 3 GPIO_ACTIVE_HIGH>; + num-chipselects = <1>; + #address-cells = <1>; + #size-cells = <0>; + + panel@0 { + compatible = "samsung,lms380kf01"; + spi-max-frequency = <1200000>; + spi-cpha; + spi-cpol; + reg = <0>; + vci-supply = <&lcd_3v0_reg>; + vccio-supply = <&lcd_1v8_reg>; + reset-gpios = <&gpio 4 GPIO_ACTIVE_LOW>; + interrupt-parent = <&gpio>; + interrupts = <5 IRQ_TYPE_EDGE_RISING>; + + port { + panel_in: endpoint { + remote-endpoint = <&display_out>; + }; + }; + }; + }; + +... diff --git a/Documentation/devicetree/bindings/display/panel/samsung,lms397kf04.yaml b/Documentation/devicetree/bindings/display/panel/samsung,lms397kf04.yaml index 4cb75a5f2e3a..cd62968426fb 100644 --- a/Documentation/devicetree/bindings/display/panel/samsung,lms397kf04.yaml +++ b/Documentation/devicetree/bindings/display/panel/samsung,lms397kf04.yaml @@ -33,8 +33,11 @@ properties: backlight: true + spi-cpha: true + + spi-cpol: true + spi-max-frequency: - $ref: /schemas/types.yaml#/definitions/uint32 description: inherited as a SPI client node, the datasheet specifies maximum 300 ns minimum cycle which gives around 3 MHz max frequency maximum: 3000000 @@ -44,6 +47,9 @@ properties: required: - compatible - reg + - spi-cpha + - spi-cpol + - port additionalProperties: false @@ -52,15 +58,23 @@ examples: #include <dt-bindings/gpio/gpio.h> spi { + compatible = "spi-gpio"; + sck-gpios = <&gpio 0 GPIO_ACTIVE_HIGH>; + miso-gpios = <&gpio 1 GPIO_ACTIVE_HIGH>; + mosi-gpios = <&gpio 2 GPIO_ACTIVE_HIGH>; + cs-gpios = <&gpio 3 GPIO_ACTIVE_HIGH>; + num-chipselects = <1>; #address-cells = <1>; #size-cells = <0>; panel@0 { compatible = "samsung,lms397kf04"; spi-max-frequency = <3000000>; + spi-cpha; + spi-cpol; reg = <0>; vci-supply = <&lcd_3v0_reg>; vccio-supply = <&lcd_1v8_reg>; - reset-gpios = <&gpio 1 GPIO_ACTIVE_LOW>; + reset-gpios = <&gpio 4 GPIO_ACTIVE_LOW>; backlight = <&ktd259>; port { diff --git a/Documentation/devicetree/bindings/display/rockchip/dw_mipi_dsi_rockchip.txt b/Documentation/devicetree/bindings/display/rockchip/dw_mipi_dsi_rockchip.txt index 151be3bba06f..39792f051d2d 100644 --- a/Documentation/devicetree/bindings/display/rockchip/dw_mipi_dsi_rockchip.txt +++ b/Documentation/devicetree/bindings/display/rockchip/dw_mipi_dsi_rockchip.txt @@ -23,6 +23,7 @@ Required properties: Optional properties: - phys: from general PHY binding: the phandle for the PHY device. - phy-names: Should be "dphy" if phys references an external phy. +- #phy-cells: Defined when used as ISP phy, should be 0. - power-domains: a phandle to mipi dsi power domain node. - resets: list of phandle + reset specifier pairs, as described in [3]. - reset-names: string reset name, must be "apb". 
diff --git a/Documentation/devicetree/bindings/display/solomon,ssd1307fb.yaml b/Documentation/devicetree/bindings/display/solomon,ssd1307fb.yaml new file mode 100644 index 000000000000..2ed2a7d0ca2f --- /dev/null +++ b/Documentation/devicetree/bindings/display/solomon,ssd1307fb.yaml @@ -0,0 +1,208 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/display/solomon,ssd1307fb.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Solomon SSD1307 OLED Controller Framebuffer + +maintainers: + - Maxime Ripard <mripard@kernel.org> + +properties: + compatible: + enum: + - solomon,ssd1305fb-i2c + - solomon,ssd1306fb-i2c + - solomon,ssd1307fb-i2c + - solomon,ssd1309fb-i2c + + reg: + maxItems: 1 + + pwms: + maxItems: 1 + + reset-gpios: + maxItems: 1 + + vbat-supply: + description: The supply for VBAT + + solomon,height: + $ref: /schemas/types.yaml#/definitions/uint32 + default: 16 + description: + Height in pixel of the screen driven by the controller + + solomon,width: + $ref: /schemas/types.yaml#/definitions/uint32 + default: 96 + description: + Width in pixel of the screen driven by the controller + + solomon,page-offset: + $ref: /schemas/types.yaml#/definitions/uint32 + default: 1 + description: + Offset of pages (band of 8 pixels) that the screen is mapped to + + solomon,segment-no-remap: + type: boolean + description: + Display needs normal (non-inverted) data column to segment mapping + + solomon,col-offset: + $ref: /schemas/types.yaml#/definitions/uint32 + default: 0 + description: + Offset of columns (COL/SEG) that the screen is mapped to + + solomon,com-seq: + type: boolean + description: + Display uses sequential COM pin configuration + + solomon,com-lrremap: + type: boolean + description: + Display uses left-right COM pin remap + + solomon,com-invdir: + type: boolean + description: + Display uses inverted COM pin scan direction + + solomon,com-offset: + $ref: /schemas/types.yaml#/definitions/uint32 + default: 0 + description: + Number of the COM pin wired to the first display line + + solomon,prechargep1: + $ref: /schemas/types.yaml#/definitions/uint32 + default: 2 + description: + Length of deselect period (phase 1) in clock cycles + + solomon,prechargep2: + $ref: /schemas/types.yaml#/definitions/uint32 + default: 2 + description: + Length of precharge period (phase 2) in clock cycles. This needs to be + the higher, the higher the capacitance of the OLED's pixels is. + + solomon,dclk-div: + $ref: /schemas/types.yaml#/definitions/uint32 + minimum: 1 + maximum: 16 + description: + Clock divisor. The default value is controller-dependent. + + solomon,dclk-frq: + $ref: /schemas/types.yaml#/definitions/uint32 + minimum: 0 + maximum: 15 + description: + Clock frequency, higher value means higher frequency. + The default value is controller-dependent. + + solomon,lookup-table: + $ref: /schemas/types.yaml#/definitions/uint8-array + maxItems: 4 + description: + 8 bit value array of current drive pulse widths for BANK0, and colors A, + B, and C. Each value in range of 31 to 63 for pulse widths of 32 to 64. + Color D is always width 64. 
+ + solomon,area-color-enable: + type: boolean + description: + Display uses color mode + + solomon,low-power: + type: boolean + description: + Display runs in low power mode + +required: + - compatible + - reg + +allOf: + - if: + properties: + compatible: + contains: + const: solomon,ssd1305fb-i2c + then: + properties: + solomon,dclk-div: + default: 1 + solomon,dclk-frq: + default: 7 + + - if: + properties: + compatible: + contains: + const: solomon,ssd1306fb-i2c + then: + properties: + solomon,dclk-div: + default: 1 + solomon,dclk-frq: + default: 8 + + - if: + properties: + compatible: + contains: + const: solomon,ssd1307fb-i2c + then: + properties: + solomon,dclk-div: + default: 2 + solomon,dclk-frq: + default: 12 + required: + - pwms + + - if: + properties: + compatible: + contains: + const: solomon,ssd1309fb-i2c + then: + properties: + solomon,dclk-div: + default: 1 + solomon,dclk-frq: + default: 10 + +additionalProperties: false + +examples: + - | + i2c1 { + #address-cells = <1>; + #size-cells = <0>; + + ssd1307: oled@3c { + compatible = "solomon,ssd1307fb-i2c"; + reg = <0x3c>; + pwms = <&pwm 4 3000>; + reset-gpios = <&gpio2 7>; + }; + + ssd1306: oled@3d { + compatible = "solomon,ssd1306fb-i2c"; + reg = <0x3c>; + pwms = <&pwm 4 3000>; + reset-gpios = <&gpio2 7>; + solomon,com-lrremap; + solomon,com-invdir; + solomon,com-offset = <32>; + solomon,lookup-table = /bits/ 8 <0x3f 0x3f 0x3f 0x3f>; + }; + }; diff --git a/Documentation/devicetree/bindings/display/ssd1307fb.txt b/Documentation/devicetree/bindings/display/ssd1307fb.txt deleted file mode 100644 index 2dcb6d12d137..000000000000 --- a/Documentation/devicetree/bindings/display/ssd1307fb.txt +++ /dev/null @@ -1,60 +0,0 @@ -* Solomon SSD1307 Framebuffer Driver - -Required properties: - - compatible: Should be "solomon,<chip>fb-<bus>". The only supported bus for - now is i2c, and the supported chips are ssd1305, ssd1306, ssd1307 and - ssd1309. - - reg: Should contain address of the controller on the I2C bus. Most likely - 0x3c or 0x3d - - pwm: Should contain the pwm to use according to the OF device tree PWM - specification [0]. Only required for the ssd1307. - - solomon,height: Height in pixel of the screen driven by the controller - - solomon,width: Width in pixel of the screen driven by the controller - - solomon,page-offset: Offset of pages (band of 8 pixels) that the screen is - mapped to. - -Optional properties: - - reset-gpios: The GPIO used to reset the OLED display, if available. See - Documentation/devicetree/bindings/gpio/gpio.txt for details. - - vbat-supply: The supply for VBAT - - solomon,segment-no-remap: Display needs normal (non-inverted) data column - to segment mapping - - solomon,col-offset: Offset of columns (COL/SEG) that the screen is mapped to. - - solomon,com-seq: Display uses sequential COM pin configuration - - solomon,com-lrremap: Display uses left-right COM pin remap - - solomon,com-invdir: Display uses inverted COM pin scan direction - - solomon,com-offset: Number of the COM pin wired to the first display line - - solomon,prechargep1: Length of deselect period (phase 1) in clock cycles. - - solomon,prechargep2: Length of precharge period (phase 2) in clock cycles. - This needs to be the higher, the higher the capacitance - of the OLED's pixels is - - solomon,dclk-div: Clock divisor 1 to 16 - - solomon,dclk-frq: Clock frequency 0 to 15, higher value means higher - frequency - - solomon,lookup-table: 8 bit value array of current drive pulse widths for - BANK0, and colors A, B, and C. 
Each value in range - of 31 to 63 for pulse widths of 32 to 64. Color D - is always width 64. - - solomon,area-color-enable: Display uses color mode - - solomon,low-power. Display runs in low power mode - -[0]: Documentation/devicetree/bindings/pwm/pwm.txt - -Examples: -ssd1307: oled@3c { - compatible = "solomon,ssd1307fb-i2c"; - reg = <0x3c>; - pwms = <&pwm 4 3000>; - reset-gpios = <&gpio2 7>; -}; - -ssd1306: oled@3c { - compatible = "solomon,ssd1306fb-i2c"; - reg = <0x3c>; - pwms = <&pwm 4 3000>; - reset-gpios = <&gpio2 7>; - solomon,com-lrremap; - solomon,com-invdir; - solomon,com-offset = <32>; - solomon,lookup-table = /bits/ 8 <0x3f 0x3f 0x3f 0x3f>; -}; diff --git a/Documentation/devicetree/bindings/vendor-prefixes.yaml b/Documentation/devicetree/bindings/vendor-prefixes.yaml index ed4f66ec9a65..507aeef26d77 100644 --- a/Documentation/devicetree/bindings/vendor-prefixes.yaml +++ b/Documentation/devicetree/bindings/vendor-prefixes.yaml @@ -341,6 +341,8 @@ patternProperties: description: eGalax_eMPIA Technology Inc "^einfochips,.*": description: Einfochips + "^eink,.*": + description: E Ink Corporation "^elan,.*": description: Elan Microelectronic Corp. "^element14,.*": @@ -938,6 +940,8 @@ patternProperties: description: Chengdu Kaixuan Information Technology Co., Ltd. "^qiaodian,.*": description: QiaoDian XianShi Corporation + "^qishenglong,.*": + description: Shenzhen QiShenglong Industrialist Co., Ltd. "^qnap,.*": description: QNAP Systems, Inc. "^radxa,.*": diff --git a/Documentation/driver-api/dma-buf.rst b/Documentation/driver-api/dma-buf.rst index 7f21425d9435..f5ac4c90b237 100644 --- a/Documentation/driver-api/dma-buf.rst +++ b/Documentation/driver-api/dma-buf.rst @@ -88,6 +88,9 @@ consider though: - The DMA buffer FD is also pollable, see `Implicit Fence Poll Support`_ below for details. +- The DMA buffer FD also supports a few dma-buf-specific ioctls, see + `DMA Buffer ioctls`_ below for details. + Basic Operation and Device DMA Access ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -106,6 +109,16 @@ Implicit Fence Poll Support .. kernel-doc:: drivers/dma-buf/dma-buf.c :doc: implicit fence polling +DMA-BUF statistics +~~~~~~~~~~~~~~~~~~ +.. kernel-doc:: drivers/dma-buf/dma-buf-sysfs-stats.c + :doc: overview + +DMA Buffer ioctls +~~~~~~~~~~~~~~~~~ + +.. kernel-doc:: include/uapi/linux/dma-buf.h + Kernel Functions and Structures Reference ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/Documentation/driver-api/thermal/nouveau_thermal.rst b/Documentation/driver-api/thermal/nouveau_thermal.rst index 79ece266cf6d..aa10db6df309 100644 --- a/Documentation/driver-api/thermal/nouveau_thermal.rst +++ b/Documentation/driver-api/thermal/nouveau_thermal.rst @@ -90,7 +90,7 @@ Bug reports ----------- Thermal management on Nouveau is new and may not work on all cards. If you have -inquiries, please ping mupuf on IRC (#nouveau, freenode). +inquiries, please ping mupuf on IRC (#nouveau, OFTC). Bug reports should be filled on Freedesktop's bug tracker. Please follow https://nouveau.freedesktop.org/wiki/Bugs diff --git a/Documentation/gpu/drm-kms.rst b/Documentation/gpu/drm-kms.rst index 87e5023e3f55..1ef7951ded5e 100644 --- a/Documentation/gpu/drm-kms.rst +++ b/Documentation/gpu/drm-kms.rst @@ -159,6 +159,8 @@ KMS Core Structures and Functions .. kernel-doc:: drivers/gpu/drm/drm_mode_config.c :export: +.. 
_kms_base_object_abstraction: + Modeset Base Object Abstraction =============================== @@ -463,6 +465,35 @@ KMS Properties This section of the documentation is primarily aimed at user-space developers. For the driver APIs, see the other sections. +Requirements +------------ + +KMS drivers might need to add extra properties to support new features. Each +new property introduced in a driver needs to meet a few requirements, in +addition to the one mentioned above: + +* It must be standardized, documenting: + + * The full, exact, name string; + * If the property is an enum, all the valid value name strings; + * What values are accepted, and what these values mean; + * What the property does and how it can be used; + * How the property might interact with other, existing properties. + +* It must provide a generic helper in the core code to register that + property on the object it attaches to. + +* Its content must be decoded by the core and provided in the object's + associated state structure. That includes anything drivers might want + to precompute, like struct drm_clip_rect for planes. + +* Its initial state must match the behavior prior to the property + introduction. This might be a fixed value matching what the hardware + does, or it may be inherited from the state the firmware left the + system in during boot. + +* An IGT test must be submitted where reasonable. + Property Types and Blob Property Support ---------------------------------------- @@ -508,8 +539,8 @@ Plane Composition Properties Damage Tracking Properties -------------------------- -.. kernel-doc:: drivers/gpu/drm/drm_damage_helper.c - :doc: overview +.. kernel-doc:: drivers/gpu/drm/drm_plane.c + :doc: damage tracking Color Management Properties --------------------------- diff --git a/Documentation/gpu/drm-uapi.rst b/Documentation/gpu/drm-uapi.rst index 04bdc7a91d53..199afb503ab1 100644 --- a/Documentation/gpu/drm-uapi.rst +++ b/Documentation/gpu/drm-uapi.rst @@ -37,6 +37,15 @@ Primary Nodes, DRM Master and Authentication .. kernel-doc:: include/drm/drm_auth.h :internal: + +.. _drm_leasing: + +DRM Display Resource Leasing +============================ + +.. kernel-doc:: drivers/gpu/drm/drm_lease.c + :doc: drm leasing + Open-Source Userspace Requirements ================================== @@ -457,6 +466,19 @@ Userspace API Structures .. kernel-doc:: include/uapi/drm/drm_mode.h :doc: overview +.. _crtc_index: + +CRTC index +---------- + +CRTC's have both an object ID and an index, and they are not the same thing. +The index is used in cases where a densely packed identifier for a CRTC is +needed, for instance a bitmask of CRTC's. The member possible_crtcs of struct +drm_mode_get_plane is an example. + +DRM_IOCTL_MODE_GETRESOURCES populates a structure with an array of CRTC ID's, +and the CRTC index is its position in this array. + .. kernel-doc:: include/uapi/drm/drm.h :internal: diff --git a/Documentation/gpu/i915.rst b/Documentation/gpu/i915.rst index 42ce0196930a..204ebdaadb45 100644 --- a/Documentation/gpu/i915.rst +++ b/Documentation/gpu/i915.rst @@ -422,9 +422,16 @@ Batchbuffer Parsing User Batchbuffer Execution -------------------------- +.. kernel-doc:: drivers/gpu/drm/i915/gem/i915_gem_context_types.h + .. kernel-doc:: drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c :doc: User command execution +Scheduling +---------- +.. 
kernel-doc:: drivers/gpu/drm/i915/i915_scheduler_types.h + :functions: i915_sched_engine + Logical Rings, Logical Ring Contexts and Execlists -------------------------------------------------- @@ -518,6 +525,14 @@ GuC-based command submission .. kernel-doc:: drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c :doc: GuC-based command submission +GuC ABI +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. kernel-doc:: drivers/gpu/drm/i915/gt/uc/abi/guc_messages_abi.h +.. kernel-doc:: drivers/gpu/drm/i915/gt/uc/abi/guc_communication_mmio_abi.h +.. kernel-doc:: drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h +.. kernel-doc:: drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h + HuC --- .. kernel-doc:: drivers/gpu/drm/i915/gt/uc/intel_huc.c diff --git a/Documentation/gpu/rfc/i915_parallel_execbuf.h b/Documentation/gpu/rfc/i915_parallel_execbuf.h new file mode 100644 index 000000000000..8cbe2c4e0172 --- /dev/null +++ b/Documentation/gpu/rfc/i915_parallel_execbuf.h @@ -0,0 +1,122 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#define I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT 2 /* see i915_context_engines_parallel_submit */ + +/** + * struct drm_i915_context_engines_parallel_submit - Configure engine for + * parallel submission. + * + * Setup a slot in the context engine map to allow multiple BBs to be submitted + * in a single execbuf IOCTL. Those BBs will then be scheduled to run on the GPU + * in parallel. Multiple hardware contexts are created internally in the i915 + * run these BBs. Once a slot is configured for N BBs only N BBs can be + * submitted in each execbuf IOCTL and this is implicit behavior e.g. The user + * doesn't tell the execbuf IOCTL there are N BBs, the execbuf IOCTL knows how + * many BBs there are based on the slot's configuration. The N BBs are the last + * N buffer objects or first N if I915_EXEC_BATCH_FIRST is set. + * + * The default placement behavior is to create implicit bonds between each + * context if each context maps to more than 1 physical engine (e.g. context is + * a virtual engine). Also we only allow contexts of same engine class and these + * contexts must be in logically contiguous order. Examples of the placement + * behavior described below. Lastly, the default is to not allow BBs to + * preempted mid BB rather insert coordinated preemption on all hardware + * contexts between each set of BBs. Flags may be added in the future to change + * both of these default behaviors. + * + * Returns -EINVAL if hardware context placement configuration is invalid or if + * the placement configuration isn't supported on the platform / submission + * interface. + * Returns -ENODEV if extension isn't supported on the platform / submission + * interface. + * + * .. 
code-block:: none + * + * Example 1 pseudo code: + * CS[X] = generic engine of same class, logical instance X + * INVALID = I915_ENGINE_CLASS_INVALID, I915_ENGINE_CLASS_INVALID_NONE + * set_engines(INVALID) + * set_parallel(engine_index=0, width=2, num_siblings=1, + * engines=CS[0],CS[1]) + * + * Results in the following valid placement: + * CS[0], CS[1] + * + * Example 2 pseudo code: + * CS[X] = generic engine of same class, logical instance X + * INVALID = I915_ENGINE_CLASS_INVALID, I915_ENGINE_CLASS_INVALID_NONE + * set_engines(INVALID) + * set_parallel(engine_index=0, width=2, num_siblings=2, + * engines=CS[0],CS[2],CS[1],CS[3]) + * + * Results in the following valid placements: + * CS[0], CS[1] + * CS[2], CS[3] + * + * This can also be thought of as 2 virtual engines described by 2-D array + * in the engines the field with bonds placed between each index of the + * virtual engines. e.g. CS[0] is bonded to CS[1], CS[2] is bonded to + * CS[3]. + * VE[0] = CS[0], CS[2] + * VE[1] = CS[1], CS[3] + * + * Example 3 pseudo code: + * CS[X] = generic engine of same class, logical instance X + * INVALID = I915_ENGINE_CLASS_INVALID, I915_ENGINE_CLASS_INVALID_NONE + * set_engines(INVALID) + * set_parallel(engine_index=0, width=2, num_siblings=2, + * engines=CS[0],CS[1],CS[1],CS[3]) + * + * Results in the following valid and invalid placements: + * CS[0], CS[1] + * CS[1], CS[3] - Not logical contiguous, return -EINVAL + */ +struct drm_i915_context_engines_parallel_submit { + /** + * @base: base user extension. + */ + struct i915_user_extension base; + + /** + * @engine_index: slot for parallel engine + */ + __u16 engine_index; + + /** + * @width: number of contexts per parallel engine + */ + __u16 width; + + /** + * @num_siblings: number of siblings per context + */ + __u16 num_siblings; + + /** + * @mbz16: reserved for future use; must be zero + */ + __u16 mbz16; + + /** + * @flags: all undefined flags must be zero, currently not defined flags + */ + __u64 flags; + + /** + * @mbz64: reserved for future use; must be zero + */ + __u64 mbz64[3]; + + /** + * @engines: 2-d array of engine instances to configure parallel engine + * + * length = width (i) * num_siblings (j) + * index = j + i * num_siblings + */ + struct i915_engine_class_instance engines[0]; + +} __packed; + diff --git a/Documentation/gpu/rfc/i915_scheduler.rst b/Documentation/gpu/rfc/i915_scheduler.rst new file mode 100644 index 000000000000..cbda75065dad --- /dev/null +++ b/Documentation/gpu/rfc/i915_scheduler.rst @@ -0,0 +1,148 @@ +========================================= +I915 GuC Submission/DRM Scheduler Section +========================================= + +Upstream plan +============= +For upstream the overall plan for landing GuC submission and integrating the +i915 with the DRM scheduler is: + +* Merge basic GuC submission + * Basic submission support for all gen11+ platforms + * Not enabled by default on any current platforms but can be enabled via + modparam enable_guc + * Lots of rework will need to be done to integrate with DRM scheduler so + no need to nit pick everything in the code, it just should be + functional, no major coding style / layering errors, and not regress + execlists + * Update IGTs / selftests as needed to work with GuC submission + * Enable CI on supported platforms for a baseline + * Rework / get CI heathly for GuC submission in place as needed +* Merge new parallel submission uAPI + * Bonding uAPI completely incompatible with GuC submission, plus it has + severe design issues in general, which is why we 
want to retire it no + matter what + * New uAPI adds I915_CONTEXT_ENGINES_EXT_PARALLEL context setup step + which configures a slot with N contexts + * After I915_CONTEXT_ENGINES_EXT_PARALLEL a user can submit N batches to + a slot in a single execbuf IOCTL and the batches run on the GPU in + paralllel + * Initially only for GuC submission but execlists can be supported if + needed +* Convert the i915 to use the DRM scheduler + * GuC submission backend fully integrated with DRM scheduler + * All request queues removed from backend (e.g. all backpressure + handled in DRM scheduler) + * Resets / cancels hook in DRM scheduler + * Watchdog hooks into DRM scheduler + * Lots of complexity of the GuC backend can be pulled out once + integrated with DRM scheduler (e.g. state machine gets + simplier, locking gets simplier, etc...) + * Execlists backend will minimum required to hook in the DRM scheduler + * Legacy interface + * Features like timeslicing / preemption / virtual engines would + be difficult to integrate with the DRM scheduler and these + features are not required for GuC submission as the GuC does + these things for us + * ROI low on fully integrating into DRM scheduler + * Fully integrating would add lots of complexity to DRM + scheduler + * Port i915 priority inheritance / boosting feature in DRM scheduler + * Used for i915 page flip, may be useful to other DRM drivers as + well + * Will be an optional feature in the DRM scheduler + * Remove in-order completion assumptions from DRM scheduler + * Even when using the DRM scheduler the backends will handle + preemption, timeslicing, etc... so it is possible for jobs to + finish out of order + * Pull out i915 priority levels and use DRM priority levels + * Optimize DRM scheduler as needed + +TODOs for GuC submission upstream +================================= + +* Need an update to GuC firmware / i915 to enable error state capture +* Open source tool to decode GuC logs +* Public GuC spec + +New uAPI for basic GuC submission +================================= +No major changes are required to the uAPI for basic GuC submission. The only +change is a new scheduler attribute: I915_SCHEDULER_CAP_STATIC_PRIORITY_MAP. +This attribute indicates the 2k i915 user priority levels are statically mapped +into 3 levels as follows: + +* -1k to -1 Low priority +* 0 Medium priority +* 1 to 1k High priority + +This is needed because the GuC only has 4 priority bands. The highest priority +band is reserved with the kernel. This aligns with the DRM scheduler priority +levels too. + +Spec references: +---------------- +* https://www.khronos.org/registry/EGL/extensions/IMG/EGL_IMG_context_priority.txt +* https://www.khronos.org/registry/vulkan/specs/1.2-extensions/html/chap5.html#devsandqueues-priority +* https://spec.oneapi.com/level-zero/latest/core/api.html#ze-command-queue-priority-t + +New parallel submission uAPI +============================ +The existing bonding uAPI is completely broken with GuC submission because +whether a submission is a single context submit or parallel submit isn't known +until execbuf time activated via the I915_SUBMIT_FENCE. To submit multiple +contexts in parallel with the GuC the context must be explicitly registered with +N contexts and all N contexts must be submitted in a single command to the GuC. +The GuC interfaces do not support dynamically changing between N contexts as the +bonding uAPI does. Hence the need for a new parallel submission interface. 
Also +the legacy bonding uAPI is quite confusing and not intuitive at all. Furthermore +I915_SUBMIT_FENCE is by design a future fence, so not really something we should +continue to support. + +The new parallel submission uAPI consists of 3 parts: + +* Export engines logical mapping +* A 'set_parallel' extension to configure contexts for parallel + submission +* Extend execbuf2 IOCTL to support submitting N BBs in a single IOCTL + +Export engines logical mapping +------------------------------ +Certain use cases require BBs to be placed on engine instances in logical order +(e.g. split-frame on gen11+). The logical mapping of engine instances can change +based on fusing. Rather than making UMDs be aware of fusing, simply expose the +logical mapping with the existing query engine info IOCTL. Also the GuC +submission interface currently only supports submitting multiple contexts to +engines in logical order which is a new requirement compared to execlists. +Lastly, all current platforms have at most 2 engine instances and the logical +order is the same as uAPI order. This will change on platforms with more than 2 +engine instances. + +A single bit will be added to drm_i915_engine_info.flags indicating that the +logical instance has been returned and a new field, +drm_i915_engine_info.logical_instance, returns the logical instance. + +A 'set_parallel' extension to configure contexts for parallel submission +------------------------------------------------------------------------ +The 'set_parallel' extension configures a slot for parallel submission of N BBs. +It is a setup step that must be called before using any of the contexts. See +I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE or I915_CONTEXT_ENGINES_EXT_BOND for +similar existing examples. Once a slot is configured for parallel submission the +execbuf2 IOCTL can be called submitting N BBs in a single IOCTL. Initially only +supports GuC submission. Execlists supports can be added later if needed. + +Add I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT and +drm_i915_context_engines_parallel_submit to the uAPI to implement this +extension. + +.. kernel-doc:: Documentation/gpu/rfc/i915_parallel_execbuf.h + :functions: drm_i915_context_engines_parallel_submit + +Extend execbuf2 IOCTL to support submitting N BBs in a single IOCTL +------------------------------------------------------------------- +Contexts that have been configured with the 'set_parallel' extension can only +submit N BBs in a single execbuf2 IOCTL. The BBs are either the last N objects +in the drm_i915_gem_exec_object2 list or the first N if I915_EXEC_BATCH_FIRST is +set. The number of BBs is implicit based on the slot submitted and how it has +been configured by 'set_parallel' or other extensions. No uAPI changes are +required to the execbuf2 IOCTL. diff --git a/Documentation/gpu/rfc/index.rst b/Documentation/gpu/rfc/index.rst index 05670442ca1b..91e93a705230 100644 --- a/Documentation/gpu/rfc/index.rst +++ b/Documentation/gpu/rfc/index.rst @@ -19,3 +19,7 @@ host such documentation: .. toctree:: i915_gem_lmem.rst + +.. toctree:: + + i915_scheduler.rst diff --git a/Documentation/gpu/vkms.rst b/Documentation/gpu/vkms.rst index 2c9b376da5ca..941f0e7e5eef 100644 --- a/Documentation/gpu/vkms.rst +++ b/Documentation/gpu/vkms.rst @@ -98,9 +98,17 @@ with VKMS maintainers. 
IGT better support ------------------ -- Investigate: (1) test cases on kms_plane that are failing due to timeout on - capturing CRC; (2) when running kms_flip test cases in sequence, some - successful individual test cases are failing randomly. +Debugging: + +- kms_plane: some test cases are failing due to timeout on capturing CRC; + +- kms_flip: when running test cases in sequence, some successful individual + test cases are failing randomly; when individually, some successful test + cases display in the log the following error:: + + [drm:vkms_prepare_fb [vkms]] ERROR vmap failed: -4 + +Virtual hardware (vblank-less) mode: - VKMS already has support for vblanks simulated via hrtimers, which can be tested with kms_flip test; in some way, we can say that VKMS already mimics @@ -116,7 +124,17 @@ Add Plane Features There's lots of plane features we could add support for: -- Real overlay planes, not just cursor. +- Multiple overlay planes. [Good to get started] + +- Clearing primary plane: clear primary plane before plane composition (at the + start) for correctness of pixel blend ops. It also guarantees alpha channel + is cleared in the target buffer for stable crc. [Good to get started] + +- ARGB format on primary plane: blend the primary plane into background with + translucent alpha. + +- Support when the primary plane isn't exactly matching the output size: blend + the primary plane into the black background. - Full alpha blending on all planes. @@ -129,13 +147,8 @@ There's lots of plane features we could add support for: cursor api). For all of these, we also want to review the igt test coverage and make sure -all relevant igt testcases work on vkms. - -Prime Buffer Sharing --------------------- - -- Syzbot report - WARNING in vkms_gem_free_object: - https://syzkaller.appspot.com/bug?extid=e7ad70d406e74d8fc9d0 +all relevant igt testcases work on vkms. They are good options for internship +project. Runtime Configuration --------------------- @@ -153,7 +166,7 @@ module. Use/Test-cases: the refresh rate. The currently proposed solution is to expose vkms configuration through -configfs. All existing module options should be supported through configfs +configfs. All existing module options should be supported through configfs too. Writeback support @@ -162,6 +175,7 @@ Writeback support - The writeback and CRC capture operations share the use of composer_enabled boolean to ensure vblanks. Probably, when these operations work together, composer_enabled needs to refcounting the composer state to proper work. + [Good to get started] - Add support for cloned writeback outputs and related test cases using a cloned output in the IGT kms_writeback. 
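The CRTC index section added to drm-uapi.rst above lends itself to a short userspace illustration. The sketch below is not part of the patch set; it assumes the libdrm wrappers (drmModeGetResources and friends) and a device node at /dev/dri/card0, and simply decodes each plane's possible_crtcs bitmask back to CRTC object IDs using the index rule described there.

/*
 * Illustrative sketch only: bit n of a plane's possible_crtcs refers to the
 * CRTC at index n of the array returned by DRM_IOCTL_MODE_GETRESOURCES
 * (here accessed via libdrm). Build with -ldrm.
 */
#include <fcntl.h>
#include <stdio.h>
#include <stdint.h>
#include <xf86drm.h>
#include <xf86drmMode.h>

int main(void)
{
	int fd = open("/dev/dri/card0", O_RDWR);
	if (fd < 0)
		return 1;

	/* Expose non-primary planes as well. */
	drmSetClientCap(fd, DRM_CLIENT_CAP_UNIVERSAL_PLANES, 1);

	drmModeRes *res = drmModeGetResources(fd);
	drmModePlaneRes *planes = drmModeGetPlaneResources(fd);
	if (!res || !planes)
		return 1;

	for (uint32_t i = 0; i < planes->count_planes; i++) {
		drmModePlane *plane = drmModeGetPlane(fd, planes->planes[i]);
		if (!plane)
			continue;
		printf("plane %u can use:", plane->plane_id);
		/* Bit idx of possible_crtcs maps to res->crtcs[idx]. */
		for (int idx = 0; idx < res->count_crtcs; idx++)
			if (plane->possible_crtcs & (1u << idx))
				printf(" crtc %u (index %d)", res->crtcs[idx], idx);
		printf("\n");
		drmModeFreePlane(plane);
	}

	drmModeFreePlaneResources(planes);
	drmModeFreeResources(res);
	return 0;
}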
diff --git a/MAINTAINERS b/MAINTAINERS index af0df260c02b..fffe8f814700 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1703,7 +1703,7 @@ L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained W: https://asahilinux.org B: https://github.com/AsahiLinux/linux/issues -C: irc://chat.freenode.net/asahi-dev +C: irc://irc.oftc.net/asahi-dev T: git https://github.com/AsahiLinux/linux.git F: Documentation/devicetree/bindings/arm/apple.yaml F: Documentation/devicetree/bindings/interrupt-controller/apple,aic.yaml @@ -5799,7 +5799,7 @@ M: Gerd Hoffmann <kraxel@redhat.com> L: virtualization@lists.linux-foundation.org S: Maintained T: git git://anongit.freedesktop.org/drm/drm-misc -F: drivers/gpu/drm/bochs/ +F: drivers/gpu/drm/tiny/bochs.c DRM DRIVER FOR BOE HIMAX8279D PANELS M: Jerry Han <hanxu5@huaqin.corp-partner.google.com> @@ -5984,6 +5984,13 @@ S: Maintained F: Documentation/devicetree/bindings/display/panel/raydium,rm67191.yaml F: drivers/gpu/drm/panel/panel-raydium-rm67191.c +DRM DRIVER FOR SAMSUNG DB7430 PANELS +M: Linus Walleij <linus.walleij@linaro.org> +S: Maintained +T: git git://anongit.freedesktop.org/drm/drm-misc +F: Documentation/devicetree/bindings/display/panel/samsung,lms397kf04.yaml +F: drivers/gpu/drm/panel/panel-samsung-db7430.c + DRM DRIVER FOR SITRONIX ST7703 PANELS M: Guido Günther <agx@sigxcpu.org> R: Purism Kernel Team <kernel@puri.sm> @@ -6082,21 +6089,27 @@ F: drivers/gpu/drm/vboxvideo/ DRM DRIVER FOR VMWARE VIRTUAL GPU M: "VMware Graphics" <linux-graphics-maintainer@vmware.com> -M: Roland Scheidegger <sroland@vmware.com> M: Zack Rusin <zackr@vmware.com> L: dri-devel@lists.freedesktop.org S: Supported -T: git git://people.freedesktop.org/~sroland/linux +T: git git://anongit.freedesktop.org/drm/drm-misc F: drivers/gpu/drm/vmwgfx/ F: include/uapi/drm/vmwgfx_drm.h +DRM DRIVER FOR WIDECHIPS WS2401 PANELS +M: Linus Walleij <linus.walleij@linaro.org> +S: Maintained +T: git git://anongit.freedesktop.org/drm/drm-misc +F: Documentation/devicetree/bindings/display/panel/samsung,lms380kf01.yaml +F: drivers/gpu/drm/panel/panel-widechips-ws2401.c + DRM DRIVERS M: David Airlie <airlied@linux.ie> M: Daniel Vetter <daniel@ffwll.ch> L: dri-devel@lists.freedesktop.org S: Maintained B: https://gitlab.freedesktop.org/drm -C: irc://chat.freenode.net/dri-devel +C: irc://irc.oftc.net/dri-devel T: git git://anongit.freedesktop.org/drm/drm F: Documentation/devicetree/bindings/display/ F: Documentation/devicetree/bindings/gpu/ @@ -9288,7 +9301,7 @@ S: Supported W: https://01.org/linuxgraphics/ Q: http://patchwork.freedesktop.org/project/intel-gfx/ B: https://gitlab.freedesktop.org/drm/intel/-/wikis/How-to-file-i915-bugs -C: irc://chat.freenode.net/intel-gfx +C: irc://irc.oftc.net/intel-gfx T: git git://anongit.freedesktop.org/drm-intel F: Documentation/gpu/i915.rst F: drivers/gpu/drm/i915/ diff --git a/arch/arm/include/asm/efi.h b/arch/arm/include/asm/efi.h index 9de7ab2ce05d..a6f3b179e8a9 100644 --- a/arch/arm/include/asm/efi.h +++ b/arch/arm/include/asm/efi.h @@ -17,6 +17,7 @@ #ifdef CONFIG_EFI void efi_init(void); +extern void efifb_setup_from_dmi(struct screen_info *si, const char *opt); int efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md); int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md); @@ -52,10 +53,6 @@ void efi_virtmap_unload(void); struct screen_info *alloc_screen_info(void); void free_screen_info(struct screen_info *si); -static inline void efifb_setup_from_dmi(struct screen_info *si, const char *opt) -{ -} - /* * A 
reasonable upper bound for the uncompressed kernel size is 32 MBytes, * so we will reserve that amount of memory. We have no easy way to tell what diff --git a/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi b/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi index 77ae7561d436..781625d9344c 100644 --- a/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi @@ -255,21 +255,6 @@ #sound-dai-cells = <0>; }; - panel: panel { - /* Compatible will be filled in per-board */ - power-supply = <&pp3300_dx_edp>; - backlight = <&backlight>; - hpd-gpios = <&sn65dsi86_bridge 2 GPIO_ACTIVE_HIGH>; - - ports { - port { - panel_in_edp: endpoint { - remote-endpoint = <&sn65dsi86_out>; - }; - }; - }; - }; - pwmleds { compatible = "pwm-leds"; keyboard_backlight: keyboard-backlight { @@ -666,6 +651,21 @@ edp_brij_i2c: &i2c2 { }; }; }; + + aux-bus { + panel: panel { + /* Compatible will be filled in per-board */ + power-supply = <&pp3300_dx_edp>; + backlight = <&backlight>; + hpd-gpios = <&sn65dsi86_bridge 2 GPIO_ACTIVE_HIGH>; + + port { + panel_in_edp: endpoint { + remote-endpoint = <&sn65dsi86_out>; + }; + }; + }; + }; }; }; diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index 1bed37eb013a..d3e1825337be 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -14,6 +14,7 @@ #ifdef CONFIG_EFI extern void efi_init(void); +extern void efifb_setup_from_dmi(struct screen_info *si, const char *opt); #else #define efi_init() #endif @@ -85,10 +86,6 @@ static inline void free_screen_info(struct screen_info *si) { } -static inline void efifb_setup_from_dmi(struct screen_info *si, const char *opt) -{ -} - #define EFI_ALLOC_ALIGN SZ_64K /* diff --git a/arch/riscv/include/asm/efi.h b/arch/riscv/include/asm/efi.h index 7b3483ba2e84..49b398fe99f1 100644 --- a/arch/riscv/include/asm/efi.h +++ b/arch/riscv/include/asm/efi.h @@ -13,6 +13,7 @@ #ifdef CONFIG_EFI extern void efi_init(void); +extern void efifb_setup_from_dmi(struct screen_info *si, const char *opt); #else #define efi_init() #endif @@ -39,10 +40,6 @@ static inline void free_screen_info(struct screen_info *si) { } -static inline void efifb_setup_from_dmi(struct screen_info *si, const char *opt) -{ -} - void efi_virtmap_load(void); void efi_virtmap_unload(void); diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 421fa9e38c60..45962aaf2b2c 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2767,32 +2767,6 @@ config AMD_NB def_bool y depends on CPU_SUP_AMD && PCI -config X86_SYSFB - bool "Mark VGA/VBE/EFI FB as generic system framebuffer" - help - Firmwares often provide initial graphics framebuffers so the BIOS, - bootloader or kernel can show basic video-output during boot for - user-guidance and debugging. Historically, x86 used the VESA BIOS - Extensions and EFI-framebuffers for this, which are mostly limited - to x86. - This option, if enabled, marks VGA/VBE/EFI framebuffers as generic - framebuffers so the new generic system-framebuffer drivers can be - used on x86. If the framebuffer is not compatible with the generic - modes, it is advertised as fallback platform framebuffer so legacy - drivers like efifb, vesafb and uvesafb can pick it up. - If this option is not selected, all system framebuffers are always - marked as fallback platform framebuffers as usual. - - Note: Legacy fbdev drivers, including vesafb, efifb, uvesafb, will - not be able to pick up generic system framebuffers if this option - is selected. 
You are highly encouraged to enable simplefb as - replacement if you select this option. simplefb can correctly deal - with generic system framebuffers. But you should still keep vesafb - and others enabled as fallback if a system framebuffer is - incompatible with simplefb. - - If unsure, say Y. - endmenu diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 3e625c61f008..8f4e8fa6ed75 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -136,9 +136,6 @@ obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o obj-$(CONFIG_OF) += devicetree.o obj-$(CONFIG_UPROBES) += uprobes.o -obj-y += sysfb.o -obj-$(CONFIG_X86_SYSFB) += sysfb_simplefb.o -obj-$(CONFIG_EFI) += sysfb_efi.o obj-$(CONFIG_PERF_EVENTS) += perf_regs.o obj-$(CONFIG_TRACING) += tracepoint.o diff --git a/drivers/dma-buf/Kconfig b/drivers/dma-buf/Kconfig index 4e16c71c24b7..9561e3d2d428 100644 --- a/drivers/dma-buf/Kconfig +++ b/drivers/dma-buf/Kconfig @@ -72,6 +72,17 @@ menuconfig DMABUF_HEAPS allows userspace to allocate dma-bufs that can be shared between drivers. +menuconfig DMABUF_SYSFS_STATS + bool "DMA-BUF sysfs statistics" + select DMA_SHARED_BUFFER + help + Choose this option to enable DMA-BUF sysfs statistics + in location /sys/kernel/dmabuf/buffers. + + /sys/kernel/dmabuf/buffers/<inode_number> will contain + statistics for the DMA-BUF with the unique inode number + <inode_number>. + source "drivers/dma-buf/heaps/Kconfig" endmenu diff --git a/drivers/dma-buf/Makefile b/drivers/dma-buf/Makefile index 995e05f609ff..40d81f23cacf 100644 --- a/drivers/dma-buf/Makefile +++ b/drivers/dma-buf/Makefile @@ -6,6 +6,7 @@ obj-$(CONFIG_DMABUF_HEAPS) += heaps/ obj-$(CONFIG_SYNC_FILE) += sync_file.o obj-$(CONFIG_SW_SYNC) += sw_sync.o sync_debug.o obj-$(CONFIG_UDMABUF) += udmabuf.o +obj-$(CONFIG_DMABUF_SYSFS_STATS) += dma-buf-sysfs-stats.o dmabuf_selftests-y := \ selftest.o \ diff --git a/drivers/dma-buf/dma-buf-sysfs-stats.c b/drivers/dma-buf/dma-buf-sysfs-stats.c new file mode 100644 index 000000000000..053baadcada9 --- /dev/null +++ b/drivers/dma-buf/dma-buf-sysfs-stats.c @@ -0,0 +1,205 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * DMA-BUF sysfs statistics. + * + * Copyright (C) 2021 Google LLC. + */ + +#include <linux/dma-buf.h> +#include <linux/dma-resv.h> +#include <linux/kobject.h> +#include <linux/printk.h> +#include <linux/slab.h> +#include <linux/sysfs.h> + +#include "dma-buf-sysfs-stats.h" + +#define to_dma_buf_entry_from_kobj(x) container_of(x, struct dma_buf_sysfs_entry, kobj) + +/** + * DOC: overview + * + * ``/sys/kernel/debug/dma_buf/bufinfo`` provides an overview of every DMA-BUF + * in the system. However, since debugfs is not safe to be mounted in + * production, procfs and sysfs can be used to gather DMA-BUF statistics on + * production systems. + * + * The ``/proc/<pid>/fdinfo/<fd>`` files in procfs can be used to gather + * information about DMA-BUF fds. Detailed documentation about the interface + * is present in Documentation/filesystems/proc.rst. + * + * Unfortunately, the existing procfs interfaces can only provide information + * about the DMA-BUFs for which processes hold fds or have the buffers mmapped + * into their address space. This necessitated the creation of the DMA-BUF sysfs + * statistics interface to provide per-buffer information on production systems. + * + * The interface at ``/sys/kernel/dma-buf/buffers`` exposes information about + * every DMA-BUF when ``CONFIG_DMABUF_SYSFS_STATS`` is enabled. 
+ * + * The following stats are exposed by the interface: + * + * * ``/sys/kernel/dmabuf/buffers/<inode_number>/exporter_name`` + * * ``/sys/kernel/dmabuf/buffers/<inode_number>/size`` + * + * The information in the interface can also be used to derive per-exporter + * statistics. The data from the interface can be gathered on error conditions + * or other important events to provide a snapshot of DMA-BUF usage. + * It can also be collected periodically by telemetry to monitor various metrics. + * + * Detailed documentation about the interface is present in + * Documentation/ABI/testing/sysfs-kernel-dmabuf-buffers. + */ + +struct dma_buf_stats_attribute { + struct attribute attr; + ssize_t (*show)(struct dma_buf *dmabuf, + struct dma_buf_stats_attribute *attr, char *buf); +}; +#define to_dma_buf_stats_attr(x) container_of(x, struct dma_buf_stats_attribute, attr) + +static ssize_t dma_buf_stats_attribute_show(struct kobject *kobj, + struct attribute *attr, + char *buf) +{ + struct dma_buf_stats_attribute *attribute; + struct dma_buf_sysfs_entry *sysfs_entry; + struct dma_buf *dmabuf; + + attribute = to_dma_buf_stats_attr(attr); + sysfs_entry = to_dma_buf_entry_from_kobj(kobj); + dmabuf = sysfs_entry->dmabuf; + + if (!dmabuf || !attribute->show) + return -EIO; + + return attribute->show(dmabuf, attribute, buf); +} + +static const struct sysfs_ops dma_buf_stats_sysfs_ops = { + .show = dma_buf_stats_attribute_show, +}; + +static ssize_t exporter_name_show(struct dma_buf *dmabuf, + struct dma_buf_stats_attribute *attr, + char *buf) +{ + return sysfs_emit(buf, "%s\n", dmabuf->exp_name); +} + +static ssize_t size_show(struct dma_buf *dmabuf, + struct dma_buf_stats_attribute *attr, + char *buf) +{ + return sysfs_emit(buf, "%zu\n", dmabuf->size); +} + +static struct dma_buf_stats_attribute exporter_name_attribute = + __ATTR_RO(exporter_name); +static struct dma_buf_stats_attribute size_attribute = __ATTR_RO(size); + +static struct attribute *dma_buf_stats_default_attrs[] = { + &exporter_name_attribute.attr, + &size_attribute.attr, + NULL, +}; +ATTRIBUTE_GROUPS(dma_buf_stats_default); + +static void dma_buf_sysfs_release(struct kobject *kobj) +{ + struct dma_buf_sysfs_entry *sysfs_entry; + + sysfs_entry = to_dma_buf_entry_from_kobj(kobj); + kfree(sysfs_entry); +} + +static struct kobj_type dma_buf_ktype = { + .sysfs_ops = &dma_buf_stats_sysfs_ops, + .release = dma_buf_sysfs_release, + .default_groups = dma_buf_stats_default_groups, +}; + +void dma_buf_stats_teardown(struct dma_buf *dmabuf) +{ + struct dma_buf_sysfs_entry *sysfs_entry; + + sysfs_entry = dmabuf->sysfs_entry; + if (!sysfs_entry) + return; + + kobject_del(&sysfs_entry->kobj); + kobject_put(&sysfs_entry->kobj); +} + + +/* Statistics files do not need to send uevents. 
*/ +static int dmabuf_sysfs_uevent_filter(struct kset *kset, struct kobject *kobj) +{ + return 0; +} + +static const struct kset_uevent_ops dmabuf_sysfs_no_uevent_ops = { + .filter = dmabuf_sysfs_uevent_filter, +}; + +static struct kset *dma_buf_stats_kset; +static struct kset *dma_buf_per_buffer_stats_kset; +int dma_buf_init_sysfs_statistics(void) +{ + dma_buf_stats_kset = kset_create_and_add("dmabuf", + &dmabuf_sysfs_no_uevent_ops, + kernel_kobj); + if (!dma_buf_stats_kset) + return -ENOMEM; + + dma_buf_per_buffer_stats_kset = kset_create_and_add("buffers", + &dmabuf_sysfs_no_uevent_ops, + &dma_buf_stats_kset->kobj); + if (!dma_buf_per_buffer_stats_kset) { + kset_unregister(dma_buf_stats_kset); + return -ENOMEM; + } + + return 0; +} + +void dma_buf_uninit_sysfs_statistics(void) +{ + kset_unregister(dma_buf_per_buffer_stats_kset); + kset_unregister(dma_buf_stats_kset); +} + +int dma_buf_stats_setup(struct dma_buf *dmabuf) +{ + struct dma_buf_sysfs_entry *sysfs_entry; + int ret; + + if (!dmabuf || !dmabuf->file) + return -EINVAL; + + if (!dmabuf->exp_name) { + pr_err("exporter name must not be empty if stats needed\n"); + return -EINVAL; + } + + sysfs_entry = kzalloc(sizeof(struct dma_buf_sysfs_entry), GFP_KERNEL); + if (!sysfs_entry) + return -ENOMEM; + + sysfs_entry->kobj.kset = dma_buf_per_buffer_stats_kset; + sysfs_entry->dmabuf = dmabuf; + + dmabuf->sysfs_entry = sysfs_entry; + + /* create the directory for buffer stats */ + ret = kobject_init_and_add(&sysfs_entry->kobj, &dma_buf_ktype, NULL, + "%lu", file_inode(dmabuf->file)->i_ino); + if (ret) + goto err_sysfs_dmabuf; + + return 0; + +err_sysfs_dmabuf: + kobject_put(&sysfs_entry->kobj); + dmabuf->sysfs_entry = NULL; + return ret; +} diff --git a/drivers/dma-buf/dma-buf-sysfs-stats.h b/drivers/dma-buf/dma-buf-sysfs-stats.h new file mode 100644 index 000000000000..a49c6e2650cc --- /dev/null +++ b/drivers/dma-buf/dma-buf-sysfs-stats.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * DMA-BUF sysfs statistics. + * + * Copyright (C) 2021 Google LLC. 
+ */ + +#ifndef _DMA_BUF_SYSFS_STATS_H +#define _DMA_BUF_SYSFS_STATS_H + +#ifdef CONFIG_DMABUF_SYSFS_STATS + +int dma_buf_init_sysfs_statistics(void); +void dma_buf_uninit_sysfs_statistics(void); + +int dma_buf_stats_setup(struct dma_buf *dmabuf); + +void dma_buf_stats_teardown(struct dma_buf *dmabuf); +#else + +static inline int dma_buf_init_sysfs_statistics(void) +{ + return 0; +} + +static inline void dma_buf_uninit_sysfs_statistics(void) {} + +static inline int dma_buf_stats_setup(struct dma_buf *dmabuf) +{ + return 0; +} + +static inline void dma_buf_stats_teardown(struct dma_buf *dmabuf) {} +#endif +#endif // _DMA_BUF_SYSFS_STATS_H diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index 511fe0d217a0..63d32261b63f 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -29,6 +29,8 @@ #include <uapi/linux/dma-buf.h> #include <uapi/linux/magic.h> +#include "dma-buf-sysfs-stats.h" + static inline int is_dma_buf_file(struct file *); struct dma_buf_list { @@ -74,6 +76,7 @@ static void dma_buf_release(struct dentry *dentry) */ BUG_ON(dmabuf->cb_shared.active || dmabuf->cb_excl.active); + dma_buf_stats_teardown(dmabuf); dmabuf->ops->release(dmabuf); if (dmabuf->resv == (struct dma_resv *)&dmabuf[1]) @@ -580,6 +583,10 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info) file->f_mode |= FMODE_LSEEK; dmabuf->file = file; + ret = dma_buf_stats_setup(dmabuf); + if (ret) + goto err_sysfs; + mutex_init(&dmabuf->lock); INIT_LIST_HEAD(&dmabuf->attachments); @@ -589,6 +596,14 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info) return dmabuf; +err_sysfs: + /* + * Set file->f_path.dentry->d_fsdata to NULL so that when + * dma_buf_release() gets invoked by dentry_ops, it exits + * early before calling the release() dma_buf op. + */ + file->f_path.dentry->d_fsdata = NULL; + fput(file); err_dmabuf: kfree(dmabuf); err_module: @@ -926,6 +941,9 @@ EXPORT_SYMBOL_GPL(dma_buf_unpin); * the underlying backing storage is pinned for as long as a mapping exists, * therefore users/importers should not hold onto a mapping for undue amounts of * time. + * + * Important: Dynamic importers must wait for the exclusive fence of the struct + * dma_resv attached to the DMA-BUF first. 
*/ struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *attach, enum dma_data_direction direction) @@ -992,7 +1010,6 @@ struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *attach, } } #endif /* CONFIG_DMA_API_DEBUG */ - return sg_table; } EXPORT_SYMBOL_GPL(dma_buf_map_attachment); @@ -1469,6 +1486,12 @@ static inline void dma_buf_uninit_debugfs(void) static int __init dma_buf_init(void) { + int ret; + + ret = dma_buf_init_sysfs_statistics(); + if (ret) + return ret; + dma_buf_mnt = kern_mount(&dma_buf_fs_type); if (IS_ERR(dma_buf_mnt)) return PTR_ERR(dma_buf_mnt); @@ -1484,5 +1507,6 @@ static void __exit dma_buf_deinit(void) { dma_buf_uninit_debugfs(); kern_unmount(dma_buf_mnt); + dma_buf_uninit_sysfs_statistics(); } __exitcall(dma_buf_deinit); diff --git a/drivers/dma-buf/dma-fence-chain.c b/drivers/dma-buf/dma-fence-chain.c index 7d129e68ac70..1b4cb3e5cec9 100644 --- a/drivers/dma-buf/dma-fence-chain.c +++ b/drivers/dma-buf/dma-fence-chain.c @@ -137,6 +137,7 @@ static void dma_fence_chain_cb(struct dma_fence *f, struct dma_fence_cb *cb) struct dma_fence_chain *chain; chain = container_of(cb, typeof(*chain), cb); + init_irq_work(&chain->work, dma_fence_chain_irq_work); irq_work_queue(&chain->work); dma_fence_put(f); } @@ -239,7 +240,6 @@ void dma_fence_chain_init(struct dma_fence_chain *chain, rcu_assign_pointer(chain->prev, prev); chain->fence = fence; chain->prev_seqno = 0; - init_irq_work(&chain->work, dma_fence_chain_irq_work); /* Try to reuse the context of the previous chain node. */ if (prev_chain && __dma_fence_is_later(seqno, prev->seqno, prev->ops)) { diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index f26c71747d43..e744fd87c63c 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -615,25 +615,21 @@ static inline int dma_resv_test_signaled_single(struct dma_fence *passed_fence) */ bool dma_resv_test_signaled(struct dma_resv *obj, bool test_all) { - unsigned int seq, shared_count; + struct dma_fence *fence; + unsigned int seq; int ret; rcu_read_lock(); retry: ret = true; - shared_count = 0; seq = read_seqcount_begin(&obj->seq); if (test_all) { struct dma_resv_list *fobj = dma_resv_shared_list(obj); - unsigned int i; - - if (fobj) - shared_count = fobj->shared_count; + unsigned int i, shared_count; + shared_count = fobj ? 
fobj->shared_count : 0; for (i = 0; i < shared_count; ++i) { - struct dma_fence *fence; - fence = rcu_dereference(fobj->shared[i]); ret = dma_resv_test_signaled_single(fence); if (ret < 0) @@ -641,24 +637,19 @@ retry: else if (!ret) break; } - - if (read_seqcount_retry(&obj->seq, seq)) - goto retry; } - if (!shared_count) { - struct dma_fence *fence_excl = dma_resv_excl_fence(obj); - - if (fence_excl) { - ret = dma_resv_test_signaled_single(fence_excl); - if (ret < 0) - goto retry; + fence = dma_resv_excl_fence(obj); + if (ret && fence) { + ret = dma_resv_test_signaled_single(fence); + if (ret < 0) + goto retry; - if (read_seqcount_retry(&obj->seq, seq)) - goto retry; - } } + if (read_seqcount_retry(&obj->seq, seq)) + goto retry; + rcu_read_unlock(); return ret; } diff --git a/drivers/dma-buf/st-dma-fence-chain.c b/drivers/dma-buf/st-dma-fence-chain.c index 9525f7f56119..8ce1ea59d31b 100644 --- a/drivers/dma-buf/st-dma-fence-chain.c +++ b/drivers/dma-buf/st-dma-fence-chain.c @@ -58,28 +58,20 @@ static struct dma_fence *mock_fence(void) return &f->base; } -static inline struct mock_chain { - struct dma_fence_chain base; -} *to_mock_chain(struct dma_fence *f) { - return container_of(f, struct mock_chain, base.base); -} - static struct dma_fence *mock_chain(struct dma_fence *prev, struct dma_fence *fence, u64 seqno) { - struct mock_chain *f; + struct dma_fence_chain *f; - f = kmalloc(sizeof(*f), GFP_KERNEL); + f = dma_fence_chain_alloc(); if (!f) return NULL; - dma_fence_chain_init(&f->base, - dma_fence_get(prev), - dma_fence_get(fence), + dma_fence_chain_init(f, dma_fence_get(prev), dma_fence_get(fence), seqno); - return &f->base.base; + return &f->base; } static int sanitycheck(void *arg) diff --git a/drivers/dma-buf/udmabuf.c b/drivers/dma-buf/udmabuf.c index db732f71e59a..c57a609db75b 100644 --- a/drivers/dma-buf/udmabuf.c +++ b/drivers/dma-buf/udmabuf.c @@ -11,9 +11,15 @@ #include <linux/shmem_fs.h> #include <linux/slab.h> #include <linux/udmabuf.h> +#include <linux/hugetlb.h> -static const u32 list_limit = 1024; /* udmabuf_create_list->count limit */ -static const size_t size_limit_mb = 64; /* total dmabuf size, in megabytes */ +static int list_limit = 1024; +module_param(list_limit, int, 0644); +MODULE_PARM_DESC(list_limit, "udmabuf_create_list->count limit. Default is 1024."); + +static int size_limit_mb = 64; +module_param(size_limit_mb, int, 0644); +MODULE_PARM_DESC(size_limit_mb, "Max size of a dmabuf, in megabytes. 
Default is 64."); struct udmabuf { pgoff_t pagecount; @@ -160,10 +166,13 @@ static long udmabuf_create(struct miscdevice *device, { DEFINE_DMA_BUF_EXPORT_INFO(exp_info); struct file *memfd = NULL; + struct address_space *mapping = NULL; struct udmabuf *ubuf; struct dma_buf *buf; pgoff_t pgoff, pgcnt, pgidx, pgbuf = 0, pglimit; - struct page *page; + struct page *page, *hpage = NULL; + pgoff_t subpgoff, maxsubpgs; + struct hstate *hpstate; int seals, ret = -EINVAL; u32 i, flags; @@ -194,7 +203,8 @@ static long udmabuf_create(struct miscdevice *device, memfd = fget(list[i].memfd); if (!memfd) goto err; - if (!shmem_mapping(file_inode(memfd)->i_mapping)) + mapping = file_inode(memfd)->i_mapping; + if (!shmem_mapping(mapping) && !is_file_hugepages(memfd)) goto err; seals = memfd_fcntl(memfd, F_GET_SEALS, 0); if (seals == -EINVAL) @@ -205,17 +215,48 @@ static long udmabuf_create(struct miscdevice *device, goto err; pgoff = list[i].offset >> PAGE_SHIFT; pgcnt = list[i].size >> PAGE_SHIFT; + if (is_file_hugepages(memfd)) { + hpstate = hstate_file(memfd); + pgoff = list[i].offset >> huge_page_shift(hpstate); + subpgoff = (list[i].offset & + ~huge_page_mask(hpstate)) >> PAGE_SHIFT; + maxsubpgs = huge_page_size(hpstate) >> PAGE_SHIFT; + } for (pgidx = 0; pgidx < pgcnt; pgidx++) { - page = shmem_read_mapping_page( - file_inode(memfd)->i_mapping, pgoff + pgidx); - if (IS_ERR(page)) { - ret = PTR_ERR(page); - goto err; + if (is_file_hugepages(memfd)) { + if (!hpage) { + hpage = find_get_page_flags(mapping, pgoff, + FGP_ACCESSED); + if (!hpage) { + ret = -EINVAL; + goto err; + } + } + page = hpage + subpgoff; + get_page(page); + subpgoff++; + if (subpgoff == maxsubpgs) { + put_page(hpage); + hpage = NULL; + subpgoff = 0; + pgoff++; + } + } else { + page = shmem_read_mapping_page(mapping, + pgoff + pgidx); + if (IS_ERR(page)) { + ret = PTR_ERR(page); + goto err; + } } ubuf->pages[pgbuf++] = page; } fput(memfd); memfd = NULL; + if (hpage) { + put_page(hpage); + hpage = NULL; + } } exp_info.ops = &udmabuf_ops; diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig index 1db738d5b301..5d3fd803d2bb 100644 --- a/drivers/firmware/Kconfig +++ b/drivers/firmware/Kconfig @@ -251,6 +251,38 @@ config QCOM_SCM_DOWNLOAD_MODE_DEFAULT Say Y here to enable "download mode" by default. +config SYSFB + bool + default y + depends on X86 || EFI + +config SYSFB_SIMPLEFB + bool "Mark VGA/VBE/EFI FB as generic system framebuffer" + depends on SYSFB + help + Firmwares often provide initial graphics framebuffers so the BIOS, + bootloader or kernel can show basic video-output during boot for + user-guidance and debugging. Historically, x86 used the VESA BIOS + Extensions and EFI-framebuffers for this, which are mostly limited + to x86 BIOS or EFI systems. + This option, if enabled, marks VGA/VBE/EFI framebuffers as generic + framebuffers so the new generic system-framebuffer drivers can be + used instead. If the framebuffer is not compatible with the generic + modes, it is advertised as fallback platform framebuffer so legacy + drivers like efifb, vesafb and uvesafb can pick it up. + If this option is not selected, all system framebuffers are always + marked as fallback platform framebuffers as usual. + + Note: Legacy fbdev drivers, including vesafb, efifb, uvesafb, will + not be able to pick up generic system framebuffers if this option + is selected. You are highly encouraged to enable simplefb as + replacement if you select this option. simplefb can correctly deal + with generic system framebuffers. 
But you should still keep vesafb + and others enabled as fallback if a system framebuffer is + incompatible with simplefb. + + If unsure, say Y. + config TI_SCI_PROTOCOL tristate "TI System Control Interface (TISCI) Message Protocol" depends on TI_MESSAGE_MANAGER diff --git a/drivers/firmware/Makefile b/drivers/firmware/Makefile index 546ac8e7f6d0..705fabe88156 100644 --- a/drivers/firmware/Makefile +++ b/drivers/firmware/Makefile @@ -18,6 +18,8 @@ obj-$(CONFIG_FIRMWARE_MEMMAP) += memmap.o obj-$(CONFIG_RASPBERRYPI_FIRMWARE) += raspberrypi.o obj-$(CONFIG_FW_CFG_SYSFS) += qemu_fw_cfg.o obj-$(CONFIG_QCOM_SCM) += qcom_scm.o qcom_scm-smc.o qcom_scm-legacy.o +obj-$(CONFIG_SYSFB) += sysfb.o +obj-$(CONFIG_SYSFB_SIMPLEFB) += sysfb_simplefb.o obj-$(CONFIG_TI_SCI_PROTOCOL) += ti_sci.o obj-$(CONFIG_TRUSTED_FOUNDATIONS) += trusted_foundations.o obj-$(CONFIG_TURRIS_MOX_RWTM) += turris-mox-rwtm.o diff --git a/drivers/firmware/efi/Makefile b/drivers/firmware/efi/Makefile index 467e94259679..c02ff25dd477 100644 --- a/drivers/firmware/efi/Makefile +++ b/drivers/firmware/efi/Makefile @@ -36,6 +36,8 @@ obj-$(CONFIG_LOAD_UEFI_KEYS) += mokvar-table.o fake_map-y += fake_mem.o fake_map-$(CONFIG_X86) += x86_fake_mem.o +obj-$(CONFIG_SYSFB) += sysfb_efi.o + arm-obj-$(CONFIG_EFI) := efi-init.o arm-runtime.o obj-$(CONFIG_ARM) += $(arm-obj-y) obj-$(CONFIG_ARM64) += $(arm-obj-y) diff --git a/drivers/firmware/efi/efi-init.c b/drivers/firmware/efi/efi-init.c index a552a08a1741..b19ce1a83f91 100644 --- a/drivers/firmware/efi/efi-init.c +++ b/drivers/firmware/efi/efi-init.c @@ -275,93 +275,3 @@ void __init efi_init(void) } #endif } - -static bool efifb_overlaps_pci_range(const struct of_pci_range *range) -{ - u64 fb_base = screen_info.lfb_base; - - if (screen_info.capabilities & VIDEO_CAPABILITY_64BIT_BASE) - fb_base |= (u64)(unsigned long)screen_info.ext_lfb_base << 32; - - return fb_base >= range->cpu_addr && - fb_base < (range->cpu_addr + range->size); -} - -static struct device_node *find_pci_overlap_node(void) -{ - struct device_node *np; - - for_each_node_by_type(np, "pci") { - struct of_pci_range_parser parser; - struct of_pci_range range; - int err; - - err = of_pci_range_parser_init(&parser, np); - if (err) { - pr_warn("of_pci_range_parser_init() failed: %d\n", err); - continue; - } - - for_each_of_pci_range(&parser, &range) - if (efifb_overlaps_pci_range(&range)) - return np; - } - return NULL; -} - -/* - * If the efifb framebuffer is backed by a PCI graphics controller, we have - * to ensure that this relation is expressed using a device link when - * running in DT mode, or the probe order may be reversed, resulting in a - * resource reservation conflict on the memory window that the efifb - * framebuffer steals from the PCIe host bridge. - */ -static int efifb_add_links(struct fwnode_handle *fwnode) -{ - struct device_node *sup_np; - - sup_np = find_pci_overlap_node(); - - /* - * If there's no PCI graphics controller backing the efifb, we are - * done here. 
- */ - if (!sup_np) - return 0; - - fwnode_link_add(fwnode, of_fwnode_handle(sup_np)); - of_node_put(sup_np); - - return 0; -} - -static const struct fwnode_operations efifb_fwnode_ops = { - .add_links = efifb_add_links, -}; - -static struct fwnode_handle efifb_fwnode; - -static int __init register_gop_device(void) -{ - struct platform_device *pd; - int err; - - if (screen_info.orig_video_isVGA != VIDEO_TYPE_EFI) - return 0; - - pd = platform_device_alloc("efi-framebuffer", 0); - if (!pd) - return -ENOMEM; - - if (IS_ENABLED(CONFIG_PCI)) { - fwnode_init(&efifb_fwnode, &efifb_fwnode_ops); - pd->dev.fwnode = &efifb_fwnode; - } - - err = platform_device_add_data(pd, &screen_info, sizeof(screen_info)); - if (err) - return err; - - return platform_device_add(pd); -} -subsys_initcall(register_gop_device); diff --git a/arch/x86/kernel/sysfb_efi.c b/drivers/firmware/efi/sysfb_efi.c index 8a56a6d80098..4c7c9dd7733f 100644 --- a/arch/x86/kernel/sysfb_efi.c +++ b/drivers/firmware/efi/sysfb_efi.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-or-later /* - * Generic System Framebuffers on x86 + * Generic System Framebuffers * Copyright (c) 2012-2013 David Herrmann <dh.herrmann@gmail.com> * * EFI Quirks Copyright (c) 2006 Edgar Hucek <gimli@dark-green.com> @@ -19,12 +19,14 @@ #include <linux/init.h> #include <linux/kernel.h> #include <linux/mm.h> +#include <linux/of_address.h> #include <linux/pci.h> +#include <linux/platform_device.h> #include <linux/screen_info.h> +#include <linux/sysfb.h> #include <video/vga.h> #include <asm/efi.h> -#include <asm/sysfb.h> enum { OVERRIDE_NONE = 0x0, @@ -267,7 +269,73 @@ static const struct dmi_system_id efifb_dmi_swap_width_height[] __initconst = { {}, }; -__init void sysfb_apply_efi_quirks(void) +static bool efifb_overlaps_pci_range(const struct of_pci_range *range) +{ + u64 fb_base = screen_info.lfb_base; + + if (screen_info.capabilities & VIDEO_CAPABILITY_64BIT_BASE) + fb_base |= (u64)(unsigned long)screen_info.ext_lfb_base << 32; + + return fb_base >= range->cpu_addr && + fb_base < (range->cpu_addr + range->size); +} + +static struct device_node *find_pci_overlap_node(void) +{ + struct device_node *np; + + for_each_node_by_type(np, "pci") { + struct of_pci_range_parser parser; + struct of_pci_range range; + int err; + + err = of_pci_range_parser_init(&parser, np); + if (err) { + pr_warn("of_pci_range_parser_init() failed: %d\n", err); + continue; + } + + for_each_of_pci_range(&parser, &range) + if (efifb_overlaps_pci_range(&range)) + return np; + } + return NULL; +} + +/* + * If the efifb framebuffer is backed by a PCI graphics controller, we have + * to ensure that this relation is expressed using a device link when + * running in DT mode, or the probe order may be reversed, resulting in a + * resource reservation conflict on the memory window that the efifb + * framebuffer steals from the PCIe host bridge. + */ +static int efifb_add_links(struct fwnode_handle *fwnode) +{ + struct device_node *sup_np; + + sup_np = find_pci_overlap_node(); + + /* + * If there's no PCI graphics controller backing the efifb, we are + * done here. 
+ */ + if (!sup_np) + return 0; + + fwnode_link_add(fwnode, of_fwnode_handle(sup_np)); + of_node_put(sup_np); + + return 0; +} + +static const struct fwnode_operations efifb_fwnode_ops = { + .add_links = efifb_add_links, +}; + +#ifdef CONFIG_EFI +static struct fwnode_handle efifb_fwnode; + +__init void sysfb_apply_efi_quirks(struct platform_device *pd) { if (screen_info.orig_video_isVGA != VIDEO_TYPE_EFI || !(screen_info.capabilities & VIDEO_CAPABILITY_SKIP_QUIRKS)) @@ -281,4 +349,10 @@ __init void sysfb_apply_efi_quirks(void) screen_info.lfb_height = temp; screen_info.lfb_linelength = 4 * screen_info.lfb_width; } + + if (screen_info.orig_video_isVGA == VIDEO_TYPE_EFI && IS_ENABLED(CONFIG_PCI)) { + fwnode_init(&efifb_fwnode, &efifb_fwnode_ops); + pd->dev.fwnode = &efifb_fwnode; + } } +#endif diff --git a/arch/x86/kernel/sysfb.c b/drivers/firmware/sysfb.c index 014ebd8ca869..2bfbb05f7d89 100644 --- a/arch/x86/kernel/sysfb.c +++ b/drivers/firmware/sysfb.c @@ -1,11 +1,11 @@ // SPDX-License-Identifier: GPL-2.0-or-later /* - * Generic System Framebuffers on x86 + * Generic System Framebuffers * Copyright (c) 2012-2013 David Herrmann <dh.herrmann@gmail.com> */ /* - * Simple-Framebuffer support for x86 systems + * Simple-Framebuffer support * Create a platform-device for any available boot framebuffer. The * simple-framebuffer platform device is already available on DT systems, so * this module parses the global "screen_info" object and creates a suitable @@ -16,12 +16,12 @@ * to pick these devices up without messing with simple-framebuffer drivers. * The global "screen_info" is still valid at all times. * - * If CONFIG_X86_SYSFB is not selected, we never register "simple-framebuffer" + * If CONFIG_SYSFB_SIMPLEFB is not selected, never register "simple-framebuffer" * platform devices, but only use legacy framebuffer devices for * backwards compatibility. * * TODO: We set the dev_id field of all platform-devices to 0. This allows - * other x86 OF/DT parsers to create such devices, too. However, they must + * other OF/DT parsers to create such devices, too. However, they must * start at offset 1 for this to work. 
*/ @@ -32,7 +32,7 @@ #include <linux/platform_data/simplefb.h> #include <linux/platform_device.h> #include <linux/screen_info.h> -#include <asm/sysfb.h> +#include <linux/sysfb.h> static __init int sysfb_init(void) { @@ -43,12 +43,10 @@ static __init int sysfb_init(void) bool compatible; int ret; - sysfb_apply_efi_quirks(); - /* try to create a simple-framebuffer device */ - compatible = parse_mode(si, &mode); + compatible = sysfb_parse_mode(si, &mode); if (compatible) { - ret = create_simplefb(si, &mode); + ret = sysfb_create_simplefb(si, &mode); if (!ret) return 0; } @@ -61,9 +59,24 @@ static __init int sysfb_init(void) else name = "platform-framebuffer"; - pd = platform_device_register_resndata(NULL, name, 0, - NULL, 0, si, sizeof(*si)); - return PTR_ERR_OR_ZERO(pd); + pd = platform_device_alloc(name, 0); + if (!pd) + return -ENOMEM; + + sysfb_apply_efi_quirks(pd); + + ret = platform_device_add_data(pd, si, sizeof(*si)); + if (ret) + goto err; + + ret = platform_device_add(pd); + if (ret) + goto err; + + return 0; +err: + platform_device_put(pd); + return ret; } /* must execute after PCI subsystem for EFI quirks */ diff --git a/arch/x86/kernel/sysfb_simplefb.c b/drivers/firmware/sysfb_simplefb.c index 298fc1edd9c9..b86761904949 100644 --- a/arch/x86/kernel/sysfb_simplefb.c +++ b/drivers/firmware/sysfb_simplefb.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-or-later /* - * Generic System Framebuffers on x86 + * Generic System Framebuffers * Copyright (c) 2012-2013 David Herrmann <dh.herrmann@gmail.com> */ @@ -18,14 +18,14 @@ #include <linux/platform_data/simplefb.h> #include <linux/platform_device.h> #include <linux/screen_info.h> -#include <asm/sysfb.h> +#include <linux/sysfb.h> static const char simplefb_resname[] = "BOOTFB"; static const struct simplefb_format formats[] = SIMPLEFB_FORMATS; -/* try parsing x86 screen_info into a simple-framebuffer mode struct */ -__init bool parse_mode(const struct screen_info *si, - struct simplefb_platform_data *mode) +/* try parsing screen_info into a simple-framebuffer mode struct */ +__init bool sysfb_parse_mode(const struct screen_info *si, + struct simplefb_platform_data *mode) { const struct simplefb_format *f; __u8 type; @@ -57,13 +57,14 @@ __init bool parse_mode(const struct screen_info *si, return false; } -__init int create_simplefb(const struct screen_info *si, - const struct simplefb_platform_data *mode) +__init int sysfb_create_simplefb(const struct screen_info *si, + const struct simplefb_platform_data *mode) { struct platform_device *pd; struct resource res; u64 base, size; u32 length; + int ret; /* * If the 64BIT_BASE capability is set, ext_lfb_base will contain the @@ -105,7 +106,19 @@ __init int create_simplefb(const struct screen_info *si, if (res.end <= res.start) return -EINVAL; - pd = platform_device_register_resndata(NULL, "simple-framebuffer", 0, - &res, 1, mode, sizeof(*mode)); - return PTR_ERR_OR_ZERO(pd); + pd = platform_device_alloc("simple-framebuffer", 0); + if (!pd) + return -ENOMEM; + + sysfb_apply_efi_quirks(pd); + + ret = platform_device_add_resources(pd, &res, 1); + if (ret) + return ret; + + ret = platform_device_add_data(pd, mode, sizeof(*mode)); + if (ret) + return ret; + + return platform_device_add(pd); } diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index 7ff89690a976..cea777ae7fb9 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -35,6 +35,11 @@ config DRM_MIPI_DSI bool depends on DRM +config DRM_DP_AUX_BUS + tristate + depends on DRM + depends on OF + config 
DRM_DP_AUX_CHARDEV bool "DRM DP AUX Interface" depends on DRM @@ -251,7 +256,6 @@ config DRM_AMDGPU select HWMON select BACKLIGHT_CLASS_DEVICE select INTERVAL_TREE - select CHASH help Choose this option if you have a recent AMD Radeon graphics card. @@ -317,8 +321,6 @@ source "drivers/gpu/drm/tilcdc/Kconfig" source "drivers/gpu/drm/qxl/Kconfig" -source "drivers/gpu/drm/bochs/Kconfig" - source "drivers/gpu/drm/virtio/Kconfig" source "drivers/gpu/drm/msm/Kconfig" diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index a118692a6df7..ad1112154898 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -33,6 +33,8 @@ drm-$(CONFIG_PCI) += drm_pci.o drm-$(CONFIG_DEBUG_FS) += drm_debugfs.o drm_debugfs_crc.o drm-$(CONFIG_DRM_LOAD_EDID_FIRMWARE) += drm_edid_load.o +obj-$(CONFIG_DRM_DP_AUX_BUS) += drm_dp_aux_bus.o + drm_vram_helper-y := drm_gem_vram_helper.o obj-$(CONFIG_DRM_VRAM_HELPER) += drm_vram_helper.o @@ -96,7 +98,6 @@ obj-y += omapdrm/ obj-$(CONFIG_DRM_SUN4I) += sun4i/ obj-y += tilcdc/ obj-$(CONFIG_DRM_QXL) += qxl/ -obj-$(CONFIG_DRM_BOCHS) += bochs/ obj-$(CONFIG_DRM_VIRTIO_GPU) += virtio/ obj-$(CONFIG_DRM_MSM) += msm/ obj-$(CONFIG_DRM_TEGRA) += tegra/ diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index c56320e78c0e..8d0748184a14 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -57,7 +57,8 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \ amdgpu_xgmi.o amdgpu_csa.o amdgpu_ras.o amdgpu_vm_cpu.o \ amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o amdgpu_nbio.o \ amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \ - amdgpu_fw_attestation.o amdgpu_securedisplay.o amdgpu_hdp.o + amdgpu_fw_attestation.o amdgpu_securedisplay.o amdgpu_hdp.o \ + amdgpu_eeprom.o amdgpu_mca.o amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o @@ -75,7 +76,7 @@ amdgpu-y += \ vega20_reg_init.o nbio_v7_4.o nbio_v2_3.o nv.o navi10_reg_init.o navi14_reg_init.o \ arct_reg_init.o navi12_reg_init.o mxgpu_nv.o sienna_cichlid_reg_init.o vangogh_reg_init.o \ nbio_v7_2.o dimgrey_cavefish_reg_init.o hdp_v4_0.o hdp_v5_0.o aldebaran_reg_init.o aldebaran.o \ - beige_goby_reg_init.o yellow_carp_reg_init.o + beige_goby_reg_init.o yellow_carp_reg_init.o cyan_skillfish_reg_init.o # add DF block amdgpu-y += \ @@ -111,6 +112,7 @@ amdgpu-y += \ psp_v3_1.o \ psp_v10_0.o \ psp_v11_0.o \ + psp_v11_0_8.o \ psp_v12_0.o \ psp_v13_0.o @@ -118,7 +120,7 @@ amdgpu-y += \ amdgpu-y += \ dce_v10_0.o \ dce_v11_0.o \ - dce_virtual.o + amdgpu_vkms.o # add GFX block amdgpu-y += \ @@ -187,6 +189,10 @@ amdgpu-y += \ amdgpu-y += \ amdgpu_reset.o +# add MCA block +amdgpu-y += \ + mca_v3_0.o + # add amdkfd interfaces amdgpu-y += amdgpu_amdkfd.o diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 8ac6eb9f1fdb..dc3c6b3a00e5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -108,6 +108,7 @@ #include "amdgpu_df.h" #include "amdgpu_smuio.h" #include "amdgpu_fdinfo.h" +#include "amdgpu_mca.h" #define MAX_GPU_INSTANCE 16 @@ -916,6 +917,7 @@ struct amdgpu_device { /* display */ bool enable_virtual_display; + struct amdgpu_vkms_output *amdgpu_vkms_output; struct amdgpu_mode_info mode_info; /* For pre-DCE11. 
DCE11 and later are in "struct amdgpu_device->dm" */ struct work_struct hotplug_work; @@ -1008,6 +1010,9 @@ struct amdgpu_device { /* df */ struct amdgpu_df df; + /* MCA */ + struct amdgpu_mca mca; + struct amdgpu_ip_block ip_blocks[AMDGPU_MAX_IP_NUM]; uint32_t harvest_ip_mask; int num_ip_blocks; @@ -1108,8 +1113,13 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev); int amdgpu_gpu_wait_for_idle(struct amdgpu_device *adev); +void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos, + void *buf, size_t size, bool write); +size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos, + void *buf, size_t size, bool write); + void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos, - uint32_t *buf, size_t size, bool write); + void *buf, size_t size, bool write); uint32_t amdgpu_device_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t acc_flags); void amdgpu_device_wreg(struct amdgpu_device *adev, @@ -1265,6 +1275,8 @@ int emu_soc_asic_init(struct amdgpu_device *adev); #define amdgpu_inc_vram_lost(adev) atomic_inc(&((adev)->vram_lost_counter)); +#define MIN(X, Y) ((X) < (Y) ? (X) : (Y)) + /* Common functions */ bool amdgpu_device_has_job_running(struct amdgpu_device *adev); bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev); @@ -1385,12 +1397,12 @@ int amdgpu_acpi_smart_shift_update(struct drm_device *dev, enum amdgpu_ss ss_sta int amdgpu_acpi_pcie_notify_device_ready(struct amdgpu_device *adev); void amdgpu_acpi_get_backlight_caps(struct amdgpu_dm_backlight_caps *caps); -bool amdgpu_acpi_is_s0ix_supported(struct amdgpu_device *adev); +bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev); void amdgpu_acpi_detect(void); #else static inline int amdgpu_acpi_init(struct amdgpu_device *adev) { return 0; } static inline void amdgpu_acpi_fini(struct amdgpu_device *adev) { } -static inline bool amdgpu_acpi_is_s0ix_supported(struct amdgpu_device *adev) { return false; } +static inline bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev) { return false; } static inline void amdgpu_acpi_detect(void) { } static inline bool amdgpu_acpi_is_power_shift_control_supported(void) { return false; } static inline int amdgpu_acpi_power_shift_control(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c index b8655ff73a65..cc9c9f8b23b2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c @@ -160,17 +160,28 @@ static int acp_poweron(struct generic_pm_domain *genpd) return 0; } -static struct device *get_mfd_cell_dev(const char *device_name, int r) +static int acp_genpd_add_device(struct device *dev, void *data) { - char auto_dev_name[25]; - struct device *dev; + struct generic_pm_domain *gpd = data; + int ret; - snprintf(auto_dev_name, sizeof(auto_dev_name), - "%s.%d.auto", device_name, r); - dev = bus_find_device_by_name(&platform_bus_type, NULL, auto_dev_name); - dev_info(dev, "device %s added to pm domain\n", auto_dev_name); + ret = pm_genpd_add_device(gpd, dev); + if (ret) + dev_err(dev, "Failed to add dev to genpd %d\n", ret); - return dev; + return ret; +} + +static int acp_genpd_remove_device(struct device *dev, void *data) +{ + int ret; + + ret = pm_genpd_remove_device(dev); + if (ret) + dev_err(dev, "Failed to remove dev from genpd %d\n", ret); + + /* Continue to remove */ + return 0; } /** @@ -181,11 +192,10 @@ static struct device *get_mfd_cell_dev(const char *device_name, int r) */ static int acp_hw_init(void 
*handle) { - int r, i; + int r; uint64_t acp_base; u32 val = 0; u32 count = 0; - struct device *dev; struct i2s_platform_data *i2s_pdata = NULL; struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -341,15 +351,10 @@ static int acp_hw_init(void *handle) if (r) goto failure; - for (i = 0; i < ACP_DEVS ; i++) { - dev = get_mfd_cell_dev(adev->acp.acp_cell[i].name, i); - r = pm_genpd_add_device(&adev->acp.acp_genpd->gpd, dev); - if (r) { - dev_err(dev, "Failed to add dev to genpd\n"); - goto failure; - } - } - + r = device_for_each_child(adev->acp.parent, &adev->acp.acp_genpd->gpd, + acp_genpd_add_device); + if (r) + goto failure; /* Assert Soft reset of ACP */ val = cgs_read_register(adev->acp.cgs_device, mmACP_SOFT_RESET); @@ -410,10 +415,8 @@ failure: */ static int acp_hw_fini(void *handle) { - int i, ret; u32 val = 0; u32 count = 0; - struct device *dev; struct amdgpu_device *adev = (struct amdgpu_device *)handle; /* return early if no ACP */ @@ -458,13 +461,8 @@ static int acp_hw_fini(void *handle) udelay(100); } - for (i = 0; i < ACP_DEVS ; i++) { - dev = get_mfd_cell_dev(adev->acp.acp_cell[i].name, i); - ret = pm_genpd_remove_device(dev); - /* If removal fails, dont giveup and try rest */ - if (ret) - dev_err(dev, "remove dev from genpd failed\n"); - } + device_for_each_child(adev->acp.parent, NULL, + acp_genpd_remove_device); mfd_remove_devices(adev->acp.parent); kfree(adev->acp.acp_res); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index a9ce3b20d371..4811b0faafd9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -854,8 +854,8 @@ int amdgpu_acpi_init(struct amdgpu_device *adev) if (amdgpu_device_has_dc_support(adev)) { #if defined(CONFIG_DRM_AMD_DC) struct amdgpu_display_manager *dm = &adev->dm; - if (dm->backlight_dev) - atif->bd = dm->backlight_dev; + if (dm->backlight_dev[0]) + atif->bd = dm->backlight_dev[0]; #endif } else { struct drm_encoder *tmp; @@ -1032,13 +1032,13 @@ void amdgpu_acpi_detect(void) } /** - * amdgpu_acpi_is_s0ix_supported + * amdgpu_acpi_is_s0ix_active * * @adev: amdgpu_device_pointer * * returns true if supported, false if not. */ -bool amdgpu_acpi_is_s0ix_supported(struct amdgpu_device *adev) +bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev) { #if IS_ENABLED(CONFIG_AMD_PMC) && IS_ENABLED(CONFIG_SUSPEND) if (acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index f9c01bdc3d4c..3003ee1c9487 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -21,6 +21,7 @@ */ #include "amdgpu_amdkfd.h" +#include "amd_pcie.h" #include "amd_shared.h" #include "amdgpu.h" @@ -553,6 +554,88 @@ uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *s return (uint8_t)ret; } +int amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(struct kgd_dev *dst, struct kgd_dev *src, bool is_min) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)dst, *peer_adev; + int num_links; + + if (adev->asic_type != CHIP_ALDEBARAN) + return 0; + + if (src) + peer_adev = (struct amdgpu_device *)src; + + /* num links returns 0 for indirect peers since indirect route is unknown. */ + num_links = is_min ? 1 : amdgpu_xgmi_get_num_links(adev, peer_adev); + if (num_links < 0) { + DRM_ERROR("amdgpu: failed to get xgmi num links between node %d and %d. 
ret = %d\n", + adev->gmc.xgmi.physical_node_id, + peer_adev->gmc.xgmi.physical_node_id, num_links); + num_links = 0; + } + + /* Aldebaran xGMI DPM is defeatured so assume x16 x 25Gbps for bandwidth. */ + return (num_links * 16 * 25000)/BITS_PER_BYTE; +} + +int amdgpu_amdkfd_get_pcie_bandwidth_mbytes(struct kgd_dev *dev, bool is_min) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)dev; + int num_lanes_shift = (is_min ? ffs(adev->pm.pcie_mlw_mask) : + fls(adev->pm.pcie_mlw_mask)) - 1; + int gen_speed_shift = (is_min ? ffs(adev->pm.pcie_gen_mask & + CAIL_PCIE_LINK_SPEED_SUPPORT_MASK) : + fls(adev->pm.pcie_gen_mask & + CAIL_PCIE_LINK_SPEED_SUPPORT_MASK)) - 1; + uint32_t num_lanes_mask = 1 << num_lanes_shift; + uint32_t gen_speed_mask = 1 << gen_speed_shift; + int num_lanes_factor = 0, gen_speed_mbits_factor = 0; + + switch (num_lanes_mask) { + case CAIL_PCIE_LINK_WIDTH_SUPPORT_X1: + num_lanes_factor = 1; + break; + case CAIL_PCIE_LINK_WIDTH_SUPPORT_X2: + num_lanes_factor = 2; + break; + case CAIL_PCIE_LINK_WIDTH_SUPPORT_X4: + num_lanes_factor = 4; + break; + case CAIL_PCIE_LINK_WIDTH_SUPPORT_X8: + num_lanes_factor = 8; + break; + case CAIL_PCIE_LINK_WIDTH_SUPPORT_X12: + num_lanes_factor = 12; + break; + case CAIL_PCIE_LINK_WIDTH_SUPPORT_X16: + num_lanes_factor = 16; + break; + case CAIL_PCIE_LINK_WIDTH_SUPPORT_X32: + num_lanes_factor = 32; + break; + } + + switch (gen_speed_mask) { + case CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1: + gen_speed_mbits_factor = 2500; + break; + case CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2: + gen_speed_mbits_factor = 5000; + break; + case CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3: + gen_speed_mbits_factor = 8000; + break; + case CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4: + gen_speed_mbits_factor = 16000; + break; + case CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5: + gen_speed_mbits_factor = 32000; + break; + } + + return (num_lanes_factor * gen_speed_mbits_factor)/BITS_PER_BYTE; +} + uint64_t amdgpu_amdkfd_get_mmio_remap_phys_addr(struct kgd_dev *kgd) { struct amdgpu_device *adev = (struct amdgpu_device *)kgd; @@ -631,7 +714,6 @@ int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine, ret = dma_fence_wait(f, false); err_ib_sched: - dma_fence_put(f); amdgpu_job_free(job); err: return ret; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index cf62f43a03da..ec028cf963f5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -226,6 +226,8 @@ uint32_t amdgpu_amdkfd_get_num_gws(struct kgd_dev *kgd); uint32_t amdgpu_amdkfd_get_asic_rev_id(struct kgd_dev *kgd); int amdgpu_amdkfd_get_noretry(struct kgd_dev *kgd); uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *src); +int amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(struct kgd_dev *dst, struct kgd_dev *src, bool is_min); +int amdgpu_amdkfd_get_pcie_bandwidth_mbytes(struct kgd_dev *dev, bool is_min); /* Read user wptr from a specified user address space with page fault * disabled. 
The memory must be pinned and mapped to the hardware when @@ -269,7 +271,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv, uint64_t *size); int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( - struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv); + struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv, bool *table_freed); int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv); int amdgpu_amdkfd_gpuvm_sync_memory( @@ -330,7 +332,7 @@ int kgd2kfd_pre_reset(struct kfd_dev *kfd); int kgd2kfd_post_reset(struct kfd_dev *kfd); void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry); void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd); -void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint32_t throttle_bitmask); +void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask); #else static inline int kgd2kfd_init(void) { @@ -389,7 +391,7 @@ void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd) } static inline -void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint32_t throttle_bitmask) +void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask) { } #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c index a5434b713856..46cd4ee6bafb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c @@ -44,4 +44,5 @@ const struct kfd2kgd_calls aldebaran_kfd2kgd = { .get_atc_vmid_pasid_mapping_info = kgd_gfx_v9_get_atc_vmid_pasid_mapping_info, .set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base, + .program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c index 6409d6b1b2df..5a7f680bcb3f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c @@ -305,5 +305,6 @@ const struct kfd2kgd_calls arcturus_kfd2kgd = { kgd_gfx_v9_get_atc_vmid_pasid_mapping_info, .set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base, - .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy + .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy, + .program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c index 491acdf92f73..960acf68150a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c @@ -560,6 +560,9 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, case KFD_PREEMPT_TYPE_WAVEFRONT_RESET: type = RESET_WAVES; break; + case KFD_PREEMPT_TYPE_WAVEFRONT_SAVE: + type = SAVE_WAVES; + break; default: type = DRAIN_PIPE; break; @@ -754,6 +757,33 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, adev->gfxhub.funcs->setup_vm_pt_regs(adev, vmid, page_table_base); } +static void program_trap_handler_settings(struct kgd_dev *kgd, + uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + lock_srbm(kgd, 0, 0, 0, vmid); + + /* + * Program TBA registers + */ + WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_LO), + lower_32_bits(tba_addr >> 8)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_HI), + 
upper_32_bits(tba_addr >> 8) | + (1 << SQ_SHADER_TBA_HI__TRAP_EN__SHIFT)); + + /* + * Program TMA registers + */ + WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_LO), + lower_32_bits(tma_addr >> 8)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_HI), + upper_32_bits(tma_addr >> 8)); + + unlock_srbm(kgd); +} + const struct kfd2kgd_calls gfx_v10_kfd2kgd = { .program_sh_mem_settings = kgd_program_sh_mem_settings, .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, @@ -774,4 +804,5 @@ const struct kfd2kgd_calls gfx_v10_kfd2kgd = { .get_atc_vmid_pasid_mapping_info = get_atc_vmid_pasid_mapping_info, .set_vm_context_page_table_base = set_vm_context_page_table_base, + .program_trap_handler_settings = program_trap_handler_settings, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c index 1f5620cc3570..dac0d751d5af 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c @@ -537,6 +537,9 @@ static int hqd_destroy_v10_3(struct kgd_dev *kgd, void *mqd, case KFD_PREEMPT_TYPE_WAVEFRONT_RESET: type = RESET_WAVES; break; + case KFD_PREEMPT_TYPE_WAVEFRONT_SAVE: + type = SAVE_WAVES; + break; default: type = DRAIN_PIPE; break; @@ -658,6 +661,33 @@ static void set_vm_context_page_table_base_v10_3(struct kgd_dev *kgd, uint32_t v adev->gfxhub.funcs->setup_vm_pt_regs(adev, vmid, page_table_base); } +static void program_trap_handler_settings_v10_3(struct kgd_dev *kgd, + uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + lock_srbm(kgd, 0, 0, 0, vmid); + + /* + * Program TBA registers + */ + WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_LO), + lower_32_bits(tba_addr >> 8)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_HI), + upper_32_bits(tba_addr >> 8) | + (1 << SQ_SHADER_TBA_HI__TRAP_EN__SHIFT)); + + /* + * Program TMA registers + */ + WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_LO), + lower_32_bits(tma_addr >> 8)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_HI), + upper_32_bits(tma_addr >> 8)); + + unlock_srbm(kgd); +} + #if 0 uint32_t enable_debug_trap_v10_3(struct kgd_dev *kgd, uint32_t trap_debug_wave_launch_mode, @@ -820,6 +850,7 @@ const struct kfd2kgd_calls gfx_v10_3_kfd2kgd = { .address_watch_get_offset = address_watch_get_offset_v10_3, .get_atc_vmid_pasid_mapping_info = NULL, .set_vm_context_page_table_base = set_vm_context_page_table_base_v10_3, + .program_trap_handler_settings = program_trap_handler_settings_v10_3, #if 0 .enable_debug_trap = enable_debug_trap_v10_3, .disable_debug_trap = disable_debug_trap_v10_3, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index ed3014fbb563..bcc1cbeb8799 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -42,7 +42,8 @@ enum hqd_dequeue_request_type { NO_ACTION = 0, DRAIN_PIPE, - RESET_WAVES + RESET_WAVES, + SAVE_WAVES }; static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) @@ -566,6 +567,9 @@ int kgd_gfx_v9_hqd_destroy(struct kgd_dev *kgd, void *mqd, case KFD_PREEMPT_TYPE_WAVEFRONT_RESET: type = RESET_WAVES; break; + case KFD_PREEMPT_TYPE_WAVEFRONT_SAVE: + type = SAVE_WAVES; + break; default: type = DRAIN_PIPE; break; @@ -878,6 +882,32 @@ void kgd_gfx_v9_get_cu_occupancy(struct kgd_dev *kgd, int pasid, adev->gfx.cu_info.max_waves_per_simd; } +void 
kgd_gfx_v9_program_trap_handler_settings(struct kgd_dev *kgd, + uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + lock_srbm(kgd, 0, 0, 0, vmid); + + /* + * Program TBA registers + */ + WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_LO), + lower_32_bits(tba_addr >> 8)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_HI), + upper_32_bits(tba_addr >> 8)); + + /* + * Program TMA registers + */ + WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_LO), + lower_32_bits(tma_addr >> 8)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_HI), + upper_32_bits(tma_addr >> 8)); + + unlock_srbm(kgd); +} + const struct kfd2kgd_calls gfx_v9_kfd2kgd = { .program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings, .set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping, @@ -899,4 +929,5 @@ const struct kfd2kgd_calls gfx_v9_kfd2kgd = { kgd_gfx_v9_get_atc_vmid_pasid_mapping_info, .set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base, .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy, + .program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h index e64deba8900f..c63591106879 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h @@ -65,3 +65,5 @@ void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, uint64_t page_table_base); void kgd_gfx_v9_get_cu_occupancy(struct kgd_dev *kgd, int pasid, int *pasid_wave_cnt, int *max_waves_per_cu); +void kgd_gfx_v9_program_trap_handler_settings(struct kgd_dev *kgd, + uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 4fb15750b9bb..2d6b2d77b738 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1057,7 +1057,8 @@ static void unmap_bo_from_gpuvm(struct kgd_mem *mem, static int update_gpuvm_pte(struct kgd_mem *mem, struct kfd_mem_attachment *entry, - struct amdgpu_sync *sync) + struct amdgpu_sync *sync, + bool *table_freed) { struct amdgpu_bo_va *bo_va = entry->bo_va; struct amdgpu_device *adev = entry->adev; @@ -1068,7 +1069,7 @@ static int update_gpuvm_pte(struct kgd_mem *mem, return ret; /* Update the page tables */ - ret = amdgpu_vm_bo_update(adev, bo_va, false); + ret = amdgpu_vm_bo_update(adev, bo_va, false, table_freed); if (ret) { pr_err("amdgpu_vm_bo_update failed\n"); return ret; @@ -1080,7 +1081,8 @@ static int update_gpuvm_pte(struct kgd_mem *mem, static int map_bo_to_gpuvm(struct kgd_mem *mem, struct kfd_mem_attachment *entry, struct amdgpu_sync *sync, - bool no_update_pte) + bool no_update_pte, + bool *table_freed) { int ret; @@ -1097,7 +1099,7 @@ static int map_bo_to_gpuvm(struct kgd_mem *mem, if (no_update_pte) return 0; - ret = update_gpuvm_pte(mem, entry, sync); + ret = update_gpuvm_pte(mem, entry, sync, table_freed); if (ret) { pr_err("update_gpuvm_pte() failed\n"); goto update_gpuvm_pte_failed; @@ -1285,11 +1287,22 @@ int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd, if (avm->process_info) return -EINVAL; + /* Free the original amdgpu allocated pasid, + * will be replaced with kfd allocated pasid. 
+ */ + if (avm->pasid) { + amdgpu_pasid_free(avm->pasid); + amdgpu_vm_set_pasid(adev, avm, 0); + } + /* Convert VM into a compute VM */ - ret = amdgpu_vm_make_compute(adev, avm, pasid); + ret = amdgpu_vm_make_compute(adev, avm); if (ret) return ret; + ret = amdgpu_vm_set_pasid(adev, avm, pasid); + if (ret) + return ret; /* Initialize KFD part of the VM and process info */ ret = init_kfd_vm(avm, process_info, ef); if (ret) @@ -1594,7 +1607,8 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( } int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( - struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv) + struct kgd_dev *kgd, struct kgd_mem *mem, + void *drm_priv, bool *table_freed) { struct amdgpu_device *adev = get_amdgpu_device(kgd); struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv); @@ -1682,7 +1696,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( entry->va, entry->va + bo_size, entry); ret = map_bo_to_gpuvm(mem, entry, ctx.sync, - is_invalid_userptr); + is_invalid_userptr, table_freed); if (ret) { pr_err("Failed to map bo to gpuvm\n"); goto out_unreserve; @@ -1706,6 +1720,12 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( true); ret = unreserve_bo_and_vms(&ctx, false, false); + /* Only apply no TLB flush on Aldebaran to + * workaround regressions on other Asics. + */ + if (table_freed && (adev->asic_type != CHIP_ALDEBARAN)) + *table_freed = true; + goto out; out_unreserve: @@ -2132,7 +2152,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) continue; kfd_mem_dmaunmap_attachment(mem, attachment); - ret = update_gpuvm_pte(mem, attachment, &sync); + ret = update_gpuvm_pte(mem, attachment, &sync, NULL); if (ret) { pr_err("%s: update PTE failed\n", __func__); /* make sure this gets validated again */ @@ -2338,7 +2358,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) continue; kfd_mem_dmaunmap_attachment(mem, attachment); - ret = update_gpuvm_pte(mem, attachment, &sync_obj); + ret = update_gpuvm_pte(mem, attachment, &sync_obj, NULL); if (ret) { pr_debug("Memory eviction: update PTE failed. 
Try again\n"); goto validate_map_fail; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h index a130e766cbdb..c905a4cfc173 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h @@ -34,6 +34,7 @@ struct amdgpu_fpriv; struct amdgpu_bo_list_entry { struct ttm_validate_buffer tv; struct amdgpu_bo_va *bo_va; + struct dma_fence_chain *chain; uint32_t priority; struct page **user_pages; bool user_invalidated; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 30fa1f61e0e5..913f9eaa9cd6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -572,6 +572,20 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, goto out; } + amdgpu_bo_list_for_each_entry(e, p->bo_list) { + struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); + + e->bo_va = amdgpu_vm_bo_find(vm, bo); + + if (bo->tbo.base.dma_buf && !amdgpu_bo_explicit_sync(bo)) { + e->chain = dma_fence_chain_alloc(); + if (!e->chain) { + r = -ENOMEM; + goto error_validate; + } + } + } + amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold, &p->bytes_moved_vis_threshold); p->bytes_moved = 0; @@ -599,15 +613,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, gws = p->bo_list->gws_obj; oa = p->bo_list->oa_obj; - amdgpu_bo_list_for_each_entry(e, p->bo_list) { - struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); - - /* Make sure we use the exclusive slot for shared BOs */ - if (bo->prime_shared_count) - e->tv.num_shared = 0; - e->bo_va = amdgpu_vm_bo_find(vm, bo); - } - if (gds) { p->job->gds_base = amdgpu_bo_gpu_offset(gds) >> PAGE_SHIFT; p->job->gds_size = amdgpu_bo_size(gds) >> PAGE_SHIFT; @@ -629,8 +634,13 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, } error_validate: - if (r) + if (r) { + amdgpu_bo_list_for_each_entry(e, p->bo_list) { + dma_fence_chain_free(e->chain); + e->chain = NULL; + } ttm_eu_backoff_reservation(&p->ticket, &p->validated); + } out: return r; } @@ -670,9 +680,17 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, { unsigned i; - if (error && backoff) + if (error && backoff) { + struct amdgpu_bo_list_entry *e; + + amdgpu_bo_list_for_each_entry(e, parser->bo_list) { + dma_fence_chain_free(e->chain); + e->chain = NULL; + } + ttm_eu_backoff_reservation(&parser->ticket, &parser->validated); + } for (i = 0; i < parser->num_post_deps; i++) { drm_syncobj_put(parser->post_deps[i].syncobj); @@ -781,7 +799,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) if (r) return r; - r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false); + r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false, NULL); if (r) return r; @@ -792,7 +810,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) { bo_va = fpriv->csa_va; BUG_ON(!bo_va); - r = amdgpu_vm_bo_update(adev, bo_va, false); + r = amdgpu_vm_bo_update(adev, bo_va, false, NULL); if (r) return r; @@ -811,7 +829,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) if (bo_va == NULL) continue; - r = amdgpu_vm_bo_update(adev, bo_va, false); + r = amdgpu_vm_bo_update(adev, bo_va, false, NULL); if (r) return r; @@ -1109,7 +1127,7 @@ static int amdgpu_cs_process_syncobj_timeline_out_dep(struct amdgpu_cs_parser *p dep->chain = NULL; if (syncobj_deps[i].point) { - dep->chain = kmalloc(sizeof(*dep->chain), GFP_KERNEL); + dep->chain = dma_fence_chain_alloc(); if 
(!dep->chain) return -ENOMEM; } @@ -1117,7 +1135,7 @@ static int amdgpu_cs_process_syncobj_timeline_out_dep(struct amdgpu_cs_parser *p dep->syncobj = drm_syncobj_find(p->filp, syncobj_deps[i].handle); if (!dep->syncobj) { - kfree(dep->chain); + dma_fence_chain_free(dep->chain); return -EINVAL; } dep->point = syncobj_deps[i].point; @@ -1245,6 +1263,28 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm); + amdgpu_bo_list_for_each_entry(e, p->bo_list) { + struct dma_resv *resv = e->tv.bo->base.resv; + struct dma_fence_chain *chain = e->chain; + + if (!chain) + continue; + + /* + * Work around dma_resv shortcommings by wrapping up the + * submission in a dma_fence_chain and add it as exclusive + * fence, but first add the submission as shared fence to make + * sure that shared fences never signal before the exclusive + * one. + */ + dma_fence_chain_init(chain, dma_resv_excl_fence(resv), + dma_fence_get(p->fence), 1); + + dma_resv_add_shared_fence(resv, p->fence); + rcu_assign_pointer(resv->fence_excl, &chain->base); + e->chain = NULL; + } + ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence); mutex_unlock(&p->adev->notifier_lock); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 536005bff24a..277128846dd1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -1414,7 +1414,7 @@ no_preempt: continue; } job = to_amdgpu_job(s_job); - if (preempted && job->fence == fence) + if (preempted && (&job->hw_fence) == fence) /* mark the job as preempted */ job->preemption_status |= AMDGPU_IB_PREEMPTED; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index f944ed858f3e..41c6b3aacd37 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -116,6 +116,7 @@ const char *amdgpu_asic_name[] = { "RENOIR", "ALDEBARAN", "NAVI10", + "CYAN_SKILLFISH", "NAVI14", "NAVI12", "SIENNA_CICHLID", @@ -287,7 +288,7 @@ bool amdgpu_device_supports_smart_shift(struct drm_device *dev) */ /** - * amdgpu_device_vram_access - read/write a buffer in vram + * amdgpu_device_mm_access - access vram by MM_INDEX/MM_DATA * * @adev: amdgpu_device pointer * @pos: offset of the buffer in vram @@ -295,22 +296,65 @@ bool amdgpu_device_supports_smart_shift(struct drm_device *dev) * @size: read/write size, sizeof(@buf) must > @size * @write: true - write to vram, otherwise - read from vram */ -void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos, - uint32_t *buf, size_t size, bool write) +void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos, + void *buf, size_t size, bool write) { unsigned long flags; - uint32_t hi = ~0; + uint32_t hi = ~0, tmp = 0; + uint32_t *data = buf; uint64_t last; int idx; if (!drm_dev_enter(&adev->ddev, &idx)) return; + BUG_ON(!IS_ALIGNED(pos, 4) || !IS_ALIGNED(size, 4)); + + spin_lock_irqsave(&adev->mmio_idx_lock, flags); + for (last = pos + size; pos < last; pos += 4) { + tmp = pos >> 31; + + WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000); + if (tmp != hi) { + WREG32_NO_KIQ(mmMM_INDEX_HI, tmp); + hi = tmp; + } + if (write) + WREG32_NO_KIQ(mmMM_DATA, *data++); + else + *data++ = RREG32_NO_KIQ(mmMM_DATA); + } + + spin_unlock_irqrestore(&adev->mmio_idx_lock, flags); + drm_dev_exit(idx); +} + +/** + * amdgpu_device_vram_access - access vram by vram aperature + * + * @adev: amdgpu_device pointer + * 
@pos: offset of the buffer in vram + * @buf: virtual address of the buffer in system memory + * @size: read/write size, sizeof(@buf) must > @size + * @write: true - write to vram, otherwise - read from vram + * + * The return value means how many bytes have been transferred. + */ +size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos, + void *buf, size_t size, bool write) +{ #ifdef CONFIG_64BIT + void __iomem *addr; + size_t count = 0; + uint64_t last; + + if (!adev->mman.aper_base_kaddr) + return 0; + last = min(pos + size, adev->gmc.visible_vram_size); if (last > pos) { - void __iomem *addr = adev->mman.aper_base_kaddr + pos; - size_t count = last - pos; + addr = adev->mman.aper_base_kaddr + pos; + count = last - pos; if (write) { memcpy_toio(addr, buf, count); @@ -322,35 +366,37 @@ void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos, memcpy_fromio(buf, addr, count); } - if (count == size) - goto exit; - - pos += count; - buf += count / 4; - size -= count; } + + return count; +#else + return 0; #endif +} - spin_lock_irqsave(&adev->mmio_idx_lock, flags); - for (last = pos + size; pos < last; pos += 4) { - uint32_t tmp = pos >> 31; +/** + * amdgpu_device_vram_access - read/write a buffer in vram + * + * @adev: amdgpu_device pointer + * @pos: offset of the buffer in vram + * @buf: virtual address of the buffer in system memory + * @size: read/write size, sizeof(@buf) must > @size + * @write: true - write to vram, otherwise - read from vram + */ +void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos, + void *buf, size_t size, bool write) +{ + size_t count; - WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000); - if (tmp != hi) { - WREG32_NO_KIQ(mmMM_INDEX_HI, tmp); - hi = tmp; - } - if (write) - WREG32_NO_KIQ(mmMM_DATA, *buf++); - else - *buf++ = RREG32_NO_KIQ(mmMM_DATA); + /* try to using vram apreature to access vram first */ + count = amdgpu_device_aper_access(adev, pos, buf, size, write); + size -= count; + if (size) { + /* using MM to access rest vram */ + pos += count; + buf += count; + amdgpu_device_mm_access(adev, pos, buf, size, write); } - spin_unlock_irqrestore(&adev->mmio_idx_lock, flags); - -#ifdef CONFIG_64BIT -exit: -#endif - drm_dev_exit(idx); } /* @@ -518,7 +564,7 @@ void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev, adev->gfx.rlc.funcs && adev->gfx.rlc.funcs->is_rlcg_access_range) { if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg)) - return adev->gfx.rlc.funcs->rlcg_wreg(adev, reg, v, 0, 0); + return adev->gfx.rlc.funcs->sriov_wreg(adev, reg, v, 0, 0); } else { writel(v, ((void __iomem *)adev->rmmio) + (reg * 4)); } @@ -1266,15 +1312,16 @@ bool amdgpu_device_need_post(struct amdgpu_device *adev) /** * amdgpu_device_vga_set_decode - enable/disable vga decode * - * @cookie: amdgpu_device pointer + * @pdev: PCI device pointer * @state: enable/disable vga decode * * Enable/disable vga decode (all asics). * Returns VGA resource flags. 
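The amdgpu_device_vram_access() rework above splits VRAM access into a fast path through the visible-VRAM aperture (amdgpu_device_aper_access) and an MM_INDEX/MM_DATA register fallback (amdgpu_device_mm_access) for whatever the aperture cannot reach. A rough, self-contained model of that try-fast-then-fall-back flow; the in-memory "window" and buffer sizes are purely illustrative, not the real BAR handling:

```c
#include <stdio.h>
#include <string.h>

#define VISIBLE_WINDOW 8		/* illustrative visible-VRAM size */

static unsigned char vram[32];		/* illustrative backing store */

/* Fast path: copy only what falls inside the visible window, return count. */
static size_t aper_access(size_t pos, unsigned char *buf, size_t size, int write)
{
	size_t last = pos + size < VISIBLE_WINDOW ? pos + size : VISIBLE_WINDOW;
	size_t count = last > pos ? last - pos : 0;

	if (write)
		memcpy(&vram[pos], buf, count);
	else
		memcpy(buf, &vram[pos], count);
	return count;
}

/* Slow path: byte by byte, standing in for the MM_INDEX/MM_DATA loop. */
static void mm_access(size_t pos, unsigned char *buf, size_t size, int write)
{
	while (size--) {
		if (write)
			vram[pos++] = *buf++;
		else
			*buf++ = vram[pos++];
	}
}

/* Mirrors the new top-level helper: fast path first, remainder via slow path. */
static void vram_access(size_t pos, unsigned char *buf, size_t size, int write)
{
	size_t count = aper_access(pos, buf, size, write);

	size -= count;
	if (size)
		mm_access(pos + count, buf + count, size, write);
}

int main(void)
{
	unsigned char data[12] = "hello window";

	vram_access(4, data, sizeof(data), 1);	/* write spans the window edge */
	memset(data, 0, sizeof(data));
	vram_access(4, data, sizeof(data), 0);	/* read it back the same way */
	printf("%.12s\n", data);
	return 0;
}
```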
*/ -static unsigned int amdgpu_device_vga_set_decode(void *cookie, bool state) +static unsigned int amdgpu_device_vga_set_decode(struct pci_dev *pdev, + bool state) { - struct amdgpu_device *adev = cookie; + struct amdgpu_device *adev = drm_to_adev(pci_get_drvdata(pdev)); amdgpu_asic_set_vga_state(adev, state); if (state) return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM | @@ -1394,6 +1441,10 @@ static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev) break; case CHIP_YELLOW_CARP: break; + case CHIP_CYAN_SKILLFISH: + if (adev->pdev->device == 0x13FE) + adev->apu_flags |= AMD_APU_IS_CYAN_SKILLFISH2; + break; default: return -EINVAL; } @@ -2100,6 +2151,7 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) case CHIP_BEIGE_GOBY: case CHIP_VANGOGH: case CHIP_YELLOW_CARP: + case CHIP_CYAN_SKILLFISH: if (adev->asic_type == CHIP_VANGOGH) adev->family = AMDGPU_FAMILY_VGH; else if (adev->asic_type == CHIP_YELLOW_CARP) @@ -3594,9 +3646,9 @@ int amdgpu_device_init(struct amdgpu_device *adev, fence_driver_init: /* Fence driver */ - r = amdgpu_fence_driver_init(adev); + r = amdgpu_fence_driver_sw_init(adev); if (r) { - dev_err(adev->dev, "amdgpu_fence_driver_init failed\n"); + dev_err(adev->dev, "amdgpu_fence_driver_sw_init failed\n"); amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0); goto failed; } @@ -3623,6 +3675,8 @@ fence_driver_init: goto release_ras_con; } + amdgpu_fence_driver_hw_init(adev); + dev_info(adev->dev, "SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n", adev->gfx.config.max_shader_engines, @@ -3714,7 +3768,7 @@ fence_driver_init: /* this will fail for cards that aren't VGA class devices, just * ignore it */ if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA) - vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode); + vga_client_register(adev->pdev, amdgpu_device_vga_set_decode); if (amdgpu_device_supports_px(ddev)) { px = true; @@ -3771,7 +3825,10 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev) { dev_info(adev->dev, "amdgpu: finishing device.\n"); flush_delayed_work(&adev->delayed_init_work); - ttm_bo_lock_delayed_workqueue(&adev->mman.bdev); + if (adev->mman.initialized) { + flush_delayed_work(&adev->mman.bdev.wq); + ttm_bo_lock_delayed_workqueue(&adev->mman.bdev); + } adev->shutdown = true; /* make sure IB test finished before entering exclusive mode @@ -3790,7 +3847,7 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev) else drm_atomic_helper_shutdown(adev_to_drm(adev)); } - amdgpu_fence_driver_fini_hw(adev); + amdgpu_fence_driver_hw_fini(adev); if (adev->pm_sysfs_en) amdgpu_pm_sysfs_fini(adev); @@ -3812,7 +3869,7 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev) void amdgpu_device_fini_sw(struct amdgpu_device *adev) { amdgpu_device_ip_fini(adev); - amdgpu_fence_driver_fini_sw(adev); + amdgpu_fence_driver_sw_fini(adev); release_firmware(adev->firmware.gpu_info_fw); adev->firmware.gpu_info_fw = NULL; adev->accel_working = false; @@ -3833,7 +3890,7 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev) vga_switcheroo_fini_domain_pm_ops(adev->dev); } if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA) - vga_client_register(adev->pdev, NULL, NULL, NULL); + vga_client_unregister(adev->pdev); if (IS_ENABLED(CONFIG_PERF_EVENTS)) amdgpu_pmu_fini(adev); @@ -3887,7 +3944,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) /* evict vram memory */ amdgpu_bo_evict_vram(adev); - amdgpu_fence_driver_suspend(adev); + amdgpu_fence_driver_hw_fini(adev); 
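The vga_client_register() hunks above follow the VGA arbiter API change in this cycle: the set_decode callback now receives the struct pci_dev directly instead of an opaque cookie, and unregistration uses a dedicated vga_client_unregister() helper. A minimal sketch of the new-style callback for a hypothetical driver, called from its probe/remove paths; the mydrv_* names and the drvdata layout are placeholders:

```c
#include <linux/pci.h>
#include <linux/vgaarb.h>

/* Hypothetical driver data reachable through the PCI drvdata pointer. */
struct mydrv_device {
	bool vga_decodes;
};

/* New-style decode callback: the arbiter hands us the pci_dev directly. */
static unsigned int mydrv_vga_set_decode(struct pci_dev *pdev, bool state)
{
	struct mydrv_device *mdev = pci_get_drvdata(pdev);

	mdev->vga_decodes = state;
	if (state)
		return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
		       VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
	return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
}

static void mydrv_vga_init(struct pci_dev *pdev)
{
	/* Only VGA-class devices take part in legacy VGA routing. */
	if ((pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
		vga_client_register(pdev, mydrv_vga_set_decode);
}

static void mydrv_vga_fini(struct pci_dev *pdev)
{
	if ((pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
		vga_client_unregister(pdev);
}
```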
amdgpu_device_ip_suspend_phase2(adev); /* evict remaining vram memory @@ -3932,8 +3989,7 @@ int amdgpu_device_resume(struct drm_device *dev, bool fbcon) dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r); return r; } - amdgpu_fence_driver_resume(adev); - + amdgpu_fence_driver_hw_init(adev); r = amdgpu_device_ip_late_init(adev); if (r) @@ -4394,7 +4450,7 @@ int amdgpu_device_mode1_reset(struct amdgpu_device *adev) int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, struct amdgpu_reset_context *reset_context) { - int i, r = 0; + int i, j, r = 0; struct amdgpu_job *job = NULL; bool need_full_reset = test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags); @@ -4418,11 +4474,22 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, if (!ring || !ring->sched.thread) continue; + /*clear job fence from fence drv to avoid force_completion + *leave NULL and vm flush fence in fence drv */ + for (j = 0; j <= ring->fence_drv.num_fences_mask; j++) { + struct dma_fence *old, **ptr; + + ptr = &ring->fence_drv.fences[j]; + old = rcu_dereference_protected(*ptr, 1); + if (old && test_bit(AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT, &old->flags)) { + RCU_INIT_POINTER(*ptr, NULL); + } + } /* after all hw jobs are reset, hw fence is meaningless, so force_completion */ amdgpu_fence_driver_force_completion(ring); } - if(job) + if (job && job->vm) drm_sched_increase_karma(&job->base); r = amdgpu_reset_prepare_hwcontext(adev, reset_context); @@ -4886,7 +4953,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress", job ? job->base.id : -1, hive->hive_id); amdgpu_put_xgmi_hive(hive); - if (job) + if (job && job->vm) drm_sched_increase_karma(&job->base); return 0; } @@ -4910,7 +4977,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, job ? job->base.id : -1); /* even we skipped this reset, still need to set the job to guilty */ - if (job) + if (job && job->vm) drm_sched_increase_karma(&job->base); goto skip_recovery; } @@ -5277,6 +5344,10 @@ int amdgpu_device_baco_exit(struct drm_device *dev) adev->nbio.funcs->enable_doorbell_interrupt) adev->nbio.funcs->enable_doorbell_interrupt(adev, true); + if (amdgpu_passthrough(adev) && + adev->nbio.funcs->clear_doorbell_interrupt) + adev->nbio.funcs->clear_doorbell_interrupt(adev); + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index a9475b207510..ae6ab93c868b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -42,48 +42,6 @@ #include <linux/pci-p2pdma.h> #include <linux/pm_runtime.h> -static int -__dma_resv_make_exclusive(struct dma_resv *obj) -{ - struct dma_fence **fences; - unsigned int count; - int r; - - if (!dma_resv_shared_list(obj)) /* no shared fences to convert */ - return 0; - - r = dma_resv_get_fences(obj, NULL, &count, &fences); - if (r) - return r; - - if (count == 0) { - /* Now that was unexpected. 
*/ - } else if (count == 1) { - dma_resv_add_excl_fence(obj, fences[0]); - dma_fence_put(fences[0]); - kfree(fences); - } else { - struct dma_fence_array *array; - - array = dma_fence_array_create(count, fences, - dma_fence_context_alloc(1), 0, - false); - if (!array) - goto err_fences_put; - - dma_resv_add_excl_fence(obj, &array->base); - dma_fence_put(&array->base); - } - - return 0; - -err_fences_put: - while (count--) - dma_fence_put(fences[count]); - kfree(fences); - return -ENOMEM; -} - /** * amdgpu_dma_buf_attach - &dma_buf_ops.attach implementation * @@ -110,24 +68,6 @@ static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf, if (r < 0) goto out; - r = amdgpu_bo_reserve(bo, false); - if (unlikely(r != 0)) - goto out; - - /* - * We only create shared fences for internal use, but importers - * of the dmabuf rely on exclusive fences for implicitly - * tracking write hazards. As any of the current fences may - * correspond to a write, we need to convert all existing - * fences on the reservation object into a single exclusive - * fence. - */ - r = __dma_resv_make_exclusive(bo->tbo.base.resv); - if (r) - goto out; - - bo->prime_shared_count++; - amdgpu_bo_unreserve(bo); return 0; out: @@ -150,9 +90,6 @@ static void amdgpu_dma_buf_detach(struct dma_buf *dmabuf, struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - if (attach->dev->driver != adev->dev->driver && bo->prime_shared_count) - bo->prime_shared_count--; - pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); } @@ -418,8 +355,6 @@ amdgpu_dma_buf_create_obj(struct drm_device *dev, struct dma_buf *dma_buf) bo = gem_to_amdgpu_bo(gobj); bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT; bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT; - if (dma_buf->ops != &amdgpu_dmabuf_ops) - bo->prime_shared_count = 1; dma_resv_unlock(resv); return gobj; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 971c5b8e75dc..b6640291f980 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -870,11 +870,10 @@ MODULE_PARM_DESC(reset_method, "GPU reset method (-1 = auto (default), 0 = legac module_param_named(reset_method, amdgpu_reset_method, int, 0444); /** - * DOC: bad_page_threshold (int) - * Bad page threshold is to specify the threshold value of faulty pages - * detected by RAS ECC, that may result in GPU entering bad status if total - * faulty pages by ECC exceed threshold value and leave it for user's further - * check. + * DOC: bad_page_threshold (int) Bad page threshold is specifies the + * threshold value of faulty pages detected by RAS ECC, which may + * result in the GPU entering bad status when the number of total + * faulty pages by ECC exceeds the threshold value. 
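The dma-buf attach/detach hunks above drop the old __dma_resv_make_exclusive() conversion and the prime_shared_count bookkeeping; with this series, implicit synchronization for shared BOs is handled at submission time instead, by wrapping each command-submission fence in a dma_fence_chain as in the amdgpu_cs_submit() hunk earlier. A condensed sketch of that wrapping step, assuming the reservation object is locked, a shared-fence slot has been reserved, and the chain node was preallocated with dma_fence_chain_alloc():

```c
#include <linux/dma-fence.h>
#include <linux/dma-fence-chain.h>
#include <linux/dma-resv.h>

/*
 * Publish the submission as a shared fence, then link it behind the previous
 * exclusive fence with a dma_fence_chain so importers that only honour the
 * exclusive slot still wait for this write.
 */
static void wrap_submission_exclusive(struct dma_resv *resv,
				      struct dma_fence_chain *chain,
				      struct dma_fence *submission)
{
	/* The chain takes over the resv's reference to the old exclusive
	 * fence plus a new reference to the submission fence. */
	dma_fence_chain_init(chain, dma_resv_excl_fence(resv),
			     dma_fence_get(submission), 1);

	dma_resv_add_shared_fence(resv, submission);
	rcu_assign_pointer(resv->fence_excl, &chain->base);
}
```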
*/ MODULE_PARM_DESC(bad_page_threshold, "Bad page threshold(-1 = auto(default value), 0 = disable bad page retirement)"); module_param_named(bad_page_threshold, amdgpu_bad_page_threshold, int, 0444); @@ -1213,6 +1212,9 @@ static const struct pci_device_id pciidlist[] = { {0x1002, 0x740F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN|AMD_EXP_HW_SUPPORT}, {0x1002, 0x7410, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN|AMD_EXP_HW_SUPPORT}, + /* CYAN_SKILLFISH */ + {0x1002, 0x13FE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYAN_SKILLFISH|AMD_IS_APU}, + /* BEIGE_GOBY */ {0x1002, 0x7420, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY}, {0x1002, 0x7421, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY}, @@ -1236,7 +1238,7 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, int ret, retry = 0; bool supports_atomic = false; - if (!amdgpu_virtual_display && + if (amdgpu_virtual_display || amdgpu_device_asic_has_dc_support(flags & AMD_ASIC_MASK)) supports_atomic = true; @@ -1292,7 +1294,7 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, #endif /* Get rid of things like offb */ - ret = drm_aperture_remove_conflicting_pci_framebuffers(pdev, "amdgpudrmfb"); + ret = drm_aperture_remove_conflicting_pci_framebuffers(pdev, &amdgpu_kms_driver); if (ret) return ret; @@ -1474,7 +1476,7 @@ static int amdgpu_pmops_suspend(struct device *dev) struct amdgpu_device *adev = drm_to_adev(drm_dev); int r; - if (amdgpu_acpi_is_s0ix_supported(adev)) + if (amdgpu_acpi_is_s0ix_active(adev)) adev->in_s0ix = true; adev->in_s3 = true; r = amdgpu_device_suspend(drm_dev, true); @@ -1490,7 +1492,7 @@ static int amdgpu_pmops_resume(struct device *dev) int r; r = amdgpu_device_resume(drm_dev, true); - if (amdgpu_acpi_is_s0ix_supported(adev)) + if (amdgpu_acpi_is_s0ix_active(adev)) adev->in_s0ix = false; return r; } @@ -1784,7 +1786,6 @@ static const struct drm_driver amdgpu_kms_driver = { .open = amdgpu_driver_open_kms, .postclose = amdgpu_driver_postclose_kms, .lastclose = amdgpu_driver_lastclose_kms, - .irq_handler = amdgpu_irq_handler, .ioctls = amdgpu_ioctls_kms, .num_ioctls = ARRAY_SIZE(amdgpu_ioctls_kms), .dumb_create = amdgpu_mode_dumb_create, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.c new file mode 100644 index 000000000000..4d9eb0137f8c --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.c @@ -0,0 +1,239 @@ +/* + * Copyright 2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#include "amdgpu_eeprom.h" +#include "amdgpu.h" + +/* AT24CM02 and M24M02-R have a 256-byte write page size. + */ +#define EEPROM_PAGE_BITS 8 +#define EEPROM_PAGE_SIZE (1U << EEPROM_PAGE_BITS) +#define EEPROM_PAGE_MASK (EEPROM_PAGE_SIZE - 1) + +#define EEPROM_OFFSET_SIZE 2 + +/* EEPROM memory addresses are 19-bits long, which can + * be partitioned into 3, 8, 8 bits, for a total of 19. + * The upper 3 bits are sent as part of the 7-bit + * "Device Type Identifier"--an I2C concept, which for EEPROM devices + * is hard-coded as 1010b, indicating that it is an EEPROM + * device--this is the wire format, followed by the upper + * 3 bits of the 19-bit address, followed by the direction, + * followed by two bytes holding the rest of the 16-bits of + * the EEPROM memory address. The format on the wire for EEPROM + * devices is: 1010XYZD, A15:A8, A7:A0, + * Where D is the direction and sequenced out by the hardware. + * Bits XYZ are memory address bits 18, 17 and 16. + * These bits are compared to how pins 1-3 of the part are connected, + * depending on the size of the part, more on that later. + * + * Note that of this wire format, a client is in control + * of, and needs to specify only XYZ, A15:A8, A7:0, bits, + * which is exactly the EEPROM memory address, or offset, + * in order to address up to 8 EEPROM devices on the I2C bus. + * + * For instance, a 2-Mbit I2C EEPROM part, addresses all its bytes, + * using an 18-bit address, bit 17 to 0 and thus would use all but one bit of + * the 19 bits previously mentioned. The designer would then not connect + * pins 1 and 2, and pin 3 usually named "A_2" or "E2", would be connected to + * either Vcc or GND. This would allow for up to two 2-Mbit parts on + * the same bus, where one would be addressable with bit 18 as 1, and + * the other with bit 18 of the address as 0. + * + * For a 2-Mbit part, bit 18 is usually known as the "Chip Enable" or + * "Hardware Address Bit". This bit is compared to the load on pin 3 + * of the device, described above, and if there is a match, then this + * device responds to the command. This way, you can connect two + * 2-Mbit EEPROM devices on the same bus, but see one contiguous + * memory from 0 to 7FFFFh, where address 0 to 3FFFF is in the device + * whose pin 3 is connected to GND, and address 40000 to 7FFFFh is in + * the 2nd device, whose pin 3 is connected to Vcc. + * + * This addressing you encode in the 32-bit "eeprom_addr" below, + * namely the 19-bits "XYZ,A15:A0", as a single 19-bit address. For + * instance, eeprom_addr = 0x6DA01, is 110_1101_1010_0000_0001, where + * XYZ=110b, and A15:A0=DA01h. The XYZ bits become part of the device + * address, and the rest of the address bits are sent as the memory + * address bytes. + * + * That is, for an I2C EEPROM driver everything is controlled by + * the "eeprom_addr". + * + * P.S. If you need to write, lock and read the Identification Page, + * (M24M02-DR device only, which we do not use), change the "7" to + * "0xF" in the macro below, and let the client set bit 20 to 1 in + * "eeprom_addr", and set A10 to 0 to write into it, and A10 and A1 to + * 1 to lock it permanently. + */ +#define MAKE_I2C_ADDR(_aa) ((0xA << 3) | (((_aa) >> 16) & 7)) + +static int __amdgpu_eeprom_xfer(struct i2c_adapter *i2c_adap, u32 eeprom_addr, + u8 *eeprom_buf, u16 buf_size, bool read) +{ + u8 eeprom_offset_buf[EEPROM_OFFSET_SIZE]; + struct i2c_msg msgs[] = { + { + .flags = 0, + .len = EEPROM_OFFSET_SIZE, + .buf = eeprom_offset_buf, + }, + { + .flags = read ? 
I2C_M_RD : 0, + }, + }; + const u8 *p = eeprom_buf; + int r; + u16 len; + + for (r = 0; buf_size > 0; + buf_size -= len, eeprom_addr += len, eeprom_buf += len) { + /* Set the EEPROM address we want to write to/read from. + */ + msgs[0].addr = MAKE_I2C_ADDR(eeprom_addr); + msgs[1].addr = msgs[0].addr; + msgs[0].buf[0] = (eeprom_addr >> 8) & 0xff; + msgs[0].buf[1] = eeprom_addr & 0xff; + + if (!read) { + /* Write the maximum amount of data, without + * crossing the device's page boundary, as per + * its spec. Partial page writes are allowed, + * starting at any location within the page, + * so long as the page boundary isn't crossed + * over (actually the page pointer rolls + * over). + * + * As per the AT24CM02 EEPROM spec, after + * writing into a page, the I2C driver should + * terminate the transfer, i.e. in + * "i2c_transfer()" below, with a STOP + * condition, so that the self-timed write + * cycle begins. This is implied for the + * "i2c_transfer()" abstraction. + */ + len = min(EEPROM_PAGE_SIZE - (eeprom_addr & + EEPROM_PAGE_MASK), + (u32)buf_size); + } else { + /* Reading from the EEPROM has no limitation + * on the number of bytes read from the EEPROM + * device--they are simply sequenced out. + */ + len = buf_size; + } + msgs[1].len = len; + msgs[1].buf = eeprom_buf; + + /* This constitutes a START-STOP transaction. + */ + r = i2c_transfer(i2c_adap, msgs, ARRAY_SIZE(msgs)); + if (r != ARRAY_SIZE(msgs)) + break; + + if (!read) { + /* According to EEPROM specs the length of the + * self-writing cycle, tWR (tW), is 10 ms. + * + * TODO: Use polling on ACK, aka Acknowledge + * Polling, to minimize waiting for the + * internal write cycle to complete, as it is + * usually smaller than tWR (tW). + */ + msleep(10); + } + } + + return r < 0 ? r : eeprom_buf - p; +} + +/** + * amdgpu_eeprom_xfer -- Read/write from/to an I2C EEPROM device + * @i2c_adap: pointer to the I2C adapter to use + * @eeprom_addr: EEPROM address from which to read/write + * @eeprom_buf: pointer to data buffer to read into/write from + * @buf_size: the size of @eeprom_buf + * @read: True if reading from the EEPROM, false if writing + * + * Returns the number of bytes read/written; -errno on error. + */ +static int amdgpu_eeprom_xfer(struct i2c_adapter *i2c_adap, u32 eeprom_addr, + u8 *eeprom_buf, u16 buf_size, bool read) +{ + const struct i2c_adapter_quirks *quirks = i2c_adap->quirks; + u16 limit; + + if (!quirks) + limit = 0; + else if (read) + limit = quirks->max_read_len; + else + limit = quirks->max_write_len; + + if (limit == 0) { + return __amdgpu_eeprom_xfer(i2c_adap, eeprom_addr, + eeprom_buf, buf_size, read); + } else if (limit <= EEPROM_OFFSET_SIZE) { + dev_err_ratelimited(&i2c_adap->dev, + "maddr:0x%04X size:0x%02X:quirk max_%s_len must be > %d", + eeprom_addr, buf_size, + read ? "read" : "write", EEPROM_OFFSET_SIZE); + return -EINVAL; + } else { + u16 ps; /* Partial size */ + int res = 0, r; + + /* The "limit" includes all data bytes sent/received, + * which would include the EEPROM_OFFSET_SIZE bytes. + * Account for them here. 
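The new amdgpu_eeprom.c above encodes the 19-bit EEPROM memory address into the 7-bit I2C device address plus two offset bytes, and clamps each write so it never crosses a 256-byte page. A small standalone illustration of both rules, reusing the worked 0x6DA01 address from the comment and a hypothetical 300-byte write:

```c
#include <stdio.h>

#define EEPROM_PAGE_SIZE 256u
#define EEPROM_PAGE_MASK (EEPROM_PAGE_SIZE - 1)

/* 1010b device-type identifier followed by EEPROM address bits 18:16. */
static unsigned int make_i2c_addr(unsigned int eeprom_addr)
{
	return (0xA << 3) | ((eeprom_addr >> 16) & 7);
}

/* A write chunk runs at most to the end of the current 256-byte page. */
static unsigned int write_chunk_len(unsigned int eeprom_addr, unsigned int remaining)
{
	unsigned int to_boundary = EEPROM_PAGE_SIZE - (eeprom_addr & EEPROM_PAGE_MASK);

	return remaining < to_boundary ? remaining : to_boundary;
}

int main(void)
{
	unsigned int addr = 0x6DA01, pos, left, len;

	/* 0x6DA01 -> device address 0x56 (1010_110b), offset bytes DA 01. */
	printf("i2c addr 0x%02X, offset bytes 0x%02X 0x%02X\n",
	       make_i2c_addr(addr), (addr >> 8) & 0xff, addr & 0xff);

	/* A 300-byte write starting at 0x60FA splits into 6 + 256 + 38 bytes. */
	for (pos = 0x60FA, left = 300; left; pos += len, left -= len) {
		len = write_chunk_len(pos, left);
		printf("write %3u bytes at 0x%05X\n", len, pos);
	}
	return 0;
}
```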
+ */ + limit -= EEPROM_OFFSET_SIZE; + for ( ; buf_size > 0; + buf_size -= ps, eeprom_addr += ps, eeprom_buf += ps) { + ps = min(limit, buf_size); + + r = __amdgpu_eeprom_xfer(i2c_adap, eeprom_addr, + eeprom_buf, ps, read); + if (r < 0) + return r; + res += r; + } + + return res; + } +} + +int amdgpu_eeprom_read(struct i2c_adapter *i2c_adap, + u32 eeprom_addr, u8 *eeprom_buf, + u16 bytes) +{ + return amdgpu_eeprom_xfer(i2c_adap, eeprom_addr, eeprom_buf, bytes, + true); +} + +int amdgpu_eeprom_write(struct i2c_adapter *i2c_adap, + u32 eeprom_addr, u8 *eeprom_buf, + u16 bytes) +{ + return amdgpu_eeprom_xfer(i2c_adap, eeprom_addr, eeprom_buf, bytes, + false); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.h new file mode 100644 index 000000000000..6935adb2be1f --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.h @@ -0,0 +1,37 @@ +/* + * Copyright 2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#ifndef _AMDGPU_EEPROM_H +#define _AMDGPU_EEPROM_H + +#include <linux/i2c.h> + +int amdgpu_eeprom_read(struct i2c_adapter *i2c_adap, + u32 eeprom_addr, u8 *eeprom_buf, + u16 bytes); + +int amdgpu_eeprom_write(struct i2c_adapter *i2c_adap, + u32 eeprom_addr, u8 *eeprom_buf, + u16 bytes); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c index 09b048647523..cd0acbea75da 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c @@ -273,9 +273,6 @@ static int amdgpufb_create(struct drm_fb_helper *helper, return 0; out: - if (abo) { - - } if (fb && ret) { drm_gem_object_put(gobj); drm_framebuffer_unregister_private(fb); @@ -344,7 +341,7 @@ int amdgpu_fbdev_init(struct amdgpu_device *adev) } /* disable all the possible outputs/crtcs before entering KMS mode */ - if (!amdgpu_device_has_dc_support(adev)) + if (!amdgpu_device_has_dc_support(adev) && !amdgpu_virtual_display) drm_helper_disable_unused_functions(adev_to_drm(adev)); drm_fb_helper_initial_config(&rfbdev->helper, bpp_sel); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 72d9b92b1754..14499f0de32d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -129,30 +129,50 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring) * * @ring: ring the fence is associated with * @f: resulting fence object + * @job: job the fence is embedded in * @flags: flags to pass into the subordinate .emit_fence() call * * Emits a fence command on the requested ring (all asics). * Returns 0 on success, -ENOMEM on failure. */ -int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, +int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, struct amdgpu_job *job, unsigned flags) { struct amdgpu_device *adev = ring->adev; - struct amdgpu_fence *fence; + struct dma_fence *fence; + struct amdgpu_fence *am_fence; struct dma_fence __rcu **ptr; uint32_t seq; int r; - fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_KERNEL); - if (fence == NULL) - return -ENOMEM; + if (job == NULL) { + /* create a sperate hw fence */ + am_fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_ATOMIC); + if (am_fence == NULL) + return -ENOMEM; + fence = &am_fence->base; + am_fence->ring = ring; + } else { + /* take use of job-embedded fence */ + fence = &job->hw_fence; + } seq = ++ring->fence_drv.sync_seq; - fence->ring = ring; - dma_fence_init(&fence->base, &amdgpu_fence_ops, - &ring->fence_drv.lock, - adev->fence_context + ring->idx, - seq); + if (job != NULL && job->job_run_counter) { + /* reinit seq for resubmitted jobs */ + fence->seqno = seq; + } else { + dma_fence_init(fence, &amdgpu_fence_ops, + &ring->fence_drv.lock, + adev->fence_context + ring->idx, + seq); + } + + if (job != NULL) { + /* mark this fence has a parent job */ + set_bit(AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT, &fence->flags); + } + amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr, seq, flags | AMDGPU_FENCE_FLAG_INT); pm_runtime_get_noresume(adev_to_drm(adev)->dev); @@ -175,9 +195,9 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, /* This function can't be called concurrently anyway, otherwise * emitting the fence would mess up the hardware ring buffer. 
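amdgpu_fence_emit() now handles two fence flavours: one embedded in the job (struct amdgpu_job.hw_fence) and one allocated from the fence slab when there is no job. Consumers further down (the timeline-name, enable-signaling and free callbacks) tell them apart through AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT and recover the containing structure with container_of(). A reduced userspace model of that dispatch; the struct layouts here are stand-ins, not the real kernel types:

```c
#include <stddef.h>
#include <stdio.h>

/* Stand-ins for the kernel types, trimmed to what the pattern needs. */
struct fence {
	unsigned long flags;
};

#define FENCE_FLAG_EMBED_IN_JOB (1UL << 0)	/* models the new flag bit */

struct job {
	int ring_idx;
	struct fence hw_fence;			/* fence embedded in the job */
};

struct standalone_fence {
	struct fence base;			/* separately allocated fence */
	int ring_idx;
};

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

/* Same dispatch the reworked dma_fence callbacks use below. */
static int fence_ring_idx(struct fence *f)
{
	if (f->flags & FENCE_FLAG_EMBED_IN_JOB)
		return container_of(f, struct job, hw_fence)->ring_idx;
	return container_of(f, struct standalone_fence, base)->ring_idx;
}

int main(void)
{
	struct job j = { .ring_idx = 3, .hw_fence.flags = FENCE_FLAG_EMBED_IN_JOB };
	struct standalone_fence s = { .ring_idx = 7 };

	printf("job fence on ring %d, standalone fence on ring %d\n",
	       fence_ring_idx(&j.hw_fence), fence_ring_idx(&s.base));
	return 0;
}
```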
*/ - rcu_assign_pointer(*ptr, dma_fence_get(&fence->base)); + rcu_assign_pointer(*ptr, dma_fence_get(fence)); - *f = &fence->base; + *f = fence; return 0; } @@ -417,9 +437,6 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring, } amdgpu_fence_write(ring, atomic_read(&ring->fence_drv.last_seq)); - if (irq_src) - amdgpu_irq_get(adev, irq_src, irq_type); - ring->fence_drv.irq_src = irq_src; ring->fence_drv.irq_type = irq_type; ring->fence_drv.initialized = true; @@ -490,7 +507,7 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring, r = drm_sched_init(&ring->sched, &amdgpu_sched_ops, num_hw_submission, amdgpu_job_hang_limit, - timeout, sched_score, ring->name); + timeout, NULL, sched_score, ring->name); if (r) { DRM_ERROR("Failed to create scheduler on ring %s.\n", ring->name); @@ -501,7 +518,7 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring, } /** - * amdgpu_fence_driver_init - init the fence driver + * amdgpu_fence_driver_sw_init - init the fence driver * for all possible rings. * * @adev: amdgpu device pointer @@ -512,20 +529,20 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring, * amdgpu_fence_driver_start_ring(). * Returns 0 for success. */ -int amdgpu_fence_driver_init(struct amdgpu_device *adev) +int amdgpu_fence_driver_sw_init(struct amdgpu_device *adev) { return 0; } /** - * amdgpu_fence_driver_fini - tear down the fence driver + * amdgpu_fence_driver_hw_fini - tear down the fence driver * for all possible rings. * * @adev: amdgpu device pointer * * Tear down the fence driver for all possible rings (all asics). */ -void amdgpu_fence_driver_fini_hw(struct amdgpu_device *adev) +void amdgpu_fence_driver_hw_fini(struct amdgpu_device *adev) { int i, r; @@ -534,8 +551,7 @@ void amdgpu_fence_driver_fini_hw(struct amdgpu_device *adev) if (!ring || !ring->fence_drv.initialized) continue; - if (!ring->no_scheduler) - drm_sched_fini(&ring->sched); + /* You can't wait for HW to signal if it's gone */ if (!drm_dev_is_unplugged(&adev->ddev)) r = amdgpu_fence_wait_empty(ring); @@ -553,7 +569,7 @@ void amdgpu_fence_driver_fini_hw(struct amdgpu_device *adev) } } -void amdgpu_fence_driver_fini_sw(struct amdgpu_device *adev) +void amdgpu_fence_driver_sw_fini(struct amdgpu_device *adev) { unsigned int i, j; @@ -563,6 +579,9 @@ void amdgpu_fence_driver_fini_sw(struct amdgpu_device *adev) if (!ring || !ring->fence_drv.initialized) continue; + if (!ring->no_scheduler) + drm_sched_fini(&ring->sched); + for (j = 0; j <= ring->fence_drv.num_fences_mask; ++j) dma_fence_put(ring->fence_drv.fences[j]); kfree(ring->fence_drv.fences); @@ -572,49 +591,18 @@ void amdgpu_fence_driver_fini_sw(struct amdgpu_device *adev) } /** - * amdgpu_fence_driver_suspend - suspend the fence driver - * for all possible rings. - * - * @adev: amdgpu device pointer - * - * Suspend the fence driver for all possible rings (all asics). 
- */ -void amdgpu_fence_driver_suspend(struct amdgpu_device *adev) -{ - int i, r; - - for (i = 0; i < AMDGPU_MAX_RINGS; i++) { - struct amdgpu_ring *ring = adev->rings[i]; - if (!ring || !ring->fence_drv.initialized) - continue; - - /* wait for gpu to finish processing current batch */ - r = amdgpu_fence_wait_empty(ring); - if (r) { - /* delay GPU reset to resume */ - amdgpu_fence_driver_force_completion(ring); - } - - /* disable the interrupt */ - if (ring->fence_drv.irq_src) - amdgpu_irq_put(adev, ring->fence_drv.irq_src, - ring->fence_drv.irq_type); - } -} - -/** - * amdgpu_fence_driver_resume - resume the fence driver + * amdgpu_fence_driver_hw_init - enable the fence driver * for all possible rings. * * @adev: amdgpu device pointer * - * Resume the fence driver for all possible rings (all asics). + * Enable the fence driver for all possible rings (all asics). * Not all asics have all rings, so each asic will only * start the fence driver on the rings it has using * amdgpu_fence_driver_start_ring(). * Returns 0 for success. */ -void amdgpu_fence_driver_resume(struct amdgpu_device *adev) +void amdgpu_fence_driver_hw_init(struct amdgpu_device *adev) { int i; @@ -653,8 +641,16 @@ static const char *amdgpu_fence_get_driver_name(struct dma_fence *fence) static const char *amdgpu_fence_get_timeline_name(struct dma_fence *f) { - struct amdgpu_fence *fence = to_amdgpu_fence(f); - return (const char *)fence->ring->name; + struct amdgpu_ring *ring; + + if (test_bit(AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT, &f->flags)) { + struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence); + + ring = to_amdgpu_ring(job->base.sched); + } else { + ring = to_amdgpu_fence(f)->ring; + } + return (const char *)ring->name; } /** @@ -667,13 +663,20 @@ static const char *amdgpu_fence_get_timeline_name(struct dma_fence *f) */ static bool amdgpu_fence_enable_signaling(struct dma_fence *f) { - struct amdgpu_fence *fence = to_amdgpu_fence(f); - struct amdgpu_ring *ring = fence->ring; + struct amdgpu_ring *ring; + + if (test_bit(AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT, &f->flags)) { + struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence); + + ring = to_amdgpu_ring(job->base.sched); + } else { + ring = to_amdgpu_fence(f)->ring; + } if (!timer_pending(&ring->fence_drv.fallback_timer)) amdgpu_fence_schedule_fallback(ring); - DMA_FENCE_TRACE(&fence->base, "armed on ring %i!\n", ring->idx); + DMA_FENCE_TRACE(f, "armed on ring %i!\n", ring->idx); return true; } @@ -688,8 +691,20 @@ static bool amdgpu_fence_enable_signaling(struct dma_fence *f) static void amdgpu_fence_free(struct rcu_head *rcu) { struct dma_fence *f = container_of(rcu, struct dma_fence, rcu); - struct amdgpu_fence *fence = to_amdgpu_fence(f); - kmem_cache_free(amdgpu_fence_slab, fence); + + if (test_bit(AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT, &f->flags)) { + /* free job if fence has a parent job */ + struct amdgpu_job *job; + + job = container_of(f, struct amdgpu_job, hw_fence); + kfree(job); + } else { + /* free fence_slab if it's separated fence*/ + struct amdgpu_fence *fence; + + fence = to_amdgpu_fence(f); + kmem_cache_free(amdgpu_fence_slab, fence); + } } /** @@ -712,6 +727,7 @@ static const struct dma_fence_ops amdgpu_fence_ops = { .release = amdgpu_fence_release, }; + /* * Fence debugfs */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c index 39b6c6bfab45..7709caeb233d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c +++ 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c @@ -27,10 +27,10 @@ #include "smu_v11_0_i2c.h" #include "atom.h" #include "amdgpu_fru_eeprom.h" +#include "amdgpu_eeprom.h" -#define I2C_PRODUCT_INFO_ADDR 0xAC -#define I2C_PRODUCT_INFO_ADDR_SIZE 0x2 -#define I2C_PRODUCT_INFO_OFFSET 0xC0 +#define FRU_EEPROM_MADDR 0x60000 +#define I2C_PRODUCT_INFO_OFFSET 0xC0 static bool is_fru_eeprom_supported(struct amdgpu_device *adev) { @@ -62,19 +62,11 @@ static bool is_fru_eeprom_supported(struct amdgpu_device *adev) } static int amdgpu_fru_read_eeprom(struct amdgpu_device *adev, uint32_t addrptr, - unsigned char *buff) + unsigned char *buff) { int ret, size; - struct i2c_msg msg = { - .addr = I2C_PRODUCT_INFO_ADDR, - .flags = I2C_M_RD, - .buf = buff, - }; - buff[0] = 0; - buff[1] = addrptr; - msg.len = I2C_PRODUCT_INFO_ADDR_SIZE + 1; - ret = i2c_transfer(&adev->pm.smu_i2c, &msg, 1); + ret = amdgpu_eeprom_read(&adev->pm.smu_i2c, addrptr, buff, 1); if (ret < 1) { DRM_WARN("FRU: Failed to get size field"); return ret; @@ -83,13 +75,9 @@ static int amdgpu_fru_read_eeprom(struct amdgpu_device *adev, uint32_t addrptr, /* The size returned by the i2c requires subtraction of 0xC0 since the * size apparently always reports as 0xC0+actual size. */ - size = buff[2] - I2C_PRODUCT_INFO_OFFSET; - /* Add 1 since address field was 1 byte */ - buff[1] = addrptr + 1; - - msg.len = I2C_PRODUCT_INFO_ADDR_SIZE + size; - ret = i2c_transfer(&adev->pm.smu_i2c, &msg, 1); + size = buff[0] - I2C_PRODUCT_INFO_OFFSET; + ret = amdgpu_eeprom_read(&adev->pm.smu_i2c, addrptr + 1, buff, size); if (ret < 1) { DRM_WARN("FRU: Failed to get data field"); return ret; @@ -101,8 +89,8 @@ static int amdgpu_fru_read_eeprom(struct amdgpu_device *adev, uint32_t addrptr, int amdgpu_fru_get_product_info(struct amdgpu_device *adev) { unsigned char buff[34]; - int addrptr, size; - int len; + u32 addrptr; + int size, len; if (!is_fru_eeprom_supported(adev)) return 0; @@ -125,7 +113,7 @@ int amdgpu_fru_get_product_info(struct amdgpu_device *adev) * Bytes 8-a are all 1-byte and refer to the size of the entire struct, * and the language field, so just start from 0xb, manufacturer size */ - addrptr = 0xb; + addrptr = FRU_EEPROM_MADDR + 0xb; size = amdgpu_fru_read_eeprom(adev, addrptr, buff); if (size < 1) { DRM_ERROR("Failed to read FRU Manufacturer, ret:%d", size); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c index b36405170ff3..76efd5f8950f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c @@ -76,7 +76,7 @@ static int amdgpu_gart_dummy_page_init(struct amdgpu_device *adev) if (adev->dummy_page_addr) return 0; adev->dummy_page_addr = dma_map_page(&adev->pdev->dev, dummy_page, 0, - PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); + PAGE_SIZE, DMA_BIDIRECTIONAL); if (dma_mapping_error(&adev->pdev->dev, adev->dummy_page_addr)) { dev_err(&adev->pdev->dev, "Failed to DMA MAP the dummy page\n"); adev->dummy_page_addr = 0; @@ -96,8 +96,8 @@ void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev) { if (!adev->dummy_page_addr) return; - pci_unmap_page(adev->pdev, adev->dummy_page_addr, - PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); + dma_unmap_page(&adev->pdev->dev, adev->dummy_page_addr, PAGE_SIZE, + DMA_BIDIRECTIONAL); adev->dummy_page_addr = 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 854fc497844b..cb07cc3b06ed 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ 
-621,7 +621,7 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev, if (operation == AMDGPU_VA_OP_MAP || operation == AMDGPU_VA_OP_REPLACE) { - r = amdgpu_vm_bo_update(adev, bo_va, false); + r = amdgpu_vm_bo_update(adev, bo_va, false, NULL); if (r) goto error; } @@ -838,7 +838,8 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data, break; } case AMDGPU_GEM_OP_SET_PLACEMENT: - if (robj->prime_shared_count && (args->value & AMDGPU_GEM_DOMAIN_VRAM)) { + if (robj->tbo.base.import_attach && + args->value & AMDGPU_GEM_DOMAIN_VRAM) { r = -EINVAL; amdgpu_bo_unreserve(robj); break; @@ -903,7 +904,7 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv, DIV_ROUND_UP(args->bpp, 8), 0); args->size = (u64)args->pitch * args->height; args->size = ALIGN(args->size, PAGE_SIZE); - domain = amdgpu_bo_get_preferred_pin_domain(adev, + domain = amdgpu_bo_get_preferred_domain(adev, amdgpu_display_supported_domains(adev, flags)); r = amdgpu_gem_object_create(adev, args->size, 0, domain, flags, ttm_bo_type_device, NULL, &gobj); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index b4ced45301be..e7f06bd0f0cd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -629,7 +629,6 @@ int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev) adev->gfx.ras_if->block = AMDGPU_RAS_BLOCK__GFX; adev->gfx.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; adev->gfx.ras_if->sub_block_index = 0; - strcpy(adev->gfx.ras_if->name, "gfx"); } fs_info.head = ih_info.head = *adev->gfx.ras_if; r = amdgpu_ras_late_init(adev, adev->gfx.ras_if, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index d0b8d415b63b..c7797eac83c3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -471,6 +471,27 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev) return r; } + if (adev->mca.mp0.ras_funcs && + adev->mca.mp0.ras_funcs->ras_late_init) { + r = adev->mca.mp0.ras_funcs->ras_late_init(adev); + if (r) + return r; + } + + if (adev->mca.mp1.ras_funcs && + adev->mca.mp1.ras_funcs->ras_late_init) { + r = adev->mca.mp1.ras_funcs->ras_late_init(adev); + if (r) + return r; + } + + if (adev->mca.mpio.ras_funcs && + adev->mca.mpio.ras_funcs->ras_late_init) { + r = adev->mca.mpio.ras_funcs->ras_late_init(adev); + if (r) + return r; + } + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c index ec96e0b26b11..543000304a1c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c @@ -132,14 +132,11 @@ static int amdgpu_gtt_mgr_new(struct ttm_resource_manager *man, struct amdgpu_gtt_node *node; int r; - spin_lock(&mgr->lock); - if (tbo->resource && tbo->resource->mem_type != TTM_PL_TT && - atomic64_read(&mgr->available) < num_pages) { - spin_unlock(&mgr->lock); + if (!(place->flags & TTM_PL_FLAG_TEMPORARY) && + atomic64_add_return(num_pages, &mgr->used) > man->size) { + atomic64_sub(num_pages, &mgr->used); return -ENOSPC; } - atomic64_sub(num_pages, &mgr->available); - spin_unlock(&mgr->lock); node = kzalloc(struct_size(node, base.mm_nodes, 1), GFP_KERNEL); if (!node) { @@ -175,7 +172,8 @@ err_free: kfree(node); err_out: - atomic64_add(num_pages, &mgr->available); + if (!(place->flags & TTM_PL_FLAG_TEMPORARY)) + atomic64_sub(num_pages, &mgr->used); return r; } @@ -198,7 +196,9 @@ static void amdgpu_gtt_mgr_del(struct 
ttm_resource_manager *man, if (drm_mm_node_allocated(&node->base.mm_nodes[0])) drm_mm_remove_node(&node->base.mm_nodes[0]); spin_unlock(&mgr->lock); - atomic64_add(res->num_pages, &mgr->available); + + if (!(res->placement & TTM_PL_FLAG_TEMPORARY)) + atomic64_sub(res->num_pages, &mgr->used); kfree(node); } @@ -213,9 +213,8 @@ static void amdgpu_gtt_mgr_del(struct ttm_resource_manager *man, uint64_t amdgpu_gtt_mgr_usage(struct ttm_resource_manager *man) { struct amdgpu_gtt_mgr *mgr = to_gtt_mgr(man); - s64 result = man->size - atomic64_read(&mgr->available); - return (result > 0 ? result : 0) * PAGE_SIZE; + return atomic64_read(&mgr->used) * PAGE_SIZE; } /** @@ -265,9 +264,8 @@ static void amdgpu_gtt_mgr_debug(struct ttm_resource_manager *man, drm_mm_print(&mgr->mm, printer); spin_unlock(&mgr->lock); - drm_printf(printer, "man size:%llu pages, gtt available:%lld pages, usage:%lluMB\n", - man->size, (u64)atomic64_read(&mgr->available), - amdgpu_gtt_mgr_usage(man) >> 20); + drm_printf(printer, "man size:%llu pages, gtt used:%llu pages\n", + man->size, atomic64_read(&mgr->used)); } static const struct ttm_resource_manager_func amdgpu_gtt_mgr_func = { @@ -299,7 +297,7 @@ int amdgpu_gtt_mgr_init(struct amdgpu_device *adev, uint64_t gtt_size) size = (adev->gmc.gart_size >> PAGE_SHIFT) - start; drm_mm_init(&mgr->mm, start, size); spin_lock_init(&mgr->lock); - atomic64_set(&mgr->available, gtt_size >> PAGE_SHIFT); + atomic64_set(&mgr->used, 0); ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_TT, &mgr->manager); ttm_resource_manager_set_used(man, true); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c index 1d50d534d77c..a766e1aad2b9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c @@ -41,7 +41,6 @@ int amdgpu_hdp_ras_late_init(struct amdgpu_device *adev) adev->hdp.ras_if->block = AMDGPU_RAS_BLOCK__HDP; adev->hdp.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; adev->hdp.ras_if->sub_block_index = 0; - strcpy(adev->hdp.ras_if->name, "hdp"); } ih_info.head = fs_info.head = *adev->hdp.ras_if; r = amdgpu_ras_late_init(adev, adev->hdp.ras_if, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c index bca4dddd5a15..82608df43396 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c @@ -339,7 +339,7 @@ static void amdgpu_i2c_put_byte(struct amdgpu_i2c_chan *i2c_bus, void amdgpu_i2c_router_select_ddc_port(const struct amdgpu_connector *amdgpu_connector) { - u8 val; + u8 val = 0; if (!amdgpu_connector->router.ddc_valid) return; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index ec65ab0ddf89..c076a6b9a5a2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -262,7 +262,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, fence_flags | AMDGPU_FENCE_FLAG_64BIT); } - r = amdgpu_fence_emit(ring, f, fence_flags); + r = amdgpu_fence_emit(ring, f, job, fence_flags); if (r) { dev_err(adev->dev, "failed to emit fence (%d)\n", r); if (job && job->vmid) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c index cd2e18f072fc..cc2e0c9cfe0a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c @@ -46,7 +46,6 @@ #include <linux/pci.h> #include <drm/drm_crtc_helper.h> -#include <drm/drm_irq.h> #include <drm/drm_vblank.h> #include 
<drm/amdgpu_drm.h> #include <drm/drm_drv.h> @@ -184,7 +183,7 @@ void amdgpu_irq_disable_all(struct amdgpu_device *adev) * Returns: * result of handling the IRQ, as defined by &irqreturn_t */ -irqreturn_t amdgpu_irq_handler(int irq, void *arg) +static irqreturn_t amdgpu_irq_handler(int irq, void *arg) { struct drm_device *dev = (struct drm_device *) arg; struct amdgpu_device *adev = drm_to_adev(dev); @@ -307,6 +306,7 @@ static void amdgpu_restore_msix(struct amdgpu_device *adev) int amdgpu_irq_init(struct amdgpu_device *adev) { int r = 0; + unsigned int irq; spin_lock_init(&adev->irq.lock); @@ -349,15 +349,22 @@ int amdgpu_irq_init(struct amdgpu_device *adev) INIT_WORK(&adev->irq.ih2_work, amdgpu_irq_handle_ih2); INIT_WORK(&adev->irq.ih_soft_work, amdgpu_irq_handle_ih_soft); - adev->irq.installed = true; - /* Use vector 0 for MSI-X */ - r = drm_irq_install(adev_to_drm(adev), pci_irq_vector(adev->pdev, 0)); + /* Use vector 0 for MSI-X. */ + r = pci_irq_vector(adev->pdev, 0); + if (r < 0) + return r; + irq = r; + + /* PCI devices require shared interrupts. */ + r = request_irq(irq, amdgpu_irq_handler, IRQF_SHARED, adev_to_drm(adev)->driver->name, + adev_to_drm(adev)); if (r) { - adev->irq.installed = false; if (!amdgpu_device_has_dc_support(adev)) flush_work(&adev->hotplug_work); return r; } + adev->irq.installed = true; + adev->irq.irq = irq; adev_to_drm(adev)->max_vblank_count = 0x00ffffff; DRM_DEBUG("amdgpu: irq initialized.\n"); @@ -368,7 +375,7 @@ int amdgpu_irq_init(struct amdgpu_device *adev) void amdgpu_irq_fini_hw(struct amdgpu_device *adev) { if (adev->irq.installed) { - drm_irq_uninstall(&adev->ddev); + free_irq(adev->irq.irq, adev_to_drm(adev)); adev->irq.installed = false; if (adev->irq.msi_enabled) pci_free_irq_vectors(adev->pdev); @@ -584,7 +591,7 @@ void amdgpu_irq_gpu_reset_resume_helper(struct amdgpu_device *adev) { int i, j, k; - if (amdgpu_sriov_vf(adev)) + if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev)) amdgpu_restore_msix(adev); for (i = 0; i < AMDGPU_IRQ_CLIENTID_MAX; ++i) { @@ -617,7 +624,7 @@ void amdgpu_irq_gpu_reset_resume_helper(struct amdgpu_device *adev) int amdgpu_irq_get(struct amdgpu_device *adev, struct amdgpu_irq_src *src, unsigned type) { - if (!adev_to_drm(adev)->irq_enabled) + if (!adev->irq.installed) return -ENOENT; if (type >= src->num_types) @@ -647,7 +654,7 @@ int amdgpu_irq_get(struct amdgpu_device *adev, struct amdgpu_irq_src *src, int amdgpu_irq_put(struct amdgpu_device *adev, struct amdgpu_irq_src *src, unsigned type) { - if (!adev_to_drm(adev)->irq_enabled) + if (!adev->irq.installed) return -ENOENT; if (type >= src->num_types) @@ -678,7 +685,7 @@ int amdgpu_irq_put(struct amdgpu_device *adev, struct amdgpu_irq_src *src, bool amdgpu_irq_enabled(struct amdgpu_device *adev, struct amdgpu_irq_src *src, unsigned type) { - if (!adev_to_drm(adev)->irq_enabled) + if (!adev->irq.installed) return false; if (type >= src->num_types) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h index 78ad4784cc74..e9f2c11ea416 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h @@ -80,6 +80,7 @@ struct amdgpu_irq_src_funcs { struct amdgpu_irq { bool installed; + unsigned int irq; spinlock_t lock; /* interrupt sources */ struct amdgpu_irq_client client[AMDGPU_IRQ_CLIENTID_MAX]; @@ -100,7 +101,6 @@ struct amdgpu_irq { }; void amdgpu_irq_disable_all(struct amdgpu_device *adev); -irqreturn_t amdgpu_irq_handler(int irq, void *arg); int amdgpu_irq_init(struct amdgpu_device 
*adev); void amdgpu_irq_fini_sw(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index d33e6d97cc89..de29518673dd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -127,11 +127,16 @@ void amdgpu_job_free_resources(struct amdgpu_job *job) { struct amdgpu_ring *ring = to_amdgpu_ring(job->base.sched); struct dma_fence *f; + struct dma_fence *hw_fence; unsigned i; - /* use sched fence if available */ - f = job->base.s_fence ? &job->base.s_fence->finished : job->fence; + if (job->hw_fence.ops == NULL) + hw_fence = job->external_hw_fence; + else + hw_fence = &job->hw_fence; + /* use sched fence if available */ + f = job->base.s_fence ? &job->base.s_fence->finished : hw_fence; for (i = 0; i < job->num_ibs; ++i) amdgpu_ib_free(ring->adev, &job->ibs[i], f); } @@ -142,20 +147,27 @@ static void amdgpu_job_free_cb(struct drm_sched_job *s_job) drm_sched_job_cleanup(s_job); - dma_fence_put(job->fence); amdgpu_sync_free(&job->sync); amdgpu_sync_free(&job->sched_sync); - kfree(job); + + /* only put the hw fence if has embedded fence */ + if (job->hw_fence.ops != NULL) + dma_fence_put(&job->hw_fence); + else + kfree(job); } void amdgpu_job_free(struct amdgpu_job *job) { amdgpu_job_free_resources(job); - - dma_fence_put(job->fence); amdgpu_sync_free(&job->sync); amdgpu_sync_free(&job->sched_sync); - kfree(job); + + /* only put the hw fence if has embedded fence */ + if (job->hw_fence.ops != NULL) + dma_fence_put(&job->hw_fence); + else + kfree(job); } int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity, @@ -184,11 +196,14 @@ int amdgpu_job_submit_direct(struct amdgpu_job *job, struct amdgpu_ring *ring, job->base.sched = &ring->sched; r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs, NULL, fence); - job->fence = dma_fence_get(*fence); + /* record external_hw_fence for direct submit */ + job->external_hw_fence = dma_fence_get(*fence); if (r) return r; amdgpu_job_free(job); + dma_fence_put(*fence); + return 0; } @@ -246,10 +261,12 @@ static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job) if (r) DRM_ERROR("Error scheduling IBs (%d)\n", r); } - /* if gpu reset, hw fence will be replaced here */ - dma_fence_put(job->fence); - job->fence = dma_fence_get(fence); + if (!job->job_run_counter) + dma_fence_get(fence); + else if (finished->error < 0) + dma_fence_put(&job->hw_fence); + job->job_run_counter++; amdgpu_job_free_resources(job); fence = r ? 
ERR_PTR(r) : fence; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h index 81caac9b958a..9e65730193b8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h @@ -46,7 +46,8 @@ struct amdgpu_job { struct amdgpu_sync sync; struct amdgpu_sync sched_sync; struct amdgpu_ib *ibs; - struct dma_fence *fence; /* the hw fence */ + struct dma_fence hw_fence; + struct dma_fence *external_hw_fence; uint32_t preamble_status; uint32_t preemption_status; uint32_t num_ibs; @@ -62,6 +63,9 @@ struct amdgpu_job { /* user fence handling */ uint64_t uf_addr; uint64_t uf_sequence; + + /* job_run_counter >= 1 means a resubmit job */ + uint32_t job_run_counter; }; int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c index 8996cb4ed57a..9342aa23ebd2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c @@ -47,8 +47,6 @@ int amdgpu_jpeg_sw_fini(struct amdgpu_device *adev) { int i; - cancel_delayed_work_sync(&adev->jpeg.idle_work); - for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { if (adev->jpeg.harvest_config & (1 << i)) continue; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 96ef3f1051d8..7e45640fbee0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -341,27 +341,27 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info, switch (query_fw->index) { case TA_FW_TYPE_PSP_XGMI: fw_info->ver = adev->psp.ta_fw_version; - fw_info->feature = adev->psp.ta_xgmi_ucode_version; + fw_info->feature = adev->psp.xgmi.feature_version; break; case TA_FW_TYPE_PSP_RAS: fw_info->ver = adev->psp.ta_fw_version; - fw_info->feature = adev->psp.ta_ras_ucode_version; + fw_info->feature = adev->psp.ras.feature_version; break; case TA_FW_TYPE_PSP_HDCP: fw_info->ver = adev->psp.ta_fw_version; - fw_info->feature = adev->psp.ta_hdcp_ucode_version; + fw_info->feature = adev->psp.hdcp.feature_version; break; case TA_FW_TYPE_PSP_DTM: fw_info->ver = adev->psp.ta_fw_version; - fw_info->feature = adev->psp.ta_dtm_ucode_version; + fw_info->feature = adev->psp.dtm.feature_version; break; case TA_FW_TYPE_PSP_RAP: fw_info->ver = adev->psp.ta_fw_version; - fw_info->feature = adev->psp.ta_rap_ucode_version; + fw_info->feature = adev->psp.rap.feature_version; break; case TA_FW_TYPE_PSP_SECUREDISPLAY: fw_info->ver = adev->psp.ta_fw_version; - fw_info->feature = adev->psp.ta_securedisplay_ucode_version; + fw_info->feature = adev->psp.securedisplay.feature_version; break; default: return -EINVAL; @@ -374,12 +374,12 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info, fw_info->feature = adev->sdma.instance[query_fw->index].feature_version; break; case AMDGPU_INFO_FW_SOS: - fw_info->ver = adev->psp.sos_fw_version; - fw_info->feature = adev->psp.sos_feature_version; + fw_info->ver = adev->psp.sos.fw_version; + fw_info->feature = adev->psp.sos.feature_version; break; case AMDGPU_INFO_FW_ASD: - fw_info->ver = adev->psp.asd_fw_version; - fw_info->feature = adev->psp.asd_feature_version; + fw_info->ver = adev->psp.asd.fw_version; + fw_info->feature = adev->psp.asd.feature_version; break; case AMDGPU_INFO_FW_DMCU: fw_info->ver = adev->dm.dmcu_fw_version; @@ -390,8 +390,8 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info, fw_info->feature = 0; break; 
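A few hunks back, amdgpu_irq_init() stops using drm_irq_install() and instead requests the interrupt directly on the vector returned by pci_irq_vector(), with free_irq() on teardown. A minimal sketch of that pattern for a hypothetical PCI DRM driver; the mydrv_* names are placeholders:

```c
#include <linux/interrupt.h>
#include <linux/pci.h>

static irqreturn_t mydrv_irq_handler(int irq, void *arg)
{
	/* arg is whatever cookie was passed to request_irq() below. */
	return IRQ_HANDLED;
}

static int mydrv_irq_init(struct pci_dev *pdev, void *cookie, unsigned int *out_irq)
{
	int irq, r;

	/* Vector 0 works for MSI-X, MSI and legacy INTx alike. */
	irq = pci_irq_vector(pdev, 0);
	if (irq < 0)
		return irq;

	/* Shared, since a legacy interrupt line may be shared with others. */
	r = request_irq(irq, mydrv_irq_handler, IRQF_SHARED, "mydrv", cookie);
	if (r)
		return r;

	*out_irq = irq;
	return 0;
}

static void mydrv_irq_fini(unsigned int irq, void *cookie)
{
	free_irq(irq, cookie);
}
```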
case AMDGPU_INFO_FW_TOC: - fw_info->ver = adev->psp.toc_fw_version; - fw_info->feature = adev->psp.toc_feature_version; + fw_info->ver = adev->psp.toc.fw_version; + fw_info->feature = adev->psp.toc.feature_version; break; default: return -EINVAL; @@ -1179,10 +1179,14 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) pasid = 0; } - r = amdgpu_vm_init(adev, &fpriv->vm, pasid); + r = amdgpu_vm_init(adev, &fpriv->vm); if (r) goto error_pasid; + r = amdgpu_vm_set_pasid(adev, &fpriv->vm, pasid); + if (r) + goto error_vm; + fpriv->prt_va = amdgpu_vm_bo_add(adev, &fpriv->vm, NULL); if (!fpriv->prt_va) { r = -ENOMEM; @@ -1210,8 +1214,10 @@ error_vm: amdgpu_vm_fini(adev, &fpriv->vm); error_pasid: - if (pasid) + if (pasid) { amdgpu_pasid_free(pasid); + amdgpu_vm_set_pasid(adev, &fpriv->vm, 0); + } kfree(fpriv); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c new file mode 100644 index 000000000000..a2d3dbbf7d25 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c @@ -0,0 +1,117 @@ +/* + * Copyright 2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ +#include "amdgpu_ras.h" +#include "amdgpu.h" +#include "amdgpu_mca.h" + +#include "umc/umc_6_7_0_offset.h" +#include "umc/umc_6_7_0_sh_mask.h" + +void amdgpu_mca_query_correctable_error_count(struct amdgpu_device *adev, + uint64_t mc_status_addr, + unsigned long *error_count) +{ + uint64_t mc_status = RREG64_PCIE(mc_status_addr * 4); + + if (REG_GET_FIELD(mc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && + REG_GET_FIELD(mc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1) + *error_count += 1; +} + +void amdgpu_mca_query_uncorrectable_error_count(struct amdgpu_device *adev, + uint64_t mc_status_addr, + unsigned long *error_count) +{ + uint64_t mc_status = RREG64_PCIE(mc_status_addr * 4); + + if ((REG_GET_FIELD(mc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) && + (REG_GET_FIELD(mc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1 || + REG_GET_FIELD(mc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 || + REG_GET_FIELD(mc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 || + REG_GET_FIELD(mc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 || + REG_GET_FIELD(mc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1)) + *error_count += 1; +} + +void amdgpu_mca_reset_error_count(struct amdgpu_device *adev, + uint64_t mc_status_addr) +{ + WREG64_PCIE(mc_status_addr * 4, 0x0ULL); +} + +void amdgpu_mca_query_ras_error_count(struct amdgpu_device *adev, + uint64_t mc_status_addr, + void *ras_error_status) +{ + struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; + + amdgpu_mca_query_correctable_error_count(adev, mc_status_addr, &(err_data->ce_count)); + amdgpu_mca_query_uncorrectable_error_count(adev, mc_status_addr, &(err_data->ue_count)); + + amdgpu_mca_reset_error_count(adev, mc_status_addr); +} + +int amdgpu_mca_ras_late_init(struct amdgpu_device *adev, + struct amdgpu_mca_ras *mca_dev) +{ + int r; + struct ras_ih_if ih_info = { + .cb = NULL, + }; + struct ras_fs_if fs_info = { + .sysfs_name = mca_dev->ras_funcs->sysfs_name, + }; + + if (!mca_dev->ras_if) { + mca_dev->ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL); + if (!mca_dev->ras_if) + return -ENOMEM; + mca_dev->ras_if->block = mca_dev->ras_funcs->ras_block; + mca_dev->ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; + mca_dev->ras_if->sub_block_index = 0; + } + ih_info.head = fs_info.head = *mca_dev->ras_if; + r = amdgpu_ras_late_init(adev, mca_dev->ras_if, + &fs_info, &ih_info); + if (r || !amdgpu_ras_is_supported(adev, mca_dev->ras_if->block)) { + kfree(mca_dev->ras_if); + mca_dev->ras_if = NULL; + } + + return r; +} + +void amdgpu_mca_ras_fini(struct amdgpu_device *adev, + struct amdgpu_mca_ras *mca_dev) +{ + struct ras_ih_if ih_info = { + .cb = NULL, + }; + + if (!mca_dev->ras_if) + return; + + amdgpu_ras_late_fini(adev, mca_dev->ras_if, &ih_info); + kfree(mca_dev->ras_if); + mca_dev->ras_if = NULL; +}
\ No newline at end of file diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h new file mode 100644 index 000000000000..f860f2f0e296 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h @@ -0,0 +1,72 @@ +/* + * Copyright (C) 2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef __AMDGPU_MCA_H__ +#define __AMDGPU_MCA_H__ + +struct amdgpu_mca_ras_funcs { + int (*ras_late_init)(struct amdgpu_device *adev); + void (*ras_fini)(struct amdgpu_device *adev); + void (*query_ras_error_count)(struct amdgpu_device *adev, + void *ras_error_status); + void (*query_ras_error_address)(struct amdgpu_device *adev, + void *ras_error_status); + uint32_t ras_block; + const char* sysfs_name; +}; + +struct amdgpu_mca_ras { + struct ras_common_if *ras_if; + const struct amdgpu_mca_ras_funcs *ras_funcs; +}; + +struct amdgpu_mca_funcs { + void (*init)(struct amdgpu_device *adev); +}; + +struct amdgpu_mca { + const struct amdgpu_mca_funcs *funcs; + struct amdgpu_mca_ras mp0; + struct amdgpu_mca_ras mp1; + struct amdgpu_mca_ras mpio; +}; + +void amdgpu_mca_query_correctable_error_count(struct amdgpu_device *adev, + uint64_t mc_status_addr, + unsigned long *error_count); + +void amdgpu_mca_query_uncorrectable_error_count(struct amdgpu_device *adev, + uint64_t mc_status_addr, + unsigned long *error_count); + +void amdgpu_mca_reset_error_count(struct amdgpu_device *adev, + uint64_t mc_status_addr); + +void amdgpu_mca_query_ras_error_count(struct amdgpu_device *adev, + uint64_t mc_status_addr, + void *ras_error_status); + +int amdgpu_mca_ras_late_init(struct amdgpu_device *adev, + struct amdgpu_mca_ras *mca_dev); + +void amdgpu_mca_ras_fini(struct amdgpu_device *adev, + struct amdgpu_mca_ras *mca_dev); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c index ead3dc572ec5..24297dc51434 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c @@ -41,7 +41,6 @@ int amdgpu_mmhub_ras_late_init(struct amdgpu_device *adev) adev->mmhub.ras_if->block = AMDGPU_RAS_BLOCK__MMHUB; adev->mmhub.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; adev->mmhub.ras_if->sub_block_index = 0; - strcpy(adev->mmhub.ras_if->name, "mmhub"); } ih_info.head = fs_info.head = *adev->mmhub.ras_if; r = amdgpu_ras_late_init(adev, adev->mmhub.ras_if, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c 
index 6201a5f4b4fa..6afb02fef8cf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c @@ -39,7 +39,6 @@ int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev) adev->nbio.ras_if->block = AMDGPU_RAS_BLOCK__PCIE_BIF; adev->nbio.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; adev->nbio.ras_if->sub_block_index = 0; - strcpy(adev->nbio.ras_if->name, "pcie_bif"); } ih_info.head = fs_info.head = *adev->nbio.ras_if; r = amdgpu_ras_late_init(adev, adev->nbio.ras_if, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h index 45295dce5c3e..843052205bd5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h @@ -95,6 +95,7 @@ struct amdgpu_nbio_funcs { void (*program_aspm)(struct amdgpu_device *adev); void (*apply_lc_spc_mode_wa)(struct amdgpu_device *adev); void (*apply_l1_link_width_reconfig_wa)(struct amdgpu_device *adev); + void (*clear_doorbell_interrupt)(struct amdgpu_device *adev); }; struct amdgpu_nbio { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 92c8e6e7f346..01a78c786536 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -196,7 +196,7 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain) c++; } - BUG_ON(c >= AMDGPU_BO_MAX_PLACEMENTS); + BUG_ON(c > AMDGPU_BO_MAX_PLACEMENTS); placement->num_placement = c; placement->placement = places; @@ -731,7 +731,7 @@ retry: /** * amdgpu_bo_add_to_shadow_list - add a BO to the shadow list * - * @bo: BO that will be inserted into the shadow list + * @vmbo: BO that will be inserted into the shadow list * * Insert a BO to the shadow list. */ @@ -913,7 +913,7 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, return -EINVAL; /* A shared bo cannot be migrated to VRAM */ - if (bo->prime_shared_count || bo->tbo.base.import_attach) { + if (bo->tbo.base.import_attach) { if (domain & AMDGPU_GEM_DOMAIN_GTT) domain = AMDGPU_GEM_DOMAIN_GTT; else @@ -947,7 +947,7 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, /* This assumes only APU display buffers are pinned with (VRAM|GTT). * See function amdgpu_display_supported_domains() */ - domain = amdgpu_bo_get_preferred_pin_domain(adev, domain); + domain = amdgpu_bo_get_preferred_domain(adev, domain); if (bo->tbo.base.import_attach) dma_buf_pin(bo->tbo.base.import_attach); @@ -1518,14 +1518,14 @@ u64 amdgpu_bo_gpu_offset_no_check(struct amdgpu_bo *bo) } /** - * amdgpu_bo_get_preferred_pin_domain - get preferred domain for scanout + * amdgpu_bo_get_preferred_domain - get preferred domain * @adev: amdgpu device object * @domain: allowed :ref:`memory domains <amdgpu_memory_domains>` * * Returns: - * Which of the allowed domains is preferred for pinning the BO for scanout. + * Which of the allowed domains is preferred for allocating the BO. 
*/ -uint32_t amdgpu_bo_get_preferred_pin_domain(struct amdgpu_device *adev, +uint32_t amdgpu_bo_get_preferred_domain(struct amdgpu_device *adev, uint32_t domain) { if (domain == (AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT)) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index 38c834d0f930..9d6c001c15f8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -100,7 +100,6 @@ struct amdgpu_bo { struct ttm_buffer_object tbo; struct ttm_bo_kmap_obj kmap; u64 flags; - unsigned prime_shared_count; /* per VM structure for page tables and with virtual addresses */ struct amdgpu_vm_bo_base *vm_bo; /* Constant after initialization */ @@ -334,7 +333,7 @@ void amdgpu_bo_get_memory(struct amdgpu_bo *bo, uint64_t *vram_mem, void amdgpu_bo_add_to_shadow_list(struct amdgpu_bo_vm *vmbo); int amdgpu_bo_restore_shadow(struct amdgpu_bo *shadow, struct dma_fence **fence); -uint32_t amdgpu_bo_get_preferred_pin_domain(struct amdgpu_device *adev, +uint32_t amdgpu_bo_get_preferred_domain(struct amdgpu_device *adev, uint32_t domain); /* diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c index f2e20666c9c1..4eaec446b49d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c @@ -80,12 +80,17 @@ static void amdgpu_pll_reduce_ratio(unsigned *nom, unsigned *den, * Calculate feedback and reference divider for a given post divider. Makes * sure we stay within the limits. */ -static void amdgpu_pll_get_fb_ref_div(unsigned nom, unsigned den, unsigned post_div, - unsigned fb_div_max, unsigned ref_div_max, - unsigned *fb_div, unsigned *ref_div) +static void amdgpu_pll_get_fb_ref_div(struct amdgpu_device *adev, unsigned int nom, + unsigned int den, unsigned int post_div, + unsigned int fb_div_max, unsigned int ref_div_max, + unsigned int *fb_div, unsigned int *ref_div) { + /* limit reference * post divider to a maximum */ - ref_div_max = min(128 / post_div, ref_div_max); + if (adev->family == AMDGPU_FAMILY_SI) + ref_div_max = min(100 / post_div, ref_div_max); + else + ref_div_max = min(128 / post_div, ref_div_max); /* get matching reference and feedback divider */ *ref_div = min(max(DIV_ROUND_CLOSEST(den, post_div), 1u), ref_div_max); @@ -112,7 +117,8 @@ static void amdgpu_pll_get_fb_ref_div(unsigned nom, unsigned den, unsigned post_ * Try to calculate the PLL parameters to generate the given frequency: * dot_clock = (ref_freq * feedback_div) / (ref_div * post_div) */ -void amdgpu_pll_compute(struct amdgpu_pll *pll, +void amdgpu_pll_compute(struct amdgpu_device *adev, + struct amdgpu_pll *pll, u32 freq, u32 *dot_clock_p, u32 *fb_div_p, @@ -199,7 +205,7 @@ void amdgpu_pll_compute(struct amdgpu_pll *pll, for (post_div = post_div_min; post_div <= post_div_max; ++post_div) { unsigned diff; - amdgpu_pll_get_fb_ref_div(nom, den, post_div, fb_div_max, + amdgpu_pll_get_fb_ref_div(adev, nom, den, post_div, fb_div_max, ref_div_max, &fb_div, &ref_div); diff = abs(target_clock - (pll->reference_freq * fb_div) / (ref_div * post_div)); @@ -214,7 +220,7 @@ void amdgpu_pll_compute(struct amdgpu_pll *pll, post_div = post_div_best; /* get the feedback and reference divider for the optimal value */ - amdgpu_pll_get_fb_ref_div(nom, den, post_div, fb_div_max, ref_div_max, + amdgpu_pll_get_fb_ref_div(adev, nom, den, post_div, fb_div_max, ref_div_max, &fb_div, &ref_div); /* reduce the numbers to a simpler ratio once more */ diff --git 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.h index db6136f68b82..44a583d6c9b4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.h @@ -24,7 +24,8 @@ #ifndef __AMDGPU_PLL_H__ #define __AMDGPU_PLL_H__ -void amdgpu_pll_compute(struct amdgpu_pll *pll, +void amdgpu_pll_compute(struct amdgpu_device *adev, + struct amdgpu_pll *pll, u32 freq, u32 *dot_clock_p, u32 *fb_div_p, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 3ec5099ffeb6..23efdc672502 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -24,16 +24,17 @@ */ #include <linux/firmware.h> -#include <linux/dma-mapping.h> #include <drm/drm_drv.h> #include "amdgpu.h" #include "amdgpu_psp.h" #include "amdgpu_ucode.h" +#include "amdgpu_xgmi.h" #include "soc15_common.h" #include "psp_v3_1.h" #include "psp_v10_0.h" #include "psp_v11_0.h" +#include "psp_v11_0_8.h" #include "psp_v12_0.h" #include "psp_v13_0.h" @@ -41,8 +42,6 @@ #include "amdgpu_securedisplay.h" #include "amdgpu_atomfirmware.h" -#include <drm/drm_drv.h> - static int psp_sysfs_init(struct amdgpu_device *adev); static void psp_sysfs_fini(struct amdgpu_device *adev); @@ -122,6 +121,12 @@ static int psp_early_init(void *handle) psp_v13_0_set_psp_funcs(psp); psp->autoload_supported = true; break; + case CHIP_CYAN_SKILLFISH: + if (adev->apu_flags & AMD_APU_IS_CYAN_SKILLFISH2) { + psp_v11_0_8_set_psp_funcs(psp); + psp->autoload_supported = false; + } + break; default: return -EINVAL; } @@ -247,6 +252,12 @@ static int psp_sw_init(void *handle) struct psp_runtime_boot_cfg_entry boot_cfg_entry; struct psp_memory_training_context *mem_training_ctx = &psp->mem_train_ctx; + psp->cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); + if (!psp->cmd) { + DRM_ERROR("Failed to allocate memory to command buffer!\n"); + ret = -ENOMEM; + } + if (!amdgpu_sriov_vf(adev)) { ret = psp_init_microcode(psp); if (ret) { @@ -309,25 +320,30 @@ static int psp_sw_init(void *handle) static int psp_sw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct psp_context *psp = &adev->psp; + struct psp_gfx_cmd_resp *cmd = psp->cmd; - psp_memory_training_fini(&adev->psp); - if (adev->psp.sos_fw) { - release_firmware(adev->psp.sos_fw); - adev->psp.sos_fw = NULL; + psp_memory_training_fini(psp); + if (psp->sos_fw) { + release_firmware(psp->sos_fw); + psp->sos_fw = NULL; } - if (adev->psp.asd_fw) { - release_firmware(adev->psp.asd_fw); - adev->psp.asd_fw = NULL; + if (psp->asd_fw) { + release_firmware(psp->asd_fw); + psp->asd_fw = NULL; } - if (adev->psp.ta_fw) { - release_firmware(adev->psp.ta_fw); - adev->psp.ta_fw = NULL; + if (psp->ta_fw) { + release_firmware(psp->ta_fw); + psp->ta_fw = NULL; } if (adev->asic_type == CHIP_NAVI10 || adev->asic_type == CHIP_SIENNA_CICHLID) psp_sysfs_fini(adev); + kfree(cmd); + cmd = NULL; + return 0; } @@ -356,6 +372,44 @@ int psp_wait_for(struct psp_context *psp, uint32_t reg_index, return -ETIME; } +static const char *psp_gfx_cmd_name(enum psp_gfx_cmd_id cmd_id) +{ + switch (cmd_id) { + case GFX_CMD_ID_LOAD_TA: + return "LOAD_TA"; + case GFX_CMD_ID_UNLOAD_TA: + return "UNLOAD_TA"; + case GFX_CMD_ID_INVOKE_CMD: + return "INVOKE_CMD"; + case GFX_CMD_ID_LOAD_ASD: + return "LOAD_ASD"; + case GFX_CMD_ID_SETUP_TMR: + return "SETUP_TMR"; + case GFX_CMD_ID_LOAD_IP_FW: + return "LOAD_IP_FW"; + case GFX_CMD_ID_DESTROY_TMR: + return "DESTROY_TMR"; + case 
GFX_CMD_ID_SAVE_RESTORE: + return "SAVE_RESTORE_IP_FW"; + case GFX_CMD_ID_SETUP_VMR: + return "SETUP_VMR"; + case GFX_CMD_ID_DESTROY_VMR: + return "DESTROY_VMR"; + case GFX_CMD_ID_PROG_REG: + return "PROG_REG"; + case GFX_CMD_ID_GET_FW_ATTESTATION: + return "GET_FW_ATTESTATION"; + case GFX_CMD_ID_LOAD_TOC: + return "ID_LOAD_TOC"; + case GFX_CMD_ID_AUTOLOAD_RLC: + return "AUTOLOAD_RLC"; + case GFX_CMD_ID_BOOT_CFG: + return "BOOT_CFG"; + default: + return "UNKNOWN CMD"; + } +} + static int psp_cmd_submit_buf(struct psp_context *psp, struct amdgpu_firmware_info *ucode, @@ -373,8 +427,6 @@ psp_cmd_submit_buf(struct psp_context *psp, if (!drm_dev_enter(&psp->adev->ddev, &idx)) return 0; - mutex_lock(&psp->mutex); - memset(psp->cmd_buf_mem, 0, PSP_CMD_BUFFER_SIZE); memcpy(psp->cmd_buf_mem, cmd, sizeof(struct psp_gfx_cmd_resp)); @@ -417,10 +469,10 @@ psp_cmd_submit_buf(struct psp_context *psp, */ if (!skip_unsupport && (psp->cmd_buf_mem->resp.status || !timeout) && !ras_intr) { if (ucode) - DRM_WARN("failed to load ucode id (%d) ", - ucode->ucode_id); - DRM_WARN("psp command (0x%X) failed and response status is (0x%X)\n", - psp->cmd_buf_mem->cmd_id, + DRM_WARN("failed to load ucode (%s) ", + amdgpu_ucode_name(ucode->ucode_id)); + DRM_WARN("psp gfx command (%s) failed and response status is (0x%X)\n", + psp_gfx_cmd_name(psp->cmd_buf_mem->cmd_id), psp->cmd_buf_mem->resp.status); if (!timeout) { ret = -EINVAL; @@ -434,11 +486,26 @@ psp_cmd_submit_buf(struct psp_context *psp, } exit: - mutex_unlock(&psp->mutex); drm_dev_exit(idx); return ret; } +static struct psp_gfx_cmd_resp *acquire_psp_cmd_buf(struct psp_context *psp) +{ + struct psp_gfx_cmd_resp *cmd = psp->cmd; + + mutex_lock(&psp->mutex); + + memset(cmd, 0, sizeof(struct psp_gfx_cmd_resp)); + + return cmd; +} + +void release_psp_cmd_buf(struct psp_context *psp) +{ + mutex_unlock(&psp->mutex); +} + static void psp_prep_tmr_cmd_buf(struct psp_context *psp, struct psp_gfx_cmd_resp *cmd, uint64_t tmr_mc, struct amdgpu_bo *tmr_bo) @@ -473,21 +540,20 @@ static int psp_load_toc(struct psp_context *psp, uint32_t *tmr_size) { int ret; - struct psp_gfx_cmd_resp *cmd; + struct psp_gfx_cmd_resp *cmd = acquire_psp_cmd_buf(psp); - cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); - if (!cmd) - return -ENOMEM; /* Copy toc to psp firmware private buffer */ - psp_copy_fw(psp, psp->toc_start_addr, psp->toc_bin_size); + psp_copy_fw(psp, psp->toc.start_addr, psp->toc.size_bytes); - psp_prep_load_toc_cmd_buf(cmd, psp->fw_pri_mc_addr, psp->toc_bin_size); + psp_prep_load_toc_cmd_buf(cmd, psp->fw_pri_mc_addr, psp->toc.size_bytes); ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); if (!ret) *tmr_size = psp->cmd_buf_mem->resp.tmr_size; - kfree(cmd); + + release_psp_cmd_buf(psp); + return ret; } @@ -511,8 +577,8 @@ static int psp_tmr_init(struct psp_context *psp) /* For ASICs support RLC autoload, psp will parse the toc * and calculate the total size of TMR needed */ if (!amdgpu_sriov_vf(psp->adev) && - psp->toc_start_addr && - psp->toc_bin_size && + psp->toc.start_addr && + psp->toc.size_bytes && psp->fw_pri_buf) { ret = psp_load_toc(psp, &tmr_size); if (ret) { @@ -552,9 +618,7 @@ static int psp_tmr_load(struct psp_context *psp) if (amdgpu_sriov_vf(psp->adev) && psp_skip_tmr(psp)) return 0; - cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); - if (!cmd) - return -ENOMEM; + cmd = acquire_psp_cmd_buf(psp); psp_prep_tmr_cmd_buf(psp, cmd, psp->tmr_mc_addr, psp->tmr_bo); DRM_INFO("reserve 0x%lx from 0x%llx for PSP TMR\n", @@ -563,13 
+627,13 @@ static int psp_tmr_load(struct psp_context *psp) ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); - kfree(cmd); + release_psp_cmd_buf(psp); return ret; } static void psp_prep_tmr_unload_cmd_buf(struct psp_context *psp, - struct psp_gfx_cmd_resp *cmd) + struct psp_gfx_cmd_resp *cmd) { if (amdgpu_sriov_vf(psp->adev)) cmd->cmd_id = GFX_CMD_ID_DESTROY_VMR; @@ -580,11 +644,7 @@ static void psp_prep_tmr_unload_cmd_buf(struct psp_context *psp, static int psp_tmr_unload(struct psp_context *psp) { int ret; - struct psp_gfx_cmd_resp *cmd; - - cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); - if (!cmd) - return -ENOMEM; + struct psp_gfx_cmd_resp *cmd = acquire_psp_cmd_buf(psp); psp_prep_tmr_unload_cmd_buf(psp, cmd); DRM_INFO("free PSP TMR buffer\n"); @@ -592,7 +652,7 @@ static int psp_tmr_unload(struct psp_context *psp) ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); - kfree(cmd); + release_psp_cmd_buf(psp); return ret; } @@ -626,9 +686,7 @@ int psp_get_fw_attestation_records_addr(struct psp_context *psp, if (amdgpu_sriov_vf(psp->adev)) return 0; - cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); - if (!cmd) - return -ENOMEM; + cmd = acquire_psp_cmd_buf(psp); cmd->cmd_id = GFX_CMD_ID_GET_FW_ATTESTATION; @@ -640,7 +698,7 @@ int psp_get_fw_attestation_records_addr(struct psp_context *psp, ((uint64_t)cmd->resp.uresp.fwar_db_info.fwar_db_addr_hi << 32); } - kfree(cmd); + release_psp_cmd_buf(psp); return ret; } @@ -648,13 +706,13 @@ int psp_get_fw_attestation_records_addr(struct psp_context *psp, static int psp_boot_config_get(struct amdgpu_device *adev, uint32_t *boot_cfg) { struct psp_context *psp = &adev->psp; - struct psp_gfx_cmd_resp *cmd = psp->cmd; + struct psp_gfx_cmd_resp *cmd; int ret; if (amdgpu_sriov_vf(adev)) return 0; - memset(cmd, 0, sizeof(struct psp_gfx_cmd_resp)); + cmd = acquire_psp_cmd_buf(psp); cmd->cmd_id = GFX_CMD_ID_BOOT_CFG; cmd->cmd.boot_cfg.sub_cmd = BOOTCFG_CMD_GET; @@ -665,47 +723,59 @@ static int psp_boot_config_get(struct amdgpu_device *adev, uint32_t *boot_cfg) (cmd->resp.uresp.boot_cfg.boot_cfg & BOOT_CONFIG_GECC) ? 
1 : 0; } + release_psp_cmd_buf(psp); + return ret; } static int psp_boot_config_set(struct amdgpu_device *adev, uint32_t boot_cfg) { + int ret; struct psp_context *psp = &adev->psp; - struct psp_gfx_cmd_resp *cmd = psp->cmd; + struct psp_gfx_cmd_resp *cmd; if (amdgpu_sriov_vf(adev)) return 0; - memset(cmd, 0, sizeof(struct psp_gfx_cmd_resp)); + cmd = acquire_psp_cmd_buf(psp); cmd->cmd_id = GFX_CMD_ID_BOOT_CFG; cmd->cmd.boot_cfg.sub_cmd = BOOTCFG_CMD_SET; cmd->cmd.boot_cfg.boot_config = boot_cfg; cmd->cmd.boot_cfg.boot_config_valid = boot_cfg; - return psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); + ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); + + release_psp_cmd_buf(psp); + + return ret; } static int psp_rl_load(struct amdgpu_device *adev) { + int ret; struct psp_context *psp = &adev->psp; - struct psp_gfx_cmd_resp *cmd = psp->cmd; + struct psp_gfx_cmd_resp *cmd; - if (psp->rl_bin_size == 0) + if (!is_psp_fw_valid(psp->rl)) return 0; - memset(psp->fw_pri_buf, 0, PSP_1_MEG); - memcpy(psp->fw_pri_buf, psp->rl_start_addr, psp->rl_bin_size); + cmd = acquire_psp_cmd_buf(psp); - memset(cmd, 0, sizeof(struct psp_gfx_cmd_resp)); + memset(psp->fw_pri_buf, 0, PSP_1_MEG); + memcpy(psp->fw_pri_buf, psp->rl.start_addr, psp->rl.size_bytes); cmd->cmd_id = GFX_CMD_ID_LOAD_IP_FW; cmd->cmd.cmd_load_ip_fw.fw_phy_addr_lo = lower_32_bits(psp->fw_pri_mc_addr); cmd->cmd.cmd_load_ip_fw.fw_phy_addr_hi = upper_32_bits(psp->fw_pri_mc_addr); - cmd->cmd.cmd_load_ip_fw.fw_size = psp->rl_bin_size; + cmd->cmd.cmd_load_ip_fw.fw_size = psp->rl.size_bytes; cmd->cmd.cmd_load_ip_fw.fw_type = GFX_FW_TYPE_REG_LIST; - return psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); + ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); + + release_psp_cmd_buf(psp); + + return ret; } static void psp_prep_asd_load_cmd_buf(struct psp_gfx_cmd_resp *cmd, @@ -730,17 +800,15 @@ static int psp_asd_load(struct psp_context *psp) * add workaround to bypass it for sriov now. 
* TODO: add version check to make it common */ - if (amdgpu_sriov_vf(psp->adev) || !psp->asd_ucode_size) + if (amdgpu_sriov_vf(psp->adev) || !psp->asd.size_bytes) return 0; - cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); - if (!cmd) - return -ENOMEM; + cmd = acquire_psp_cmd_buf(psp); - psp_copy_fw(psp, psp->asd_start_addr, psp->asd_ucode_size); + psp_copy_fw(psp, psp->asd.start_addr, psp->asd.size_bytes); psp_prep_asd_load_cmd_buf(cmd, psp->fw_pri_mc_addr, - psp->asd_ucode_size); + psp->asd.size_bytes); ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); @@ -749,7 +817,7 @@ static int psp_asd_load(struct psp_context *psp) psp->asd_context.session_id = cmd->resp.session_id; } - kfree(cmd); + release_psp_cmd_buf(psp); return ret; } @@ -772,9 +840,7 @@ static int psp_asd_unload(struct psp_context *psp) if (!psp->asd_context.asd_initialized) return 0; - cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); - if (!cmd) - return -ENOMEM; + cmd = acquire_psp_cmd_buf(psp); psp_prep_ta_unload_cmd_buf(cmd, psp->asd_context.session_id); @@ -783,7 +849,7 @@ static int psp_asd_unload(struct psp_context *psp) if (!ret) psp->asd_context.asd_initialized = false; - kfree(cmd); + release_psp_cmd_buf(psp); return ret; } @@ -799,22 +865,21 @@ static void psp_prep_reg_prog_cmd_buf(struct psp_gfx_cmd_resp *cmd, int psp_reg_program(struct psp_context *psp, enum psp_reg_prog_id reg, uint32_t value) { - struct psp_gfx_cmd_resp *cmd = NULL; + struct psp_gfx_cmd_resp *cmd; int ret = 0; if (reg >= PSP_REG_LAST) return -EINVAL; - cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); - if (!cmd) - return -ENOMEM; + cmd = acquire_psp_cmd_buf(psp); psp_prep_reg_prog_cmd_buf(cmd, reg, value); ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); if (ret) DRM_ERROR("PSP failed to program reg id %d", reg); - kfree(cmd); + release_psp_cmd_buf(psp); + return ret; } @@ -834,23 +899,37 @@ static void psp_prep_ta_load_cmd_buf(struct psp_gfx_cmd_resp *cmd, cmd->cmd.cmd_load_ta.cmd_buf_len = ta_shared_size; } -static int psp_xgmi_init_shared_buf(struct psp_context *psp) +static int psp_ta_init_shared_buf(struct psp_context *psp, + struct ta_mem_context *mem_ctx, + uint32_t shared_mem_size) { int ret; /* - * Allocate 16k memory aligned to 4k from Frame Buffer (local - * physical) for xgmi ta <-> Driver - */ - ret = amdgpu_bo_create_kernel(psp->adev, PSP_XGMI_SHARED_MEM_SIZE, - PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, - &psp->xgmi_context.xgmi_shared_bo, - &psp->xgmi_context.xgmi_shared_mc_addr, - &psp->xgmi_context.xgmi_shared_buf); + * Allocate 16k memory aligned to 4k from Frame Buffer (local + * physical) for ta to host memory + */ + ret = amdgpu_bo_create_kernel(psp->adev, shared_mem_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &mem_ctx->shared_bo, + &mem_ctx->shared_mc_addr, + &mem_ctx->shared_buf); return ret; } +static void psp_ta_free_shared_buf(struct ta_mem_context *mem_ctx) +{ + amdgpu_bo_free_kernel(&mem_ctx->shared_bo, &mem_ctx->shared_mc_addr, + &mem_ctx->shared_buf); +} + +static int psp_xgmi_init_shared_buf(struct psp_context *psp) +{ + return psp_ta_init_shared_buf(psp, &psp->xgmi_context.context.mem_context, + PSP_XGMI_SHARED_MEM_SIZE); +} + static void psp_prep_ta_invoke_cmd_buf(struct psp_gfx_cmd_resp *cmd, uint32_t ta_cmd_id, uint32_t session_id) @@ -865,18 +944,14 @@ static int psp_ta_invoke(struct psp_context *psp, uint32_t session_id) { int ret; - struct psp_gfx_cmd_resp *cmd; - - cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); - if (!cmd) - return 
-ENOMEM; + struct psp_gfx_cmd_resp *cmd = acquire_psp_cmd_buf(psp); psp_prep_ta_invoke_cmd_buf(cmd, ta_cmd_id, session_id); ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); - kfree(cmd); + release_psp_cmd_buf(psp); return ret; } @@ -890,27 +965,25 @@ static int psp_xgmi_load(struct psp_context *psp) * TODO: bypass the loading in sriov for now */ - cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); - if (!cmd) - return -ENOMEM; + cmd = acquire_psp_cmd_buf(psp); - psp_copy_fw(psp, psp->ta_xgmi_start_addr, psp->ta_xgmi_ucode_size); + psp_copy_fw(psp, psp->xgmi.start_addr, psp->xgmi.size_bytes); psp_prep_ta_load_cmd_buf(cmd, psp->fw_pri_mc_addr, - psp->ta_xgmi_ucode_size, - psp->xgmi_context.xgmi_shared_mc_addr, + psp->xgmi.size_bytes, + psp->xgmi_context.context.mem_context.shared_mc_addr, PSP_XGMI_SHARED_MEM_SIZE); ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); if (!ret) { - psp->xgmi_context.initialized = 1; - psp->xgmi_context.session_id = cmd->resp.session_id; + psp->xgmi_context.context.initialized = true; + psp->xgmi_context.context.session_id = cmd->resp.session_id; } - kfree(cmd); + release_psp_cmd_buf(psp); return ret; } @@ -930,57 +1003,56 @@ static int psp_xgmi_unload(struct psp_context *psp) * TODO: bypass the unloading in sriov for now */ - cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); - if (!cmd) - return -ENOMEM; + cmd = acquire_psp_cmd_buf(psp); - psp_prep_ta_unload_cmd_buf(cmd, psp->xgmi_context.session_id); + psp_prep_ta_unload_cmd_buf(cmd, psp->xgmi_context.context.session_id); ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); - kfree(cmd); + release_psp_cmd_buf(psp); return ret; } int psp_xgmi_invoke(struct psp_context *psp, uint32_t ta_cmd_id) { - return psp_ta_invoke(psp, ta_cmd_id, psp->xgmi_context.session_id); + return psp_ta_invoke(psp, ta_cmd_id, psp->xgmi_context.context.session_id); } int psp_xgmi_terminate(struct psp_context *psp) { int ret; - if (!psp->xgmi_context.initialized) + if (!psp->xgmi_context.context.initialized) return 0; ret = psp_xgmi_unload(psp); if (ret) return ret; - psp->xgmi_context.initialized = 0; + psp->xgmi_context.context.initialized = false; /* free xgmi shared memory */ - amdgpu_bo_free_kernel(&psp->xgmi_context.xgmi_shared_bo, - &psp->xgmi_context.xgmi_shared_mc_addr, - &psp->xgmi_context.xgmi_shared_buf); + psp_ta_free_shared_buf(&psp->xgmi_context.context.mem_context); return 0; } -int psp_xgmi_initialize(struct psp_context *psp) +int psp_xgmi_initialize(struct psp_context *psp, bool set_extended_data, bool load_ta) { struct ta_xgmi_shared_memory *xgmi_cmd; int ret; - if (!psp->adev->psp.ta_fw || - !psp->adev->psp.ta_xgmi_ucode_size || - !psp->adev->psp.ta_xgmi_start_addr) + if (!psp->ta_fw || + !psp->xgmi.size_bytes || + !psp->xgmi.start_addr) return -ENOENT; - if (!psp->xgmi_context.initialized) { + if (!load_ta) + goto invoke; + + if (!psp->xgmi_context.context.initialized) { ret = psp_xgmi_init_shared_buf(psp); if (ret) return ret; @@ -991,9 +1063,11 @@ int psp_xgmi_initialize(struct psp_context *psp) if (ret) return ret; +invoke: /* Initialize XGMI session */ - xgmi_cmd = (struct ta_xgmi_shared_memory *)(psp->xgmi_context.xgmi_shared_buf); + xgmi_cmd = (struct ta_xgmi_shared_memory *)(psp->xgmi_context.context.mem_context.shared_buf); memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory)); + xgmi_cmd->flag_extend_link_record = set_extended_data; xgmi_cmd->cmd_id = TA_COMMAND_XGMI__INITIALIZE; ret = psp_xgmi_invoke(psp, xgmi_cmd->cmd_id); @@ -1006,7 
+1080,7 @@ int psp_xgmi_get_hive_id(struct psp_context *psp, uint64_t *hive_id) struct ta_xgmi_shared_memory *xgmi_cmd; int ret; - xgmi_cmd = (struct ta_xgmi_shared_memory *)psp->xgmi_context.xgmi_shared_buf; + xgmi_cmd = (struct ta_xgmi_shared_memory *)psp->xgmi_context.context.mem_context.shared_buf; memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory)); xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_HIVE_ID; @@ -1026,7 +1100,7 @@ int psp_xgmi_get_node_id(struct psp_context *psp, uint64_t *node_id) struct ta_xgmi_shared_memory *xgmi_cmd; int ret; - xgmi_cmd = (struct ta_xgmi_shared_memory *)psp->xgmi_context.xgmi_shared_buf; + xgmi_cmd = (struct ta_xgmi_shared_memory *)psp->xgmi_context.context.mem_context.shared_buf; memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory)); xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_NODE_ID; @@ -1041,9 +1115,62 @@ int psp_xgmi_get_node_id(struct psp_context *psp, uint64_t *node_id) return 0; } +static bool psp_xgmi_peer_link_info_supported(struct psp_context *psp) +{ + return psp->adev->asic_type == CHIP_ALDEBARAN && + psp->xgmi.feature_version >= 0x2000000b; +} + +/* + * Chips that support extended topology information require the driver to + * reflect topology information in the opposite direction. This is + * because the TA has already exceeded its link record limit and if the + * TA holds bi-directional information, the driver would have to do + * multiple fetches instead of just two. + */ +static void psp_xgmi_reflect_topology_info(struct psp_context *psp, + struct psp_xgmi_node_info node_info) +{ + struct amdgpu_device *mirror_adev; + struct amdgpu_hive_info *hive; + uint64_t src_node_id = psp->adev->gmc.xgmi.node_id; + uint64_t dst_node_id = node_info.node_id; + uint8_t dst_num_hops = node_info.num_hops; + uint8_t dst_num_links = node_info.num_links; + + hive = amdgpu_get_xgmi_hive(psp->adev); + list_for_each_entry(mirror_adev, &hive->device_list, gmc.xgmi.head) { + struct psp_xgmi_topology_info *mirror_top_info; + int j; + + if (mirror_adev->gmc.xgmi.node_id != dst_node_id) + continue; + + mirror_top_info = &mirror_adev->psp.xgmi_context.top_info; + for (j = 0; j < mirror_top_info->num_nodes; j++) { + if (mirror_top_info->nodes[j].node_id != src_node_id) + continue; + + mirror_top_info->nodes[j].num_hops = dst_num_hops; + /* + * prevent 0 num_links value re-reflection since reflection + * criteria is based on num_hops (direct or indirect). 
+ * + */ + if (dst_num_links) + mirror_top_info->nodes[j].num_links = dst_num_links; + + break; + } + + break; + } +} + int psp_xgmi_get_topology_info(struct psp_context *psp, int number_devices, - struct psp_xgmi_topology_info *topology) + struct psp_xgmi_topology_info *topology, + bool get_extended_data) { struct ta_xgmi_shared_memory *xgmi_cmd; struct ta_xgmi_cmd_get_topology_info_input *topology_info_input; @@ -1054,8 +1181,9 @@ int psp_xgmi_get_topology_info(struct psp_context *psp, if (!topology || topology->num_nodes > TA_XGMI__MAX_CONNECTED_NODES) return -EINVAL; - xgmi_cmd = (struct ta_xgmi_shared_memory *)psp->xgmi_context.xgmi_shared_buf; + xgmi_cmd = (struct ta_xgmi_shared_memory *)psp->xgmi_context.context.mem_context.shared_buf; memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory)); + xgmi_cmd->flag_extend_link_record = get_extended_data; /* Fill in the shared memory with topology information as input */ topology_info_input = &xgmi_cmd->xgmi_in_message.get_topology_info; @@ -1078,10 +1206,45 @@ int psp_xgmi_get_topology_info(struct psp_context *psp, topology_info_output = &xgmi_cmd->xgmi_out_message.get_topology_info; topology->num_nodes = xgmi_cmd->xgmi_out_message.get_topology_info.num_nodes; for (i = 0; i < topology->num_nodes; i++) { - topology->nodes[i].node_id = topology_info_output->nodes[i].node_id; - topology->nodes[i].num_hops = topology_info_output->nodes[i].num_hops; - topology->nodes[i].is_sharing_enabled = topology_info_output->nodes[i].is_sharing_enabled; - topology->nodes[i].sdma_engine = topology_info_output->nodes[i].sdma_engine; + /* extended data will either be 0 or equal to non-extended data */ + if (topology_info_output->nodes[i].num_hops) + topology->nodes[i].num_hops = topology_info_output->nodes[i].num_hops; + + /* non-extended data gets everything here so no need to update */ + if (!get_extended_data) { + topology->nodes[i].node_id = topology_info_output->nodes[i].node_id; + topology->nodes[i].is_sharing_enabled = + topology_info_output->nodes[i].is_sharing_enabled; + topology->nodes[i].sdma_engine = + topology_info_output->nodes[i].sdma_engine; + } + + } + + /* Invoke xgmi ta again to get the link information */ + if (psp_xgmi_peer_link_info_supported(psp)) { + struct ta_xgmi_cmd_get_peer_link_info_output *link_info_output; + + xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_PEER_LINKS; + + ret = psp_xgmi_invoke(psp, TA_COMMAND_XGMI__GET_PEER_LINKS); + + if (ret) + return ret; + + link_info_output = &xgmi_cmd->xgmi_out_message.get_link_info; + for (i = 0; i < topology->num_nodes; i++) { + /* accumulate num_links on extended data */ + topology->nodes[i].num_links = get_extended_data ? 
+ topology->nodes[i].num_links + + link_info_output->nodes[i].num_links : + link_info_output->nodes[i].num_links; + + /* reflect the topology information for bi-directionality */ + if (psp->xgmi_context.supports_extended_data && + get_extended_data && topology->nodes[i].num_hops) + psp_xgmi_reflect_topology_info(psp, topology->nodes[i]); + } } return 0; @@ -1098,7 +1261,7 @@ int psp_xgmi_set_topology_info(struct psp_context *psp, if (!topology || topology->num_nodes > TA_XGMI__MAX_CONNECTED_NODES) return -EINVAL; - xgmi_cmd = (struct ta_xgmi_shared_memory *)psp->xgmi_context.xgmi_shared_buf; + xgmi_cmd = (struct ta_xgmi_shared_memory *)psp->xgmi_context.context.mem_context.shared_buf; memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory)); topology_info_input = &xgmi_cmd->xgmi_in_message.get_topology_info; @@ -1119,19 +1282,8 @@ int psp_xgmi_set_topology_info(struct psp_context *psp, // ras begin static int psp_ras_init_shared_buf(struct psp_context *psp) { - int ret; - - /* - * Allocate 16k memory aligned to 4k from Frame Buffer (local - * physical) for ras ta <-> Driver - */ - ret = amdgpu_bo_create_kernel(psp->adev, PSP_RAS_SHARED_MEM_SIZE, - PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, - &psp->ras.ras_shared_bo, - &psp->ras.ras_shared_mc_addr, - &psp->ras.ras_shared_buf); - - return ret; + return psp_ta_init_shared_buf(psp, &psp->ras_context.context.mem_context, + PSP_RAS_SHARED_MEM_SIZE); } static int psp_ras_load(struct psp_context *psp) @@ -1146,42 +1298,40 @@ static int psp_ras_load(struct psp_context *psp) if (amdgpu_sriov_vf(psp->adev)) return 0; - cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); - if (!cmd) - return -ENOMEM; - - psp_copy_fw(psp, psp->ta_ras_start_addr, psp->ta_ras_ucode_size); + psp_copy_fw(psp, psp->ras.start_addr, psp->ras.size_bytes); - ras_cmd = (struct ta_ras_shared_memory *)psp->ras.ras_shared_buf; + ras_cmd = (struct ta_ras_shared_memory *)psp->ras_context.context.mem_context.shared_buf; if (psp->adev->gmc.xgmi.connected_to_cpu) ras_cmd->ras_in_message.init_flags.poison_mode_en = 1; else ras_cmd->ras_in_message.init_flags.dgpu_mode = 1; + cmd = acquire_psp_cmd_buf(psp); + psp_prep_ta_load_cmd_buf(cmd, psp->fw_pri_mc_addr, - psp->ta_ras_ucode_size, - psp->ras.ras_shared_mc_addr, + psp->ras.size_bytes, + psp->ras_context.context.mem_context.shared_mc_addr, PSP_RAS_SHARED_MEM_SIZE); ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); if (!ret) { - psp->ras.session_id = cmd->resp.session_id; + psp->ras_context.context.session_id = cmd->resp.session_id; if (!ras_cmd->ras_status) - psp->ras.ras_initialized = true; + psp->ras_context.context.initialized = true; else dev_warn(psp->adev->dev, "RAS Init Status: 0x%X\n", ras_cmd->ras_status); } + release_psp_cmd_buf(psp); + if (ret || ras_cmd->ras_status) amdgpu_ras_fini(psp->adev); - kfree(cmd); - return ret; } @@ -1196,16 +1346,14 @@ static int psp_ras_unload(struct psp_context *psp) if (amdgpu_sriov_vf(psp->adev)) return 0; - cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); - if (!cmd) - return -ENOMEM; + cmd = acquire_psp_cmd_buf(psp); - psp_prep_ta_unload_cmd_buf(cmd, psp->ras.session_id); + psp_prep_ta_unload_cmd_buf(cmd, psp->ras_context.context.session_id); ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); - kfree(cmd); + release_psp_cmd_buf(psp); return ret; } @@ -1215,7 +1363,7 @@ int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id) struct ta_ras_shared_memory *ras_cmd; int ret; - ras_cmd = (struct ta_ras_shared_memory *)psp->ras.ras_shared_buf; + 
ras_cmd = (struct ta_ras_shared_memory *)psp->ras_context.context.mem_context.shared_buf; /* * TODO: bypass the loading in sriov for now @@ -1223,7 +1371,7 @@ int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id) if (amdgpu_sriov_vf(psp->adev)) return 0; - ret = psp_ta_invoke(psp, ta_cmd_id, psp->ras.session_id); + ret = psp_ta_invoke(psp, ta_cmd_id, psp->ras_context.context.session_id); if (amdgpu_ras_intr_triggered()) return ret; @@ -1279,10 +1427,10 @@ int psp_ras_enable_features(struct psp_context *psp, struct ta_ras_shared_memory *ras_cmd; int ret; - if (!psp->ras.ras_initialized) + if (!psp->ras_context.context.initialized) return -EINVAL; - ras_cmd = (struct ta_ras_shared_memory *)psp->ras.ras_shared_buf; + ras_cmd = (struct ta_ras_shared_memory *)psp->ras_context.context.mem_context.shared_buf; memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory)); if (enable) @@ -1309,19 +1457,17 @@ static int psp_ras_terminate(struct psp_context *psp) if (amdgpu_sriov_vf(psp->adev)) return 0; - if (!psp->ras.ras_initialized) + if (!psp->ras_context.context.initialized) return 0; ret = psp_ras_unload(psp); if (ret) return ret; - psp->ras.ras_initialized = false; + psp->ras_context.context.initialized = false; /* free ras shared memory */ - amdgpu_bo_free_kernel(&psp->ras.ras_shared_bo, - &psp->ras.ras_shared_mc_addr, - &psp->ras.ras_shared_buf); + psp_ta_free_shared_buf(&psp->ras_context.context.mem_context); return 0; } @@ -1338,8 +1484,8 @@ static int psp_ras_initialize(struct psp_context *psp) if (amdgpu_sriov_vf(adev)) return 0; - if (!adev->psp.ta_ras_ucode_size || - !adev->psp.ta_ras_start_addr) { + if (!adev->psp.ras.size_bytes || + !adev->psp.ras.start_addr) { dev_info(adev->dev, "RAS: optional ras ta ucode is not available\n"); return 0; } @@ -1385,7 +1531,7 @@ static int psp_ras_initialize(struct psp_context *psp) } } - if (!psp->ras.ras_initialized) { + if (!psp->ras_context.context.initialized) { ret = psp_ras_init_shared_buf(psp); if (ret) return ret; @@ -1404,10 +1550,10 @@ int psp_ras_trigger_error(struct psp_context *psp, struct ta_ras_shared_memory *ras_cmd; int ret; - if (!psp->ras.ras_initialized) + if (!psp->ras_context.context.initialized) return -EINVAL; - ras_cmd = (struct ta_ras_shared_memory *)psp->ras.ras_shared_buf; + ras_cmd = (struct ta_ras_shared_memory *)psp->ras_context.context.mem_context.shared_buf; memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory)); ras_cmd->cmd_id = TA_RAS_COMMAND__TRIGGER_ERROR; @@ -1429,19 +1575,8 @@ int psp_ras_trigger_error(struct psp_context *psp, // HDCP start static int psp_hdcp_init_shared_buf(struct psp_context *psp) { - int ret; - - /* - * Allocate 16k memory aligned to 4k from Frame Buffer (local - * physical) for hdcp ta <-> Driver - */ - ret = amdgpu_bo_create_kernel(psp->adev, PSP_HDCP_SHARED_MEM_SIZE, - PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, - &psp->hdcp_context.hdcp_shared_bo, - &psp->hdcp_context.hdcp_shared_mc_addr, - &psp->hdcp_context.hdcp_shared_buf); - - return ret; + return psp_ta_init_shared_buf(psp, &psp->hdcp_context.context.mem_context, + PSP_HDCP_SHARED_MEM_SIZE); } static int psp_hdcp_load(struct psp_context *psp) @@ -1455,28 +1590,26 @@ static int psp_hdcp_load(struct psp_context *psp) if (amdgpu_sriov_vf(psp->adev)) return 0; - cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); - if (!cmd) - return -ENOMEM; + psp_copy_fw(psp, psp->hdcp.start_addr, + psp->hdcp.size_bytes); - psp_copy_fw(psp, psp->ta_hdcp_start_addr, - psp->ta_hdcp_ucode_size); + cmd = acquire_psp_cmd_buf(psp); 
psp_prep_ta_load_cmd_buf(cmd, psp->fw_pri_mc_addr, - psp->ta_hdcp_ucode_size, - psp->hdcp_context.hdcp_shared_mc_addr, + psp->hdcp.size_bytes, + psp->hdcp_context.context.mem_context.shared_mc_addr, PSP_HDCP_SHARED_MEM_SIZE); ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); if (!ret) { - psp->hdcp_context.hdcp_initialized = true; - psp->hdcp_context.session_id = cmd->resp.session_id; + psp->hdcp_context.context.initialized = true; + psp->hdcp_context.context.session_id = cmd->resp.session_id; mutex_init(&psp->hdcp_context.mutex); } - kfree(cmd); + release_psp_cmd_buf(psp); return ret; } @@ -1490,13 +1623,13 @@ static int psp_hdcp_initialize(struct psp_context *psp) if (amdgpu_sriov_vf(psp->adev)) return 0; - if (!psp->adev->psp.ta_hdcp_ucode_size || - !psp->adev->psp.ta_hdcp_start_addr) { + if (!psp->hdcp.size_bytes || + !psp->hdcp.start_addr) { dev_info(psp->adev->dev, "HDCP: optional hdcp ta ucode is not available\n"); return 0; } - if (!psp->hdcp_context.hdcp_initialized) { + if (!psp->hdcp_context.context.initialized) { ret = psp_hdcp_init_shared_buf(psp); if (ret) return ret; @@ -1520,15 +1653,13 @@ static int psp_hdcp_unload(struct psp_context *psp) if (amdgpu_sriov_vf(psp->adev)) return 0; - cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); - if (!cmd) - return -ENOMEM; + cmd = acquire_psp_cmd_buf(psp); - psp_prep_ta_unload_cmd_buf(cmd, psp->hdcp_context.session_id); + psp_prep_ta_unload_cmd_buf(cmd, psp->hdcp_context.context.session_id); ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); - kfree(cmd); + release_psp_cmd_buf(psp); return ret; } @@ -1541,7 +1672,7 @@ int psp_hdcp_invoke(struct psp_context *psp, uint32_t ta_cmd_id) if (amdgpu_sriov_vf(psp->adev)) return 0; - return psp_ta_invoke(psp, ta_cmd_id, psp->hdcp_context.session_id); + return psp_ta_invoke(psp, ta_cmd_id, psp->hdcp_context.context.session_id); } static int psp_hdcp_terminate(struct psp_context *psp) @@ -1554,8 +1685,8 @@ static int psp_hdcp_terminate(struct psp_context *psp) if (amdgpu_sriov_vf(psp->adev)) return 0; - if (!psp->hdcp_context.hdcp_initialized) { - if (psp->hdcp_context.hdcp_shared_buf) + if (!psp->hdcp_context.context.initialized) { + if (psp->hdcp_context.context.mem_context.shared_buf) goto out; else return 0; @@ -1565,13 +1696,11 @@ static int psp_hdcp_terminate(struct psp_context *psp) if (ret) return ret; - psp->hdcp_context.hdcp_initialized = false; + psp->hdcp_context.context.initialized = false; out: /* free hdcp shared memory */ - amdgpu_bo_free_kernel(&psp->hdcp_context.hdcp_shared_bo, - &psp->hdcp_context.hdcp_shared_mc_addr, - &psp->hdcp_context.hdcp_shared_buf); + psp_ta_free_shared_buf(&psp->hdcp_context.context.mem_context); return 0; } @@ -1580,19 +1709,8 @@ out: // DTM start static int psp_dtm_init_shared_buf(struct psp_context *psp) { - int ret; - - /* - * Allocate 16k memory aligned to 4k from Frame Buffer (local - * physical) for dtm ta <-> Driver - */ - ret = amdgpu_bo_create_kernel(psp->adev, PSP_DTM_SHARED_MEM_SIZE, - PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, - &psp->dtm_context.dtm_shared_bo, - &psp->dtm_context.dtm_shared_mc_addr, - &psp->dtm_context.dtm_shared_buf); - - return ret; + return psp_ta_init_shared_buf(psp, &psp->dtm_context.context.mem_context, + PSP_DTM_SHARED_MEM_SIZE); } static int psp_dtm_load(struct psp_context *psp) @@ -1606,27 +1724,25 @@ static int psp_dtm_load(struct psp_context *psp) if (amdgpu_sriov_vf(psp->adev)) return 0; - cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); - if (!cmd) - return -ENOMEM; 
+ psp_copy_fw(psp, psp->dtm.start_addr, psp->dtm.size_bytes); - psp_copy_fw(psp, psp->ta_dtm_start_addr, psp->ta_dtm_ucode_size); + cmd = acquire_psp_cmd_buf(psp); psp_prep_ta_load_cmd_buf(cmd, psp->fw_pri_mc_addr, - psp->ta_dtm_ucode_size, - psp->dtm_context.dtm_shared_mc_addr, + psp->dtm.size_bytes, + psp->dtm_context.context.mem_context.shared_mc_addr, PSP_DTM_SHARED_MEM_SIZE); ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); if (!ret) { - psp->dtm_context.dtm_initialized = true; - psp->dtm_context.session_id = cmd->resp.session_id; + psp->dtm_context.context.initialized = true; + psp->dtm_context.context.session_id = cmd->resp.session_id; mutex_init(&psp->dtm_context.mutex); } - kfree(cmd); + release_psp_cmd_buf(psp); return ret; } @@ -1641,13 +1757,13 @@ static int psp_dtm_initialize(struct psp_context *psp) if (amdgpu_sriov_vf(psp->adev)) return 0; - if (!psp->adev->psp.ta_dtm_ucode_size || - !psp->adev->psp.ta_dtm_start_addr) { + if (!psp->dtm.size_bytes || + !psp->dtm.start_addr) { dev_info(psp->adev->dev, "DTM: optional dtm ta ucode is not available\n"); return 0; } - if (!psp->dtm_context.dtm_initialized) { + if (!psp->dtm_context.context.initialized) { ret = psp_dtm_init_shared_buf(psp); if (ret) return ret; @@ -1671,15 +1787,13 @@ static int psp_dtm_unload(struct psp_context *psp) if (amdgpu_sriov_vf(psp->adev)) return 0; - cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); - if (!cmd) - return -ENOMEM; + cmd = acquire_psp_cmd_buf(psp); - psp_prep_ta_unload_cmd_buf(cmd, psp->dtm_context.session_id); + psp_prep_ta_unload_cmd_buf(cmd, psp->dtm_context.context.session_id); ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); - kfree(cmd); + release_psp_cmd_buf(psp); return ret; } @@ -1692,7 +1806,7 @@ int psp_dtm_invoke(struct psp_context *psp, uint32_t ta_cmd_id) if (amdgpu_sriov_vf(psp->adev)) return 0; - return psp_ta_invoke(psp, ta_cmd_id, psp->dtm_context.session_id); + return psp_ta_invoke(psp, ta_cmd_id, psp->dtm_context.context.session_id); } static int psp_dtm_terminate(struct psp_context *psp) @@ -1705,8 +1819,8 @@ static int psp_dtm_terminate(struct psp_context *psp) if (amdgpu_sriov_vf(psp->adev)) return 0; - if (!psp->dtm_context.dtm_initialized) { - if (psp->dtm_context.dtm_shared_buf) + if (!psp->dtm_context.context.initialized) { + if (psp->dtm_context.context.mem_context.shared_buf) goto out; else return 0; @@ -1716,13 +1830,11 @@ static int psp_dtm_terminate(struct psp_context *psp) if (ret) return ret; - psp->dtm_context.dtm_initialized = false; + psp->dtm_context.context.initialized = false; out: - /* free hdcp shared memory */ - amdgpu_bo_free_kernel(&psp->dtm_context.dtm_shared_bo, - &psp->dtm_context.dtm_shared_mc_addr, - &psp->dtm_context.dtm_shared_buf); + /* free dtm shared memory */ + psp_ta_free_shared_buf(&psp->dtm_context.context.mem_context); return 0; } @@ -1731,19 +1843,8 @@ out: // RAP start static int psp_rap_init_shared_buf(struct psp_context *psp) { - int ret; - - /* - * Allocate 16k memory aligned to 4k from Frame Buffer (local - * physical) for rap ta <-> Driver - */ - ret = amdgpu_bo_create_kernel(psp->adev, PSP_RAP_SHARED_MEM_SIZE, - PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, - &psp->rap_context.rap_shared_bo, - &psp->rap_context.rap_shared_mc_addr, - &psp->rap_context.rap_shared_buf); - - return ret; + return psp_ta_init_shared_buf(psp, &psp->rap_context.context.mem_context, + PSP_RAP_SHARED_MEM_SIZE); } static int psp_rap_load(struct psp_context *psp) @@ -1751,27 +1852,25 @@ static int psp_rap_load(struct 
psp_context *psp) int ret; struct psp_gfx_cmd_resp *cmd; - cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); - if (!cmd) - return -ENOMEM; + psp_copy_fw(psp, psp->rap.start_addr, psp->rap.size_bytes); - psp_copy_fw(psp, psp->ta_rap_start_addr, psp->ta_rap_ucode_size); + cmd = acquire_psp_cmd_buf(psp); psp_prep_ta_load_cmd_buf(cmd, psp->fw_pri_mc_addr, - psp->ta_rap_ucode_size, - psp->rap_context.rap_shared_mc_addr, + psp->rap.size_bytes, + psp->rap_context.context.mem_context.shared_mc_addr, PSP_RAP_SHARED_MEM_SIZE); ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); if (!ret) { - psp->rap_context.rap_initialized = true; - psp->rap_context.session_id = cmd->resp.session_id; + psp->rap_context.context.initialized = true; + psp->rap_context.context.session_id = cmd->resp.session_id; mutex_init(&psp->rap_context.mutex); } - kfree(cmd); + release_psp_cmd_buf(psp); return ret; } @@ -1779,17 +1878,13 @@ static int psp_rap_load(struct psp_context *psp) static int psp_rap_unload(struct psp_context *psp) { int ret; - struct psp_gfx_cmd_resp *cmd; + struct psp_gfx_cmd_resp *cmd = acquire_psp_cmd_buf(psp); - cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); - if (!cmd) - return -ENOMEM; - - psp_prep_ta_unload_cmd_buf(cmd, psp->rap_context.session_id); + psp_prep_ta_unload_cmd_buf(cmd, psp->rap_context.context.session_id); ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); - kfree(cmd); + release_psp_cmd_buf(psp); return ret; } @@ -1805,13 +1900,13 @@ static int psp_rap_initialize(struct psp_context *psp) if (amdgpu_sriov_vf(psp->adev)) return 0; - if (!psp->adev->psp.ta_rap_ucode_size || - !psp->adev->psp.ta_rap_start_addr) { + if (!psp->rap.size_bytes || + !psp->rap.start_addr) { dev_info(psp->adev->dev, "RAP: optional rap ta ucode is not available\n"); return 0; } - if (!psp->rap_context.rap_initialized) { + if (!psp->rap_context.context.initialized) { ret = psp_rap_init_shared_buf(psp); if (ret) return ret; @@ -1825,11 +1920,9 @@ static int psp_rap_initialize(struct psp_context *psp) if (ret || status != TA_RAP_STATUS__SUCCESS) { psp_rap_unload(psp); - amdgpu_bo_free_kernel(&psp->rap_context.rap_shared_bo, - &psp->rap_context.rap_shared_mc_addr, - &psp->rap_context.rap_shared_buf); + psp_ta_free_shared_buf(&psp->rap_context.context.mem_context); - psp->rap_context.rap_initialized = false; + psp->rap_context.context.initialized = false; dev_warn(psp->adev->dev, "RAP TA initialize fail (%d) status %d.\n", ret, status); @@ -1844,17 +1937,15 @@ static int psp_rap_terminate(struct psp_context *psp) { int ret; - if (!psp->rap_context.rap_initialized) + if (!psp->rap_context.context.initialized) return 0; ret = psp_rap_unload(psp); - psp->rap_context.rap_initialized = false; + psp->rap_context.context.initialized = false; /* free rap shared memory */ - amdgpu_bo_free_kernel(&psp->rap_context.rap_shared_bo, - &psp->rap_context.rap_shared_mc_addr, - &psp->rap_context.rap_shared_buf); + psp_ta_free_shared_buf(&psp->rap_context.context.mem_context); return ret; } @@ -1864,7 +1955,7 @@ int psp_rap_invoke(struct psp_context *psp, uint32_t ta_cmd_id, enum ta_rap_stat struct ta_rap_shared_memory *rap_cmd; int ret = 0; - if (!psp->rap_context.rap_initialized) + if (!psp->rap_context.context.initialized) return 0; if (ta_cmd_id != TA_CMD_RAP__INITIALIZE && @@ -1874,13 +1965,13 @@ int psp_rap_invoke(struct psp_context *psp, uint32_t ta_cmd_id, enum ta_rap_stat mutex_lock(&psp->rap_context.mutex); rap_cmd = (struct ta_rap_shared_memory *) - 
psp->rap_context.rap_shared_buf; + psp->rap_context.context.mem_context.shared_buf; memset(rap_cmd, 0, sizeof(struct ta_rap_shared_memory)); rap_cmd->cmd_id = ta_cmd_id; rap_cmd->validation_method_id = METHOD_A; - ret = psp_ta_invoke(psp, rap_cmd->cmd_id, psp->rap_context.session_id); + ret = psp_ta_invoke(psp, rap_cmd->cmd_id, psp->rap_context.context.session_id); if (ret) goto out_unlock; @@ -1897,67 +1988,48 @@ out_unlock: /* securedisplay start */ static int psp_securedisplay_init_shared_buf(struct psp_context *psp) { - int ret; - - /* - * Allocate 16k memory aligned to 4k from Frame Buffer (local - * physical) for sa ta <-> Driver - */ - ret = amdgpu_bo_create_kernel(psp->adev, PSP_SECUREDISPLAY_SHARED_MEM_SIZE, - PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, - &psp->securedisplay_context.securedisplay_shared_bo, - &psp->securedisplay_context.securedisplay_shared_mc_addr, - &psp->securedisplay_context.securedisplay_shared_buf); - - return ret; + return psp_ta_init_shared_buf( + psp, &psp->securedisplay_context.context.mem_context, + PSP_SECUREDISPLAY_SHARED_MEM_SIZE); } static int psp_securedisplay_load(struct psp_context *psp) { int ret; - struct psp_gfx_cmd_resp *cmd; - - cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); - if (!cmd) - return -ENOMEM; + struct psp_gfx_cmd_resp *cmd = acquire_psp_cmd_buf(psp); memset(psp->fw_pri_buf, 0, PSP_1_MEG); - memcpy(psp->fw_pri_buf, psp->ta_securedisplay_start_addr, psp->ta_securedisplay_ucode_size); + memcpy(psp->fw_pri_buf, psp->securedisplay.start_addr, psp->securedisplay.size_bytes); psp_prep_ta_load_cmd_buf(cmd, psp->fw_pri_mc_addr, - psp->ta_securedisplay_ucode_size, - psp->securedisplay_context.securedisplay_shared_mc_addr, + psp->securedisplay.size_bytes, + psp->securedisplay_context.context.mem_context.shared_mc_addr, PSP_SECUREDISPLAY_SHARED_MEM_SIZE); ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); - if (ret) - goto failed; + if (!ret) { + psp->securedisplay_context.context.initialized = true; + psp->securedisplay_context.context.session_id = cmd->resp.session_id; + mutex_init(&psp->securedisplay_context.mutex); + } - psp->securedisplay_context.securedisplay_initialized = true; - psp->securedisplay_context.session_id = cmd->resp.session_id; - mutex_init(&psp->securedisplay_context.mutex); + release_psp_cmd_buf(psp); -failed: - kfree(cmd); return ret; } static int psp_securedisplay_unload(struct psp_context *psp) { int ret; - struct psp_gfx_cmd_resp *cmd; - - cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); - if (!cmd) - return -ENOMEM; + struct psp_gfx_cmd_resp *cmd = acquire_psp_cmd_buf(psp); - psp_prep_ta_unload_cmd_buf(cmd, psp->securedisplay_context.session_id); + psp_prep_ta_unload_cmd_buf(cmd, psp->securedisplay_context.context.session_id); ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); - kfree(cmd); + release_psp_cmd_buf(psp); return ret; } @@ -1973,13 +2045,13 @@ static int psp_securedisplay_initialize(struct psp_context *psp) if (amdgpu_sriov_vf(psp->adev)) return 0; - if (!psp->adev->psp.ta_securedisplay_ucode_size || - !psp->adev->psp.ta_securedisplay_start_addr) { + if (!psp->securedisplay.size_bytes || + !psp->securedisplay.start_addr) { dev_info(psp->adev->dev, "SECUREDISPLAY: securedisplay ta ucode is not available\n"); return 0; } - if (!psp->securedisplay_context.securedisplay_initialized) { + if (!psp->securedisplay_context.context.initialized) { ret = psp_securedisplay_init_shared_buf(psp); if (ret) return ret; @@ -1996,11 +2068,9 @@ static int 
psp_securedisplay_initialize(struct psp_context *psp) if (ret) { psp_securedisplay_unload(psp); - amdgpu_bo_free_kernel(&psp->securedisplay_context.securedisplay_shared_bo, - &psp->securedisplay_context.securedisplay_shared_mc_addr, - &psp->securedisplay_context.securedisplay_shared_buf); + psp_ta_free_shared_buf(&psp->securedisplay_context.context.mem_context); - psp->securedisplay_context.securedisplay_initialized = false; + psp->securedisplay_context.context.initialized = false; dev_err(psp->adev->dev, "SECUREDISPLAY TA initialize fail.\n"); return -EINVAL; @@ -2025,19 +2095,17 @@ static int psp_securedisplay_terminate(struct psp_context *psp) if (amdgpu_sriov_vf(psp->adev)) return 0; - if (!psp->securedisplay_context.securedisplay_initialized) + if (!psp->securedisplay_context.context.initialized) return 0; ret = psp_securedisplay_unload(psp); if (ret) return ret; - psp->securedisplay_context.securedisplay_initialized = false; + psp->securedisplay_context.context.initialized = false; /* free securedisplay shared memory */ - amdgpu_bo_free_kernel(&psp->securedisplay_context.securedisplay_shared_bo, - &psp->securedisplay_context.securedisplay_shared_mc_addr, - &psp->securedisplay_context.securedisplay_shared_buf); + psp_ta_free_shared_buf(&psp->securedisplay_context.context.mem_context); return ret; } @@ -2046,7 +2114,7 @@ int psp_securedisplay_invoke(struct psp_context *psp, uint32_t ta_cmd_id) { int ret; - if (!psp->securedisplay_context.securedisplay_initialized) + if (!psp->securedisplay_context.context.initialized) return -EINVAL; if (ta_cmd_id != TA_SECUREDISPLAY_COMMAND__QUERY_TA && @@ -2055,7 +2123,7 @@ int psp_securedisplay_invoke(struct psp_context *psp, uint32_t ta_cmd_id) mutex_lock(&psp->securedisplay_context.mutex); - ret = psp_ta_invoke(psp, ta_cmd_id, psp->securedisplay_context.session_id); + ret = psp_ta_invoke(psp, ta_cmd_id, psp->securedisplay_context.context.session_id); mutex_unlock(&psp->securedisplay_context.mutex); @@ -2069,7 +2137,7 @@ static int psp_hw_start(struct psp_context *psp) int ret; if (!amdgpu_sriov_vf(adev)) { - if (psp->kdb_bin_size && + if ((is_psp_fw_valid(psp->kdb)) && (psp->funcs->bootloader_load_kdb != NULL)) { ret = psp_bootloader_load_kdb(psp); if (ret) { @@ -2078,7 +2146,8 @@ static int psp_hw_start(struct psp_context *psp) } } - if (psp->spl_bin_size) { + if ((is_psp_fw_valid(psp->spl)) && + (psp->funcs->bootloader_load_spl != NULL)) { ret = psp_bootloader_load_spl(psp); if (ret) { DRM_ERROR("PSP load spl failed!\n"); @@ -2086,16 +2155,49 @@ static int psp_hw_start(struct psp_context *psp) } } - ret = psp_bootloader_load_sysdrv(psp); - if (ret) { - DRM_ERROR("PSP load sysdrv failed!\n"); - return ret; + if ((is_psp_fw_valid(psp->sys)) && + (psp->funcs->bootloader_load_sysdrv != NULL)) { + ret = psp_bootloader_load_sysdrv(psp); + if (ret) { + DRM_ERROR("PSP load sys drv failed!\n"); + return ret; + } } - ret = psp_bootloader_load_sos(psp); - if (ret) { - DRM_ERROR("PSP load sos failed!\n"); - return ret; + if ((is_psp_fw_valid(psp->soc_drv)) && + (psp->funcs->bootloader_load_soc_drv != NULL)) { + ret = psp_bootloader_load_soc_drv(psp); + if (ret) { + DRM_ERROR("PSP load soc drv failed!\n"); + return ret; + } + } + + if ((is_psp_fw_valid(psp->intf_drv)) && + (psp->funcs->bootloader_load_intf_drv != NULL)) { + ret = psp_bootloader_load_intf_drv(psp); + if (ret) { + DRM_ERROR("PSP load intf drv failed!\n"); + return ret; + } + } + + if ((is_psp_fw_valid(psp->dbg_drv)) && + (psp->funcs->bootloader_load_dbg_drv != NULL)) { + ret = 
psp_bootloader_load_dbg_drv(psp); + if (ret) { + DRM_ERROR("PSP load dbg drv failed!\n"); + return ret; + } + } + + if ((is_psp_fw_valid(psp->sos)) && + (psp->funcs->bootloader_load_sos != NULL)) { + ret = psp_bootloader_load_sos(psp); + if (ret) { + DRM_ERROR("PSP load sos failed!\n"); + return ret; + } } } @@ -2299,8 +2401,6 @@ static int psp_prep_load_ip_fw_cmd_buf(struct amdgpu_firmware_info *ucode, int ret; uint64_t fw_mem_mc_addr = ucode->mc_addr; - memset(cmd, 0, sizeof(struct psp_gfx_cmd_resp)); - cmd->cmd_id = GFX_CMD_ID_LOAD_IP_FW; cmd->cmd.cmd_load_ip_fw.fw_phy_addr_lo = lower_32_bits(fw_mem_mc_addr); cmd->cmd.cmd_load_ip_fw.fw_phy_addr_hi = upper_32_bits(fw_mem_mc_addr); @@ -2313,17 +2413,19 @@ static int psp_prep_load_ip_fw_cmd_buf(struct amdgpu_firmware_info *ucode, return ret; } -static int psp_execute_np_fw_load(struct psp_context *psp, +static int psp_execute_non_psp_fw_load(struct psp_context *psp, struct amdgpu_firmware_info *ucode) { int ret = 0; + struct psp_gfx_cmd_resp *cmd = acquire_psp_cmd_buf(psp); - ret = psp_prep_load_ip_fw_cmd_buf(ucode, psp->cmd); - if (ret) - return ret; + ret = psp_prep_load_ip_fw_cmd_buf(ucode, cmd); + if (!ret) { + ret = psp_cmd_submit_buf(psp, ucode, cmd, + psp->fence_buf_mc_addr); + } - ret = psp_cmd_submit_buf(psp, ucode, psp->cmd, - psp->fence_buf_mc_addr); + release_psp_cmd_buf(psp); return ret; } @@ -2334,7 +2436,7 @@ static int psp_load_smu_fw(struct psp_context *psp) struct amdgpu_device *adev = psp->adev; struct amdgpu_firmware_info *ucode = &adev->firmware.ucode[AMDGPU_UCODE_ID_SMC]; - struct amdgpu_ras *ras = psp->ras.ras; + struct amdgpu_ras *ras = psp->ras_context.ras; if (!ucode->fw || amdgpu_sriov_vf(psp->adev)) return 0; @@ -2349,7 +2451,7 @@ static int psp_load_smu_fw(struct psp_context *psp) } } - ret = psp_execute_np_fw_load(psp, ucode); + ret = psp_execute_non_psp_fw_load(psp, ucode); if (ret) DRM_ERROR("PSP load smu failed!\n"); @@ -2404,14 +2506,14 @@ int psp_load_fw_list(struct psp_context *psp, for (i = 0; i < ucode_count; ++i) { ucode = ucode_list[i]; psp_print_fw_hdr(psp, ucode); - ret = psp_execute_np_fw_load(psp, ucode); + ret = psp_execute_non_psp_fw_load(psp, ucode); if (ret) return ret; } return ret; } -static int psp_np_fw_load(struct psp_context *psp) +static int psp_load_non_psp_fw(struct psp_context *psp) { int i, ret; struct amdgpu_firmware_info *ucode; @@ -2450,7 +2552,7 @@ static int psp_np_fw_load(struct psp_context *psp) psp_print_fw_hdr(psp, ucode); - ret = psp_execute_np_fw_load(psp, ucode); + ret = psp_execute_non_psp_fw_load(psp, ucode); if (ret) return ret; @@ -2478,10 +2580,6 @@ static int psp_load_fw(struct amdgpu_device *adev) goto skip_memalloc; } - psp->cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); - if (!psp->cmd) - return -ENOMEM; - if (amdgpu_sriov_vf(adev)) { ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG, AMDGPU_GEM_DOMAIN_VRAM, @@ -2527,7 +2625,7 @@ skip_memalloc: if (ret) goto failed; - ret = psp_np_fw_load(psp); + ret = psp_load_non_psp_fw(psp); if (ret) goto failed; @@ -2543,7 +2641,7 @@ skip_memalloc: return ret; } - if (psp->adev->psp.ta_fw) { + if (psp->ta_fw) { ret = psp_ras_initialize(psp); if (ret) dev_err(psp->adev->dev, @@ -2615,7 +2713,7 @@ static int psp_hw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct psp_context *psp = &adev->psp; - if (psp->adev->psp.ta_fw) { + if (psp->ta_fw) { psp_ras_terminate(psp); psp_securedisplay_terminate(psp); psp_rap_terminate(psp); @@ -2635,9 +2733,6 @@ static int 
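psp_hw_start() now gates every bootloader stage (kdb, spl, sys drv, soc drv, intf drv, dbg drv, sos) on two conditions: the packaged binary is actually present, i.e. is_psp_fw_valid() sees a non-zero size_bytes, and the ASIC-specific hook is wired up in psp->funcs. The driver keeps the explicit if-blocks shown above; the table-driven sketch below is only meant to make the skip-if-absent rule easy to see (struct fields are trimmed and the loader is a stub):

#include <stdio.h>
#include <stdint.h>

struct psp_bin_desc { uint32_t size_bytes; };           /* trimmed to what we need */
struct psp_context;
typedef int (*bl_load_fn)(struct psp_context *);

struct psp_context {
        struct psp_bin_desc kdb, spl, sys, soc_drv, sos;
};

/* Hypothetical stage loader; the real hooks poke PSP bootloader registers. */
static int load_stage(struct psp_context *psp) { (void)psp; return 0; }

struct bl_stage { const char *name; struct psp_bin_desc *bin; bl_load_fn load; };

int main(void)
{
        struct psp_context psp = { .sys.size_bytes = 4096, .sos.size_bytes = 65536 };
        struct bl_stage stages[] = {
                { "kdb", &psp.kdb, load_stage }, { "spl", &psp.spl, load_stage },
                { "sys", &psp.sys, load_stage }, { "soc_drv", &psp.soc_drv, NULL },
                { "sos", &psp.sos, load_stage },
        };

        for (size_t i = 0; i < sizeof(stages) / sizeof(stages[0]); i++) {
                /* Skip a stage when the binary is absent or the hook is not wired up. */
                if (!stages[i].bin->size_bytes || !stages[i].load)
                        continue;
                if (stages[i].load(&psp))
                        return 1;
                printf("loaded %s\n", stages[i].name);
        }
        return 0;
}

ASICs that never package soc_drv/intf_drv/dbg_drv simply fall through those stages, because their descriptors stay zero-sized.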
psp_hw_fini(void *handle) amdgpu_bo_free_kernel(&psp->cmd_buf_bo, &psp->cmd_buf_mc_addr, (void **)&psp->cmd_buf_mem); - kfree(psp->cmd); - psp->cmd = NULL; - return 0; } @@ -2648,7 +2743,7 @@ static int psp_suspend(void *handle) struct psp_context *psp = &adev->psp; if (adev->gmc.xgmi.num_physical_nodes > 1 && - psp->xgmi_context.initialized == 1) { + psp->xgmi_context.context.initialized) { ret = psp_xgmi_terminate(psp); if (ret) { DRM_ERROR("Failed to terminate xgmi ta\n"); @@ -2656,7 +2751,7 @@ static int psp_suspend(void *handle) } } - if (psp->adev->psp.ta_fw) { + if (psp->ta_fw) { ret = psp_ras_terminate(psp); if (ret) { DRM_ERROR("Failed to terminate ras ta\n"); @@ -2727,7 +2822,7 @@ static int psp_resume(void *handle) if (ret) goto failed; - ret = psp_np_fw_load(psp); + ret = psp_load_non_psp_fw(psp); if (ret) goto failed; @@ -2738,7 +2833,7 @@ static int psp_resume(void *handle) } if (adev->gmc.xgmi.num_physical_nodes > 1) { - ret = psp_xgmi_initialize(psp); + ret = psp_xgmi_initialize(psp, false, true); /* Warning the XGMI seesion initialize failure * Instead of stop driver initialization */ @@ -2747,7 +2842,7 @@ static int psp_resume(void *handle) "XGMI: Failed to initialize XGMI session\n"); } - if (psp->adev->psp.ta_fw) { + if (psp->ta_fw) { ret = psp_ras_initialize(psp); if (ret) dev_err(psp->adev->dev, @@ -2801,17 +2896,15 @@ int psp_gpu_reset(struct amdgpu_device *adev) int psp_rlc_autoload_start(struct psp_context *psp) { int ret; - struct psp_gfx_cmd_resp *cmd; - - cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); - if (!cmd) - return -ENOMEM; + struct psp_gfx_cmd_resp *cmd = acquire_psp_cmd_buf(psp); cmd->cmd_id = GFX_CMD_ID_AUTOLOAD_RLC; ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); - kfree(cmd); + + release_psp_cmd_buf(psp); + return ret; } @@ -2825,7 +2918,7 @@ int psp_update_vcn_sram(struct amdgpu_device *adev, int inst_idx, ucode.mc_addr = cmd_gpu_addr; ucode.ucode_size = cmd_size; - return psp_execute_np_fw_load(&adev->psp, &ucode); + return psp_execute_non_psp_fw_load(&adev->psp, &ucode); } int psp_ring_cmd_submit(struct psp_context *psp, @@ -2901,10 +2994,10 @@ int psp_init_asd_microcode(struct psp_context *psp, goto out; asd_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.asd_fw->data; - adev->psp.asd_fw_version = le32_to_cpu(asd_hdr->header.ucode_version); - adev->psp.asd_feature_version = le32_to_cpu(asd_hdr->sos.fw_version); - adev->psp.asd_ucode_size = le32_to_cpu(asd_hdr->header.ucode_size_bytes); - adev->psp.asd_start_addr = (uint8_t *)asd_hdr + + adev->psp.asd.fw_version = le32_to_cpu(asd_hdr->header.ucode_version); + adev->psp.asd.feature_version = le32_to_cpu(asd_hdr->sos.fw_version); + adev->psp.asd.size_bytes = le32_to_cpu(asd_hdr->header.ucode_size_bytes); + adev->psp.asd.start_addr = (uint8_t *)asd_hdr + le32_to_cpu(asd_hdr->header.ucode_array_offset_bytes); return 0; out: @@ -2918,7 +3011,7 @@ int psp_init_toc_microcode(struct psp_context *psp, const char *chip_name) { struct amdgpu_device *adev = psp->adev; - char fw_name[30]; + char fw_name[PSP_FW_NAME_LEN]; const struct psp_firmware_header_v1_0 *toc_hdr; int err = 0; @@ -2937,10 +3030,10 @@ int psp_init_toc_microcode(struct psp_context *psp, goto out; toc_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.toc_fw->data; - adev->psp.toc_fw_version = le32_to_cpu(toc_hdr->header.ucode_version); - adev->psp.toc_feature_version = le32_to_cpu(toc_hdr->sos.fw_version); - adev->psp.toc_bin_size = le32_to_cpu(toc_hdr->header.ucode_size_bytes); - 
adev->psp.toc_start_addr = (uint8_t *)toc_hdr + + adev->psp.toc.fw_version = le32_to_cpu(toc_hdr->header.ucode_version); + adev->psp.toc.feature_version = le32_to_cpu(toc_hdr->sos.fw_version); + adev->psp.toc.size_bytes = le32_to_cpu(toc_hdr->header.ucode_size_bytes); + adev->psp.toc.start_addr = (uint8_t *)toc_hdr + le32_to_cpu(toc_hdr->header.ucode_array_offset_bytes); return 0; out: @@ -2950,6 +3043,82 @@ out: return err; } +static int parse_sos_bin_descriptor(struct psp_context *psp, + const struct psp_fw_bin_desc *desc, + const struct psp_firmware_header_v2_0 *sos_hdr) +{ + uint8_t *ucode_start_addr = NULL; + + if (!psp || !desc || !sos_hdr) + return -EINVAL; + + ucode_start_addr = (uint8_t *)sos_hdr + + le32_to_cpu(desc->offset_bytes) + + le32_to_cpu(sos_hdr->header.ucode_array_offset_bytes); + + switch (desc->fw_type) { + case PSP_FW_TYPE_PSP_SOS: + psp->sos.fw_version = le32_to_cpu(desc->fw_version); + psp->sos.feature_version = le32_to_cpu(desc->fw_version); + psp->sos.size_bytes = le32_to_cpu(desc->size_bytes); + psp->sos.start_addr = ucode_start_addr; + break; + case PSP_FW_TYPE_PSP_SYS_DRV: + psp->sys.fw_version = le32_to_cpu(desc->fw_version); + psp->sys.feature_version = le32_to_cpu(desc->fw_version); + psp->sys.size_bytes = le32_to_cpu(desc->size_bytes); + psp->sys.start_addr = ucode_start_addr; + break; + case PSP_FW_TYPE_PSP_KDB: + psp->kdb.fw_version = le32_to_cpu(desc->fw_version); + psp->kdb.feature_version = le32_to_cpu(desc->fw_version); + psp->kdb.size_bytes = le32_to_cpu(desc->size_bytes); + psp->kdb.start_addr = ucode_start_addr; + break; + case PSP_FW_TYPE_PSP_TOC: + psp->toc.fw_version = le32_to_cpu(desc->fw_version); + psp->toc.feature_version = le32_to_cpu(desc->fw_version); + psp->toc.size_bytes = le32_to_cpu(desc->size_bytes); + psp->toc.start_addr = ucode_start_addr; + break; + case PSP_FW_TYPE_PSP_SPL: + psp->spl.fw_version = le32_to_cpu(desc->fw_version); + psp->spl.feature_version = le32_to_cpu(desc->fw_version); + psp->spl.size_bytes = le32_to_cpu(desc->size_bytes); + psp->spl.start_addr = ucode_start_addr; + break; + case PSP_FW_TYPE_PSP_RL: + psp->rl.fw_version = le32_to_cpu(desc->fw_version); + psp->rl.feature_version = le32_to_cpu(desc->fw_version); + psp->rl.size_bytes = le32_to_cpu(desc->size_bytes); + psp->rl.start_addr = ucode_start_addr; + break; + case PSP_FW_TYPE_PSP_SOC_DRV: + psp->soc_drv.fw_version = le32_to_cpu(desc->fw_version); + psp->soc_drv.feature_version = le32_to_cpu(desc->fw_version); + psp->soc_drv.size_bytes = le32_to_cpu(desc->size_bytes); + psp->soc_drv.start_addr = ucode_start_addr; + break; + case PSP_FW_TYPE_PSP_INTF_DRV: + psp->intf_drv.fw_version = le32_to_cpu(desc->fw_version); + psp->intf_drv.feature_version = le32_to_cpu(desc->fw_version); + psp->intf_drv.size_bytes = le32_to_cpu(desc->size_bytes); + psp->intf_drv.start_addr = ucode_start_addr; + break; + case PSP_FW_TYPE_PSP_DBG_DRV: + psp->dbg_drv.fw_version = le32_to_cpu(desc->fw_version); + psp->dbg_drv.feature_version = le32_to_cpu(desc->fw_version); + psp->dbg_drv.size_bytes = le32_to_cpu(desc->size_bytes); + psp->dbg_drv.start_addr = ucode_start_addr; + break; + default: + dev_warn(psp->adev->dev, "Unsupported PSP FW type: %d\n", desc->fw_type); + break; + } + + return 0; +} + static int psp_init_sos_base_fw(struct amdgpu_device *adev) { const struct psp_firmware_header_v1_0 *sos_hdr; @@ -2961,32 +3130,34 @@ static int psp_init_sos_base_fw(struct amdgpu_device *adev) le32_to_cpu(sos_hdr->header.ucode_array_offset_bytes); if (adev->gmc.xgmi.connected_to_cpu || 
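Every case in parse_sos_bin_descriptor() resolves a component the same way: start_addr = image base + the header's ucode_array_offset_bytes + the descriptor's own offset_bytes, and the version/size pair is copied into the matching struct psp_bin_desc. A standalone sketch of just that address arithmetic (structure names mirror the driver but the field sets are trimmed, and the sample numbers are made up):

#include <stdio.h>
#include <stdint.h>

/* Trimmed, illustrative versions of the firmware container structures. */
struct fw_header    { uint32_t ucode_array_offset_bytes; };
struct fw_bin_desc  { uint32_t fw_version, offset_bytes, size_bytes; };
struct psp_bin_desc { uint32_t fw_version, size_bytes; const uint8_t *start_addr; };

static void parse_desc(const uint8_t *image, const struct fw_header *hdr,
                       const struct fw_bin_desc *desc, struct psp_bin_desc *out)
{
        out->fw_version = desc->fw_version;
        out->size_bytes = desc->size_bytes;
        /* image base + start of the ucode array + this component's offset */
        out->start_addr = image + hdr->ucode_array_offset_bytes + desc->offset_bytes;
}

int main(void)
{
        uint8_t image[256] = { 0 };
        struct fw_header hdr = { .ucode_array_offset_bytes = 64 };
        struct fw_bin_desc sos = { .fw_version = 0x210000, .offset_bytes = 32,
                                   .size_bytes = 96 };
        struct psp_bin_desc out;

        parse_desc(image, &hdr, &sos, &out);
        printf("sos v0x%x: %u bytes at image+%td\n",
               out.fw_version, out.size_bytes, out.start_addr - image);
        return 0;
}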
(adev->asic_type != CHIP_ALDEBARAN)) { - adev->psp.sos_fw_version = le32_to_cpu(sos_hdr->header.ucode_version); - adev->psp.sos_feature_version = le32_to_cpu(sos_hdr->sos.fw_version); + adev->psp.sos.fw_version = le32_to_cpu(sos_hdr->header.ucode_version); + adev->psp.sos.feature_version = le32_to_cpu(sos_hdr->sos.fw_version); - adev->psp.sys_bin_size = le32_to_cpu(sos_hdr->sos.offset_bytes); - adev->psp.sys_start_addr = ucode_array_start_addr; + adev->psp.sys.size_bytes = le32_to_cpu(sos_hdr->sos.offset_bytes); + adev->psp.sys.start_addr = ucode_array_start_addr; - adev->psp.sos_bin_size = le32_to_cpu(sos_hdr->sos.size_bytes); - adev->psp.sos_start_addr = ucode_array_start_addr + + adev->psp.sos.size_bytes = le32_to_cpu(sos_hdr->sos.size_bytes); + adev->psp.sos.start_addr = ucode_array_start_addr + le32_to_cpu(sos_hdr->sos.offset_bytes); + adev->psp.xgmi_context.supports_extended_data = false; } else { /* Load alternate PSP SOS FW */ sos_hdr_v1_3 = (const struct psp_firmware_header_v1_3 *)adev->psp.sos_fw->data; - adev->psp.sos_fw_version = le32_to_cpu(sos_hdr_v1_3->sos_aux.fw_version); - adev->psp.sos_feature_version = le32_to_cpu(sos_hdr_v1_3->sos_aux.fw_version); + adev->psp.sos.fw_version = le32_to_cpu(sos_hdr_v1_3->sos_aux.fw_version); + adev->psp.sos.feature_version = le32_to_cpu(sos_hdr_v1_3->sos_aux.fw_version); - adev->psp.sys_bin_size = le32_to_cpu(sos_hdr_v1_3->sys_drv_aux.size_bytes); - adev->psp.sys_start_addr = ucode_array_start_addr + + adev->psp.sys.size_bytes = le32_to_cpu(sos_hdr_v1_3->sys_drv_aux.size_bytes); + adev->psp.sys.start_addr = ucode_array_start_addr + le32_to_cpu(sos_hdr_v1_3->sys_drv_aux.offset_bytes); - adev->psp.sos_bin_size = le32_to_cpu(sos_hdr_v1_3->sos_aux.size_bytes); - adev->psp.sos_start_addr = ucode_array_start_addr + + adev->psp.sos.size_bytes = le32_to_cpu(sos_hdr_v1_3->sos_aux.size_bytes); + adev->psp.sos.start_addr = ucode_array_start_addr + le32_to_cpu(sos_hdr_v1_3->sos_aux.offset_bytes); + adev->psp.xgmi_context.supports_extended_data = true; } - if ((adev->psp.sys_bin_size == 0) || (adev->psp.sos_bin_size == 0)) { + if ((adev->psp.sys.size_bytes == 0) || (adev->psp.sos.size_bytes == 0)) { dev_warn(adev->dev, "PSP SOS FW not available"); return -EINVAL; } @@ -3003,8 +3174,10 @@ int psp_init_sos_microcode(struct psp_context *psp, const struct psp_firmware_header_v1_1 *sos_hdr_v1_1; const struct psp_firmware_header_v1_2 *sos_hdr_v1_2; const struct psp_firmware_header_v1_3 *sos_hdr_v1_3; + const struct psp_firmware_header_v2_0 *sos_hdr_v2_0; int err = 0; uint8_t *ucode_array_start_addr; + int fw_index = 0; if (!chip_name) { dev_err(adev->dev, "invalid chip name for sos microcode\n"); @@ -3033,35 +3206,52 @@ int psp_init_sos_microcode(struct psp_context *psp, if (sos_hdr->header.header_version_minor == 1) { sos_hdr_v1_1 = (const struct psp_firmware_header_v1_1 *)adev->psp.sos_fw->data; - adev->psp.toc_bin_size = le32_to_cpu(sos_hdr_v1_1->toc.size_bytes); - adev->psp.toc_start_addr = (uint8_t *)adev->psp.sys_start_addr + + adev->psp.toc.size_bytes = le32_to_cpu(sos_hdr_v1_1->toc.size_bytes); + adev->psp.toc.start_addr = (uint8_t *)adev->psp.sys.start_addr + le32_to_cpu(sos_hdr_v1_1->toc.offset_bytes); - adev->psp.kdb_bin_size = le32_to_cpu(sos_hdr_v1_1->kdb.size_bytes); - adev->psp.kdb_start_addr = (uint8_t *)adev->psp.sys_start_addr + + adev->psp.kdb.size_bytes = le32_to_cpu(sos_hdr_v1_1->kdb.size_bytes); + adev->psp.kdb.start_addr = (uint8_t *)adev->psp.sys.start_addr + le32_to_cpu(sos_hdr_v1_1->kdb.offset_bytes); } if 
(sos_hdr->header.header_version_minor == 2) { sos_hdr_v1_2 = (const struct psp_firmware_header_v1_2 *)adev->psp.sos_fw->data; - adev->psp.kdb_bin_size = le32_to_cpu(sos_hdr_v1_2->kdb.size_bytes); - adev->psp.kdb_start_addr = (uint8_t *)adev->psp.sys_start_addr + + adev->psp.kdb.size_bytes = le32_to_cpu(sos_hdr_v1_2->kdb.size_bytes); + adev->psp.kdb.start_addr = (uint8_t *)adev->psp.sys.start_addr + le32_to_cpu(sos_hdr_v1_2->kdb.offset_bytes); } if (sos_hdr->header.header_version_minor == 3) { sos_hdr_v1_3 = (const struct psp_firmware_header_v1_3 *)adev->psp.sos_fw->data; - adev->psp.toc_bin_size = le32_to_cpu(sos_hdr_v1_3->v1_1.toc.size_bytes); - adev->psp.toc_start_addr = ucode_array_start_addr + + adev->psp.toc.size_bytes = le32_to_cpu(sos_hdr_v1_3->v1_1.toc.size_bytes); + adev->psp.toc.start_addr = ucode_array_start_addr + le32_to_cpu(sos_hdr_v1_3->v1_1.toc.offset_bytes); - adev->psp.kdb_bin_size = le32_to_cpu(sos_hdr_v1_3->v1_1.kdb.size_bytes); - adev->psp.kdb_start_addr = ucode_array_start_addr + + adev->psp.kdb.size_bytes = le32_to_cpu(sos_hdr_v1_3->v1_1.kdb.size_bytes); + adev->psp.kdb.start_addr = ucode_array_start_addr + le32_to_cpu(sos_hdr_v1_3->v1_1.kdb.offset_bytes); - adev->psp.spl_bin_size = le32_to_cpu(sos_hdr_v1_3->spl.size_bytes); - adev->psp.spl_start_addr = ucode_array_start_addr + + adev->psp.spl.size_bytes = le32_to_cpu(sos_hdr_v1_3->spl.size_bytes); + adev->psp.spl.start_addr = ucode_array_start_addr + le32_to_cpu(sos_hdr_v1_3->spl.offset_bytes); - adev->psp.rl_bin_size = le32_to_cpu(sos_hdr_v1_3->rl.size_bytes); - adev->psp.rl_start_addr = ucode_array_start_addr + + adev->psp.rl.size_bytes = le32_to_cpu(sos_hdr_v1_3->rl.size_bytes); + adev->psp.rl.start_addr = ucode_array_start_addr + le32_to_cpu(sos_hdr_v1_3->rl.offset_bytes); } break; + case 2: + sos_hdr_v2_0 = (const struct psp_firmware_header_v2_0 *)adev->psp.sos_fw->data; + + if (le32_to_cpu(sos_hdr_v2_0->psp_fw_bin_count) >= UCODE_MAX_PSP_PACKAGING) { + dev_err(adev->dev, "packed SOS count exceeds maximum limit\n"); + err = -EINVAL; + goto out; + } + + for (fw_index = 0; fw_index < le32_to_cpu(sos_hdr_v2_0->psp_fw_bin_count); fw_index++) { + err = parse_sos_bin_descriptor(psp, + &sos_hdr_v2_0->psp_fw_bin[fw_index], + sos_hdr_v2_0); + if (err) + goto out; + } + break; default: dev_err(adev->dev, "unsupported psp sos firmware\n"); @@ -3080,7 +3270,7 @@ out: } static int parse_ta_bin_descriptor(struct psp_context *psp, - const struct ta_fw_bin_desc *desc, + const struct psp_fw_bin_desc *desc, const struct ta_firmware_header_v2_0 *ta_hdr) { uint8_t *ucode_start_addr = NULL; @@ -3094,40 +3284,40 @@ static int parse_ta_bin_descriptor(struct psp_context *psp, switch (desc->fw_type) { case TA_FW_TYPE_PSP_ASD: - psp->asd_fw_version = le32_to_cpu(desc->fw_version); - psp->asd_feature_version = le32_to_cpu(desc->fw_version); - psp->asd_ucode_size = le32_to_cpu(desc->size_bytes); - psp->asd_start_addr = ucode_start_addr; + psp->asd.fw_version = le32_to_cpu(desc->fw_version); + psp->asd.feature_version = le32_to_cpu(desc->fw_version); + psp->asd.size_bytes = le32_to_cpu(desc->size_bytes); + psp->asd.start_addr = ucode_start_addr; break; case TA_FW_TYPE_PSP_XGMI: - psp->ta_xgmi_ucode_version = le32_to_cpu(desc->fw_version); - psp->ta_xgmi_ucode_size = le32_to_cpu(desc->size_bytes); - psp->ta_xgmi_start_addr = ucode_start_addr; + psp->xgmi.feature_version = le32_to_cpu(desc->fw_version); + psp->xgmi.size_bytes = le32_to_cpu(desc->size_bytes); + psp->xgmi.start_addr = ucode_start_addr; break; case TA_FW_TYPE_PSP_RAS: - 
psp->ta_ras_ucode_version = le32_to_cpu(desc->fw_version); - psp->ta_ras_ucode_size = le32_to_cpu(desc->size_bytes); - psp->ta_ras_start_addr = ucode_start_addr; + psp->ras.feature_version = le32_to_cpu(desc->fw_version); + psp->ras.size_bytes = le32_to_cpu(desc->size_bytes); + psp->ras.start_addr = ucode_start_addr; break; case TA_FW_TYPE_PSP_HDCP: - psp->ta_hdcp_ucode_version = le32_to_cpu(desc->fw_version); - psp->ta_hdcp_ucode_size = le32_to_cpu(desc->size_bytes); - psp->ta_hdcp_start_addr = ucode_start_addr; + psp->hdcp.feature_version = le32_to_cpu(desc->fw_version); + psp->hdcp.size_bytes = le32_to_cpu(desc->size_bytes); + psp->hdcp.start_addr = ucode_start_addr; break; case TA_FW_TYPE_PSP_DTM: - psp->ta_dtm_ucode_version = le32_to_cpu(desc->fw_version); - psp->ta_dtm_ucode_size = le32_to_cpu(desc->size_bytes); - psp->ta_dtm_start_addr = ucode_start_addr; + psp->dtm.feature_version = le32_to_cpu(desc->fw_version); + psp->dtm.size_bytes = le32_to_cpu(desc->size_bytes); + psp->dtm.start_addr = ucode_start_addr; break; case TA_FW_TYPE_PSP_RAP: - psp->ta_rap_ucode_version = le32_to_cpu(desc->fw_version); - psp->ta_rap_ucode_size = le32_to_cpu(desc->size_bytes); - psp->ta_rap_start_addr = ucode_start_addr; + psp->rap.feature_version = le32_to_cpu(desc->fw_version); + psp->rap.size_bytes = le32_to_cpu(desc->size_bytes); + psp->rap.start_addr = ucode_start_addr; break; case TA_FW_TYPE_PSP_SECUREDISPLAY: - psp->ta_securedisplay_ucode_version = le32_to_cpu(desc->fw_version); - psp->ta_securedisplay_ucode_size = le32_to_cpu(desc->size_bytes); - psp->ta_securedisplay_start_addr = ucode_start_addr; + psp->securedisplay.feature_version = le32_to_cpu(desc->fw_version); + psp->securedisplay.size_bytes = le32_to_cpu(desc->size_bytes); + psp->securedisplay.start_addr = ucode_start_addr; break; default: dev_warn(psp->adev->dev, "Unsupported TA type: %d\n", desc->fw_type); @@ -3168,7 +3358,7 @@ int psp_init_ta_microcode(struct psp_context *psp, goto out; } - if (le32_to_cpu(ta_hdr->ta_fw_bin_count) >= UCODE_MAX_TA_PACKAGING) { + if (le32_to_cpu(ta_hdr->ta_fw_bin_count) >= UCODE_MAX_PSP_PACKAGING) { dev_err(adev->dev, "packed TA count exceeds maximum limit\n"); err = -EINVAL; goto out; @@ -3235,11 +3425,12 @@ static ssize_t psp_usbc_pd_fw_sysfs_write(struct device *dev, { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = drm_to_adev(ddev); - void *cpu_addr; - dma_addr_t dma_addr; int ret, idx; char fw_name[100]; const struct firmware *usbc_pd_fw; + struct amdgpu_bo *fw_buf_bo = NULL; + uint64_t fw_pri_mc_addr; + void *fw_pri_cpu_addr; if (!adev->ip_blocks[AMD_IP_BLOCK_TYPE_PSP].status.late_initialized) { DRM_INFO("PSP block is not ready yet."); @@ -3254,31 +3445,24 @@ static ssize_t psp_usbc_pd_fw_sysfs_write(struct device *dev, if (ret) goto fail; - /* We need contiguous physical mem to place the FW for psp to access */ - cpu_addr = dma_alloc_coherent(adev->dev, usbc_pd_fw->size, &dma_addr, GFP_KERNEL); - - ret = dma_mapping_error(adev->dev, dma_addr); + /* LFB address which is aligned to 1MB boundary per PSP request */ + ret = amdgpu_bo_create_kernel(adev, usbc_pd_fw->size, 0x100000, + AMDGPU_GEM_DOMAIN_VRAM, + &fw_buf_bo, + &fw_pri_mc_addr, + &fw_pri_cpu_addr); if (ret) goto rel_buf; - memcpy_toio(cpu_addr, usbc_pd_fw->data, usbc_pd_fw->size); - - /* - * x86 specific workaround. - * Without it the buffer is invisible in PSP. 
- * - * TODO Remove once PSP starts snooping CPU cache - */ -#ifdef CONFIG_X86 - clflush_cache_range(cpu_addr, (usbc_pd_fw->size & ~(L1_CACHE_BYTES - 1))); -#endif + memcpy_toio(fw_pri_cpu_addr, usbc_pd_fw->data, usbc_pd_fw->size); mutex_lock(&adev->psp.mutex); - ret = psp_load_usbc_pd_fw(&adev->psp, dma_addr); + ret = psp_load_usbc_pd_fw(&adev->psp, fw_pri_mc_addr); mutex_unlock(&adev->psp.mutex); + amdgpu_bo_free_kernel(&fw_buf_bo, &fw_pri_mc_addr, &fw_pri_cpu_addr); + rel_buf: - dma_free_coherent(adev->dev, usbc_pd_fw->size, cpu_addr, dma_addr); release_firmware(usbc_pd_fw); fail: if (ret) { @@ -3307,7 +3491,10 @@ static DEVICE_ATTR(usbc_pd_fw, S_IRUGO | S_IWUSR, psp_usbc_pd_fw_sysfs_read, psp_usbc_pd_fw_sysfs_write); - +int is_psp_fw_valid(struct psp_bin_desc bin) +{ + return bin.size_bytes; +} const struct amd_ip_funcs psp_ip_funcs = { .name = "psp", @@ -3369,6 +3556,14 @@ const struct amdgpu_ip_block_version psp_v11_0_ip_block = .funcs = &psp_ip_funcs, }; +const struct amdgpu_ip_block_version psp_v11_0_8_ip_block = { + .type = AMD_IP_BLOCK_TYPE_PSP, + .major = 11, + .minor = 0, + .rev = 8, + .funcs = &psp_ip_funcs, +}; + const struct amdgpu_ip_block_version psp_v12_0_ip_block = { .type = AMD_IP_BLOCK_TYPE_PSP, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index 3030ec24eb3b..8ef2d28af92a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -48,11 +48,15 @@ struct psp_context; struct psp_xgmi_node_info; struct psp_xgmi_topology_info; +struct psp_bin_desc; enum psp_bootloader_cmd { PSP_BL__LOAD_SYSDRV = 0x10000, PSP_BL__LOAD_SOSDRV = 0x20000, PSP_BL__LOAD_KEY_DATABASE = 0x80000, + PSP_BL__LOAD_SOCDRV = 0xB0000, + PSP_BL__LOAD_INTFDRV = 0xC0000, + PSP_BL__LOAD_DBGDRV = 0xD0000, PSP_BL__DRAM_LONG_TRAIN = 0x100000, PSP_BL__DRAM_SHORT_TRAIN = 0x200000, PSP_BL__LOAD_TOS_SPL_TABLE = 0x10000000, @@ -93,6 +97,9 @@ struct psp_funcs int (*bootloader_load_kdb)(struct psp_context *psp); int (*bootloader_load_spl)(struct psp_context *psp); int (*bootloader_load_sysdrv)(struct psp_context *psp); + int (*bootloader_load_soc_drv)(struct psp_context *psp); + int (*bootloader_load_intf_drv)(struct psp_context *psp); + int (*bootloader_load_dbg_drv)(struct psp_context *psp); int (*bootloader_load_sos)(struct psp_context *psp); int (*ring_init)(struct psp_context *psp, enum psp_ring_type ring_type); int (*ring_create)(struct psp_context *psp, @@ -106,7 +113,7 @@ struct psp_funcs int (*mem_training)(struct psp_context *psp, uint32_t ops); uint32_t (*ring_get_wptr)(struct psp_context *psp); void (*ring_set_wptr)(struct psp_context *psp, uint32_t value); - int (*load_usbc_pd_fw)(struct psp_context *psp, dma_addr_t dma_addr); + int (*load_usbc_pd_fw)(struct psp_context *psp, uint64_t fw_pri_mc_addr); int (*read_usbc_pd_fw)(struct psp_context *psp, uint32_t *fw_ver); }; @@ -116,6 +123,7 @@ struct psp_xgmi_node_info { uint8_t num_hops; uint8_t is_sharing_enabled; enum ta_xgmi_assigned_sdma_engine sdma_engine; + uint8_t num_links; }; struct psp_xgmi_topology_info { @@ -128,59 +136,32 @@ struct psp_asd_context { uint32_t session_id; }; -struct psp_xgmi_context { - uint8_t initialized; - uint32_t session_id; - struct amdgpu_bo *xgmi_shared_bo; - uint64_t xgmi_shared_mc_addr; - void *xgmi_shared_buf; - struct psp_xgmi_topology_info top_info; -}; - -struct psp_ras_context { - /*ras fw*/ - bool ras_initialized; - uint32_t session_id; - struct amdgpu_bo *ras_shared_bo; - uint64_t ras_shared_mc_addr; - void 
*ras_shared_buf; - struct amdgpu_ras *ras; +struct ta_mem_context { + struct amdgpu_bo *shared_bo; + uint64_t shared_mc_addr; + void *shared_buf; }; -struct psp_hdcp_context { - bool hdcp_initialized; +struct ta_context { + bool initialized; uint32_t session_id; - struct amdgpu_bo *hdcp_shared_bo; - uint64_t hdcp_shared_mc_addr; - void *hdcp_shared_buf; - struct mutex mutex; + struct ta_mem_context mem_context; }; -struct psp_dtm_context { - bool dtm_initialized; - uint32_t session_id; - struct amdgpu_bo *dtm_shared_bo; - uint64_t dtm_shared_mc_addr; - void *dtm_shared_buf; - struct mutex mutex; +struct ta_cp_context { + struct ta_context context; + struct mutex mutex; }; -struct psp_rap_context { - bool rap_initialized; - uint32_t session_id; - struct amdgpu_bo *rap_shared_bo; - uint64_t rap_shared_mc_addr; - void *rap_shared_buf; - struct mutex mutex; +struct psp_xgmi_context { + struct ta_context context; + struct psp_xgmi_topology_info top_info; + bool supports_extended_data; }; -struct psp_securedisplay_context { - bool securedisplay_initialized; - uint32_t session_id; - struct amdgpu_bo *securedisplay_shared_bo; - uint64_t securedisplay_shared_mc_addr; - void *securedisplay_shared_buf; - struct mutex mutex; +struct psp_ras_context { + struct ta_context context; + struct amdgpu_ras *ras; }; #define MEM_TRAIN_SYSTEM_SIGNATURE 0x54534942 @@ -282,6 +263,13 @@ struct psp_runtime_boot_cfg_entry { uint32_t reserved; }; +struct psp_bin_desc { + uint32_t fw_version; + uint32_t feature_version; + uint32_t size_bytes; + uint8_t *start_addr; +}; + struct psp_context { struct amdgpu_device *adev; @@ -297,36 +285,26 @@ struct psp_context /* sos firmware */ const struct firmware *sos_fw; - uint32_t sos_fw_version; - uint32_t sos_feature_version; - uint32_t sys_bin_size; - uint32_t sos_bin_size; - uint32_t toc_bin_size; - uint32_t kdb_bin_size; - uint32_t spl_bin_size; - uint32_t rl_bin_size; - uint8_t *sys_start_addr; - uint8_t *sos_start_addr; - uint8_t *toc_start_addr; - uint8_t *kdb_start_addr; - uint8_t *spl_start_addr; - uint8_t *rl_start_addr; + struct psp_bin_desc sys; + struct psp_bin_desc sos; + struct psp_bin_desc toc; + struct psp_bin_desc kdb; + struct psp_bin_desc spl; + struct psp_bin_desc rl; + struct psp_bin_desc soc_drv; + struct psp_bin_desc intf_drv; + struct psp_bin_desc dbg_drv; /* tmr buffer */ struct amdgpu_bo *tmr_bo; uint64_t tmr_mc_addr; /* asd firmware */ - const struct firmware *asd_fw; - uint32_t asd_fw_version; - uint32_t asd_feature_version; - uint32_t asd_ucode_size; - uint8_t *asd_start_addr; + const struct firmware *asd_fw; + struct psp_bin_desc asd; /* toc firmware */ const struct firmware *toc_fw; - uint32_t toc_fw_version; - uint32_t toc_feature_version; /* fence buffer */ struct amdgpu_bo *fence_buf_bo; @@ -348,36 +326,20 @@ struct psp_context /* xgmi ta firmware and buffer */ const struct firmware *ta_fw; uint32_t ta_fw_version; - uint32_t ta_xgmi_ucode_version; - uint32_t ta_xgmi_ucode_size; - uint8_t *ta_xgmi_start_addr; - uint32_t ta_ras_ucode_version; - uint32_t ta_ras_ucode_size; - uint8_t *ta_ras_start_addr; - - uint32_t ta_hdcp_ucode_version; - uint32_t ta_hdcp_ucode_size; - uint8_t *ta_hdcp_start_addr; - - uint32_t ta_dtm_ucode_version; - uint32_t ta_dtm_ucode_size; - uint8_t *ta_dtm_start_addr; - - uint32_t ta_rap_ucode_version; - uint32_t ta_rap_ucode_size; - uint8_t *ta_rap_start_addr; - - uint32_t ta_securedisplay_ucode_version; - uint32_t ta_securedisplay_ucode_size; - uint8_t *ta_securedisplay_start_addr; + struct psp_bin_desc xgmi; + struct 
psp_bin_desc ras; + struct psp_bin_desc hdcp; + struct psp_bin_desc dtm; + struct psp_bin_desc rap; + struct psp_bin_desc securedisplay; struct psp_asd_context asd_context; struct psp_xgmi_context xgmi_context; - struct psp_ras_context ras; - struct psp_hdcp_context hdcp_context; - struct psp_dtm_context dtm_context; - struct psp_rap_context rap_context; - struct psp_securedisplay_context securedisplay_context; + struct psp_ras_context ras_context; + struct ta_cp_context hdcp_context; + struct ta_cp_context dtm_context; + struct ta_cp_context rap_context; + struct ta_cp_context securedisplay_context; struct mutex mutex; struct psp_memory_training_context mem_train_ctx; @@ -402,6 +364,12 @@ struct amdgpu_psp_funcs { ((psp)->funcs->bootloader_load_spl ? (psp)->funcs->bootloader_load_spl((psp)) : 0) #define psp_bootloader_load_sysdrv(psp) \ ((psp)->funcs->bootloader_load_sysdrv ? (psp)->funcs->bootloader_load_sysdrv((psp)) : 0) +#define psp_bootloader_load_soc_drv(psp) \ + ((psp)->funcs->bootloader_load_soc_drv ? (psp)->funcs->bootloader_load_soc_drv((psp)) : 0) +#define psp_bootloader_load_intf_drv(psp) \ + ((psp)->funcs->bootloader_load_intf_drv ? (psp)->funcs->bootloader_load_intf_drv((psp)) : 0) +#define psp_bootloader_load_dbg_drv(psp) \ + ((psp)->funcs->bootloader_load_dbg_drv ? (psp)->funcs->bootloader_load_dbg_drv((psp)) : 0) #define psp_bootloader_load_sos(psp) \ ((psp)->funcs->bootloader_load_sos ? (psp)->funcs->bootloader_load_sos((psp)) : 0) #define psp_smu_reload_quirk(psp) \ @@ -414,9 +382,9 @@ struct amdgpu_psp_funcs { #define psp_ring_get_wptr(psp) (psp)->funcs->ring_get_wptr((psp)) #define psp_ring_set_wptr(psp, value) (psp)->funcs->ring_set_wptr((psp), (value)) -#define psp_load_usbc_pd_fw(psp, dma_addr) \ +#define psp_load_usbc_pd_fw(psp, fw_pri_mc_addr) \ ((psp)->funcs->load_usbc_pd_fw ? \ - (psp)->funcs->load_usbc_pd_fw((psp), (dma_addr)) : -EINVAL) + (psp)->funcs->load_usbc_pd_fw((psp), (fw_pri_mc_addr)) : -EINVAL) #define psp_read_usbc_pd_fw(psp, fw_ver) \ ((psp)->funcs->read_usbc_pd_fw ? 
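The header side of the TA rework collapses the per-TA context structs, which each duplicated initialized/session_id/shared-buffer fields, into ta_context plus ta_mem_context, with ta_cp_context layering on the mutex the content-protection TAs (HDCP, DTM, RAP, secure display) need. A compile-only sketch of the nesting and of how a call site changes (the kernel mutex is omitted since this is a userspace illustration):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct ta_mem_context {                 /* shared-buffer bookkeeping, one per TA */
        uint64_t shared_mc_addr;
        void *shared_buf;
};

struct ta_context {                     /* state every TA has */
        bool initialized;
        uint32_t session_id;
        struct ta_mem_context mem_context;
};

struct ta_cp_context {                  /* content-protection TAs add a lock */
        struct ta_context context;
        /* struct mutex mutex;  -- kernel-only, omitted in this userspace sketch */
};

struct psp_context {
        struct ta_cp_context securedisplay_context;
};

int main(void)
{
        struct psp_context psp = { 0 };

        /* old: psp.securedisplay_context.securedisplay_initialized = true; */
        psp.securedisplay_context.context.initialized = true;
        psp.securedisplay_context.context.session_id = 7;

        printf("initialized=%d session=%u\n",
               psp.securedisplay_context.context.initialized,
               psp.securedisplay_context.context.session_id);
        return 0;
}

This is what turns accesses like securedisplay_context.securedisplay_initialized in the old code into securedisplay_context.context.initialized throughout the .c hunks above.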
\ @@ -427,6 +395,7 @@ extern const struct amd_ip_funcs psp_ip_funcs; extern const struct amdgpu_ip_block_version psp_v3_1_ip_block; extern const struct amdgpu_ip_block_version psp_v10_0_ip_block; extern const struct amdgpu_ip_block_version psp_v11_0_ip_block; +extern const struct amdgpu_ip_block_version psp_v11_0_8_ip_block; extern const struct amdgpu_ip_block_version psp_v12_0_ip_block; extern const struct amdgpu_ip_block_version psp_v13_0_ip_block; @@ -437,14 +406,15 @@ int psp_gpu_reset(struct amdgpu_device *adev); int psp_update_vcn_sram(struct amdgpu_device *adev, int inst_idx, uint64_t cmd_gpu_addr, int cmd_size); -int psp_xgmi_initialize(struct psp_context *psp); +int psp_xgmi_initialize(struct psp_context *psp, bool set_extended_data, bool load_ta); int psp_xgmi_terminate(struct psp_context *psp); int psp_xgmi_invoke(struct psp_context *psp, uint32_t ta_cmd_id); int psp_xgmi_get_hive_id(struct psp_context *psp, uint64_t *hive_id); int psp_xgmi_get_node_id(struct psp_context *psp, uint64_t *node_id); int psp_xgmi_get_topology_info(struct psp_context *psp, int number_devices, - struct psp_xgmi_topology_info *topology); + struct psp_xgmi_topology_info *topology, + bool get_extended_data); int psp_xgmi_set_topology_info(struct psp_context *psp, int number_devices, struct psp_xgmi_topology_info *topology); @@ -483,4 +453,5 @@ int psp_load_fw_list(struct psp_context *psp, struct amdgpu_firmware_info **ucode_list, int ucode_count); void psp_copy_fw(struct psp_context *psp, uint8_t *start_addr, uint32_t bin_size); +int is_psp_fw_valid(struct psp_bin_desc bin); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rap.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_rap.c index 51909bf8798c..12010c988c8b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rap.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rap.c @@ -76,7 +76,7 @@ static ssize_t amdgpu_rap_debugfs_write(struct file *f, const char __user *buf, dev_info(adev->dev, "RAP L0 validate test success.\n"); } else { rap_shared_mem = (struct ta_rap_shared_memory *) - adev->psp.rap_context.rap_shared_buf; + adev->psp.rap_context.context.mem_context.shared_buf; rap_cmd_output = &(rap_shared_mem->rap_out_message.output); dev_info(adev->dev, "RAP test failed, the output is:\n"); @@ -119,7 +119,7 @@ void amdgpu_rap_debugfs_init(struct amdgpu_device *adev) #if defined(CONFIG_DEBUG_FS) struct drm_minor *minor = adev_to_drm(adev)->primary; - if (!adev->psp.rap_context.rap_initialized) + if (!adev->psp.rap_context.context.initialized) return; debugfs_create_file("rap_test", S_IWUSR, minor->debugfs_root, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index fc66aca28594..96a8fd0ca1df 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -64,15 +64,14 @@ const char *ras_block_string[] = { }; #define ras_err_str(i) (ras_error_string[ffs(i)]) -#define ras_block_str(i) (ras_block_string[i]) #define RAS_DEFAULT_FLAGS (AMDGPU_RAS_FLAG_INIT_BY_VBIOS) /* inject address is 52 bits */ #define RAS_UMC_INJECT_ADDR_LIMIT (0x1ULL << 52) -/* typical ECC bad page rate(1 bad page per 100MB VRAM) */ -#define RAS_BAD_PAGE_RATE (100 * 1024 * 1024ULL) +/* typical ECC bad page rate is 1 bad page per 100MB VRAM */ +#define RAS_BAD_PAGE_COVER (100 * 1024 * 1024ULL) enum amdgpu_ras_retire_page_reservation { AMDGPU_RAS_RETIRE_PAGE_RESERVED, @@ -355,8 +354,9 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f, * to see which blocks support RAS on a particular asic. 
* */ -static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user *buf, - size_t size, loff_t *pos) +static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, + const char __user *buf, + size_t size, loff_t *pos) { struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private; struct ras_debug_if data; @@ -370,7 +370,7 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user * ret = amdgpu_ras_debugfs_ctrl_parse_data(f, buf, size, pos, &data); if (ret) - return -EINVAL; + return ret; if (data.op == 3) { ret = amdgpu_reserve_page_direct(adev, data.inject.address); @@ -403,9 +403,9 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user * /* umc ce/ue error injection for a bad page is not allowed */ if ((data.head.block == AMDGPU_RAS_BLOCK__UMC) && amdgpu_ras_check_bad_page(adev, data.inject.address)) { - dev_warn(adev->dev, "RAS WARN: 0x%llx has been marked " - "as bad before error injection!\n", - data.inject.address); + dev_warn(adev->dev, "RAS WARN: inject: 0x%llx has " + "already been marked as bad!\n", + data.inject.address); break; } @@ -439,21 +439,24 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user * * will reset EEPROM table to 0 entries. * */ -static ssize_t amdgpu_ras_debugfs_eeprom_write(struct file *f, const char __user *buf, - size_t size, loff_t *pos) +static ssize_t amdgpu_ras_debugfs_eeprom_write(struct file *f, + const char __user *buf, + size_t size, loff_t *pos) { struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private; int ret; ret = amdgpu_ras_eeprom_reset_table( - &(amdgpu_ras_get_context(adev)->eeprom_control)); + &(amdgpu_ras_get_context(adev)->eeprom_control)); - if (ret == 1) { + if (!ret) { + /* Something was written to EEPROM. + */ amdgpu_ras_get_context(adev)->flags = RAS_DEFAULT_FLAGS; return size; } else { - return -EIO; + return ret; } } @@ -526,7 +529,7 @@ static inline void put_obj(struct ras_manager *obj) if (obj && (--obj->use == 0)) list_del(&obj->node); if (obj && (obj->use < 0)) - DRM_ERROR("RAS ERROR: Unbalance obj(%s) use\n", obj->head.name); + DRM_ERROR("RAS ERROR: Unbalance obj(%s) use\n", ras_block_str(obj->head.block)); } /* make one obj and return it. */ @@ -789,7 +792,6 @@ static int amdgpu_ras_enable_all_features(struct amdgpu_device *adev, .type = default_ras_type, .sub_block_index = 0, }; - strcpy(head.name, ras_block_str(i)); if (bypass) { /* * bypass psp. vbios enable ras for us. 
@@ -1316,6 +1318,12 @@ static struct dentry *amdgpu_ras_debugfs_create_ctrl_node(struct amdgpu_device * &con->bad_page_cnt_threshold); debugfs_create_x32("ras_hw_enabled", 0444, dir, &adev->ras_hw_enabled); debugfs_create_x32("ras_enabled", 0444, dir, &adev->ras_enabled); + debugfs_create_file("ras_eeprom_size", S_IRUGO, dir, adev, + &amdgpu_ras_debugfs_eeprom_size_ops); + con->de_ras_eeprom_table = debugfs_create_file("ras_eeprom_table", + S_IRUGO, dir, adev, + &amdgpu_ras_debugfs_eeprom_table_ops); + amdgpu_ras_debugfs_set_ret_size(&con->eeprom_control); /* * After one uncorrectable error happens, usually GPU recovery will @@ -1833,13 +1841,12 @@ int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev) control = &con->eeprom_control; data = con->eh_data; - save_count = data->count - control->num_recs; + save_count = data->count - control->ras_num_recs; /* only new entries are saved */ if (save_count > 0) { - if (amdgpu_ras_eeprom_process_recods(control, - &data->bps[control->num_recs], - true, - save_count)) { + if (amdgpu_ras_eeprom_append(control, + &data->bps[control->ras_num_recs], + save_count)) { dev_err(adev->dev, "Failed to save EEPROM table data!"); return -EIO; } @@ -1857,28 +1864,24 @@ int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev) static int amdgpu_ras_load_bad_pages(struct amdgpu_device *adev) { struct amdgpu_ras_eeprom_control *control = - &adev->psp.ras.ras->eeprom_control; - struct eeprom_table_record *bps = NULL; - int ret = 0; + &adev->psp.ras_context.ras->eeprom_control; + struct eeprom_table_record *bps; + int ret; /* no bad page record, skip eeprom access */ - if (!control->num_recs || (amdgpu_bad_page_threshold == 0)) - return ret; + if (control->ras_num_recs == 0 || amdgpu_bad_page_threshold == 0) + return 0; - bps = kcalloc(control->num_recs, sizeof(*bps), GFP_KERNEL); + bps = kcalloc(control->ras_num_recs, sizeof(*bps), GFP_KERNEL); if (!bps) return -ENOMEM; - if (amdgpu_ras_eeprom_process_recods(control, bps, false, - control->num_recs)) { + ret = amdgpu_ras_eeprom_read(control, bps, control->ras_num_recs); + if (ret) dev_err(adev->dev, "Failed to load EEPROM table records!"); - ret = -EIO; - goto out; - } - - ret = amdgpu_ras_add_bad_pages(adev, bps, control->num_recs); + else + ret = amdgpu_ras_add_bad_pages(adev, bps, control->ras_num_recs); -out: kfree(bps); return ret; } @@ -1918,11 +1921,9 @@ static bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev, } static void amdgpu_ras_validate_threshold(struct amdgpu_device *adev, - uint32_t max_length) + uint32_t max_count) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); - int tmp_threshold = amdgpu_bad_page_threshold; - u64 val; /* * Justification of value bad_page_cnt_threshold in ras structure @@ -1943,18 +1944,15 @@ static void amdgpu_ras_validate_threshold(struct amdgpu_device *adev, * take no effect. 
*/ - if (tmp_threshold < -1) - tmp_threshold = -1; - else if (tmp_threshold > max_length) - tmp_threshold = max_length; + if (amdgpu_bad_page_threshold < 0) { + u64 val = adev->gmc.mc_vram_size; - if (tmp_threshold == -1) { - val = adev->gmc.mc_vram_size; - do_div(val, RAS_BAD_PAGE_RATE); + do_div(val, RAS_BAD_PAGE_COVER); con->bad_page_cnt_threshold = min(lower_32_bits(val), - max_length); + max_count); } else { - con->bad_page_cnt_threshold = tmp_threshold; + con->bad_page_cnt_threshold = min_t(int, max_count, + amdgpu_bad_page_threshold); } } @@ -1962,15 +1960,24 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); struct ras_err_handler_data **data; - uint32_t max_eeprom_records_len = 0; + u32 max_eeprom_records_count = 0; bool exc_err_limit = false; int ret; - if (adev->ras_enabled && con) - data = &con->eh_data; - else + if (!con) return 0; + /* Allow access to RAS EEPROM via debugfs, when the ASIC + * supports RAS and debugfs is enabled, but when + * adev->ras_enabled is unset, i.e. when "ras_enable" + * module parameter is set to 0. + */ + con->adev = adev; + + if (!adev->ras_enabled) + return 0; + + data = &con->eh_data; *data = kmalloc(sizeof(**data), GFP_KERNEL | __GFP_ZERO); if (!*data) { ret = -ENOMEM; @@ -1980,10 +1987,9 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev) mutex_init(&con->recovery_lock); INIT_WORK(&con->recovery_work, amdgpu_ras_do_recovery); atomic_set(&con->in_recovery, 0); - con->adev = adev; - max_eeprom_records_len = amdgpu_ras_eeprom_get_record_max_length(); - amdgpu_ras_validate_threshold(adev, max_eeprom_records_len); + max_eeprom_records_count = amdgpu_ras_eeprom_max_record_count(); + amdgpu_ras_validate_threshold(adev, max_eeprom_records_count); /* Todo: During test the SMU might fail to read the eeprom through I2C * when the GPU is pending on XGMI reset during probe time @@ -1999,13 +2005,13 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev) if (exc_err_limit || ret) goto free; - if (con->eeprom_control.num_recs) { + if (con->eeprom_control.ras_num_recs) { ret = amdgpu_ras_load_bad_pages(adev); if (ret) goto free; if (adev->smu.ppt_funcs && adev->smu.ppt_funcs->send_hbm_bad_pages_num) - adev->smu.ppt_funcs->send_hbm_bad_pages_num(&adev->smu, con->eeprom_control.num_recs); + adev->smu.ppt_funcs->send_hbm_bad_pages_num(&adev->smu, con->eeprom_control.ras_num_recs); } return 0; @@ -2015,7 +2021,7 @@ free: kfree(*data); con->eh_data = NULL; out: - dev_warn(adev->dev, "Failed to initialize ras recovery!\n"); + dev_warn(adev->dev, "Failed to initialize ras recovery! 
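With the module parameter left at its default of -1, amdgpu_ras_validate_threshold() now derives bad_page_cnt_threshold from VRAM size at one retired page per 100 MB (RAS_BAD_PAGE_COVER), capped at the number of records the EEPROM can hold; an explicit non-negative parameter is capped the same way. A worked example of the arithmetic (the 16 GB VRAM size and the record capacity are sample inputs, not values taken from the patch):

#include <stdio.h>
#include <stdint.h>

#define RAS_BAD_PAGE_COVER (100 * 1024 * 1024ULL)       /* 1 bad page per 100 MB */

static uint32_t validate_threshold(int64_t module_param, uint64_t vram_size,
                                   uint32_t max_count)
{
        if (module_param < 0) {
                uint64_t val = vram_size / RAS_BAD_PAGE_COVER;

                return val < max_count ? (uint32_t)val : max_count;
        }
        return module_param < max_count ? (uint32_t)module_param : max_count;
}

int main(void)
{
        uint64_t vram = 16ULL * 1024 * 1024 * 1024;     /* 16 GB board, for example */
        uint32_t max_records = 10921;                   /* sample EEPROM capacity */

        /* default (-1): 16 GB / 100 MB = 163 retired pages allowed */
        printf("auto threshold: %u\n", validate_threshold(-1, vram, max_records));
        /* an explicit parameter larger than the EEPROM can hold gets clamped */
        printf("clamped threshold: %u\n", validate_threshold(20000, vram, max_records));
        return 0;
}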
(%d)\n", ret); /* * Except error threshold exceeding case, other failure cases in this diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index b504ed8c9b50..eae604fd90b8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -49,10 +49,14 @@ enum amdgpu_ras_block { AMDGPU_RAS_BLOCK__MP0, AMDGPU_RAS_BLOCK__MP1, AMDGPU_RAS_BLOCK__FUSE, + AMDGPU_RAS_BLOCK__MPIO, AMDGPU_RAS_BLOCK__LAST }; +extern const char *ras_block_string[]; + +#define ras_block_str(i) (ras_block_string[i]) #define AMDGPU_RAS_BLOCK_COUNT AMDGPU_RAS_BLOCK__LAST #define AMDGPU_RAS_BLOCK_MASK ((1ULL << AMDGPU_RAS_BLOCK_COUNT) - 1) @@ -306,7 +310,6 @@ struct ras_common_if { enum amdgpu_ras_block block; enum amdgpu_ras_error_type type; uint32_t sub_block_index; - /* block name */ char name[32]; }; @@ -318,6 +321,7 @@ struct amdgpu_ras { /* sysfs */ struct device_attribute features_attr; struct bin_attribute badpages_attr; + struct dentry *de_ras_eeprom_table; /* block array */ struct ras_manager *objs; @@ -417,7 +421,7 @@ struct ras_badpage { /* interfaces for IP */ struct ras_fs_if { struct ras_common_if head; - char sysfs_name[32]; + const char* sysfs_name; char debugfs_name[32]; }; @@ -469,8 +473,8 @@ struct ras_debug_if { * 8: feature disable */ -#define amdgpu_ras_get_context(adev) ((adev)->psp.ras.ras) -#define amdgpu_ras_set_context(adev, ras_con) ((adev)->psp.ras.ras = (ras_con)) +#define amdgpu_ras_get_context(adev) ((adev)->psp.ras_context.ras) +#define amdgpu_ras_set_context(adev, ras_con) ((adev)->psp.ras_context.ras = (ras_con)) /* check if ras is supported on block, say, sdma, gfx */ static inline int amdgpu_ras_is_supported(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c index 38222de921d1..9dc3b2d88176 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c @@ -26,94 +26,131 @@ #include "amdgpu_ras.h" #include <linux/bits.h> #include "atom.h" +#include "amdgpu_eeprom.h" #include "amdgpu_atomfirmware.h" +#include <linux/debugfs.h> +#include <linux/uaccess.h> -#define EEPROM_I2C_TARGET_ADDR_VEGA20 0xA0 -#define EEPROM_I2C_TARGET_ADDR_ARCTURUS 0xA8 -#define EEPROM_I2C_TARGET_ADDR_ARCTURUS_D342 0xA0 -#define EEPROM_I2C_TARGET_ADDR_SIENNA_CICHLID 0xA0 -#define EEPROM_I2C_TARGET_ADDR_ALDEBARAN 0xA0 +#define EEPROM_I2C_MADDR_VEGA20 0x0 +#define EEPROM_I2C_MADDR_ARCTURUS 0x40000 +#define EEPROM_I2C_MADDR_ARCTURUS_D342 0x0 +#define EEPROM_I2C_MADDR_SIENNA_CICHLID 0x0 +#define EEPROM_I2C_MADDR_ALDEBARAN 0x0 /* * The 2 macros bellow represent the actual size in bytes that * those entities occupy in the EEPROM memory. - * EEPROM_TABLE_RECORD_SIZE is different than sizeof(eeprom_table_record) which + * RAS_TABLE_RECORD_SIZE is different than sizeof(eeprom_table_record) which * uses uint64 to store 6b fields such as retired_page. */ -#define EEPROM_TABLE_HEADER_SIZE 20 -#define EEPROM_TABLE_RECORD_SIZE 24 - -#define EEPROM_ADDRESS_SIZE 0x2 +#define RAS_TABLE_HEADER_SIZE 20 +#define RAS_TABLE_RECORD_SIZE 24 /* Table hdr is 'AMDR' */ -#define EEPROM_TABLE_HDR_VAL 0x414d4452 -#define EEPROM_TABLE_VER 0x00010000 +#define RAS_TABLE_HDR_VAL 0x414d4452 +#define RAS_TABLE_VER 0x00010000 /* Bad GPU tag ‘BADG’ */ -#define EEPROM_TABLE_HDR_BAD 0x42414447 +#define RAS_TABLE_HDR_BAD 0x42414447 + +/* Assume 2-Mbit size EEPROM and take up the whole space. 
*/ +#define RAS_TBL_SIZE_BYTES (256 * 1024) +#define RAS_TABLE_START 0 +#define RAS_HDR_START RAS_TABLE_START +#define RAS_RECORD_START (RAS_HDR_START + RAS_TABLE_HEADER_SIZE) +#define RAS_MAX_RECORD_COUNT ((RAS_TBL_SIZE_BYTES - RAS_TABLE_HEADER_SIZE) \ + / RAS_TABLE_RECORD_SIZE) + +/* Given a zero-based index of an EEPROM RAS record, yields the EEPROM + * offset off of RAS_TABLE_START. That is, this is something you can + * add to control->i2c_address, and then tell I2C layer to read + * from/write to there. _N is the so called absolute index, + * because it starts right after the table header. + */ +#define RAS_INDEX_TO_OFFSET(_C, _N) ((_C)->ras_record_offset + \ + (_N) * RAS_TABLE_RECORD_SIZE) + +#define RAS_OFFSET_TO_INDEX(_C, _O) (((_O) - \ + (_C)->ras_record_offset) / RAS_TABLE_RECORD_SIZE) + +/* Given a 0-based relative record index, 0, 1, 2, ..., etc., off + * of "fri", return the absolute record index off of the end of + * the table header. + */ +#define RAS_RI_TO_AI(_C, _I) (((_I) + (_C)->ras_fri) % \ + (_C)->ras_max_record_count) -/* Assume 2 Mbit size */ -#define EEPROM_SIZE_BYTES 256000 -#define EEPROM_PAGE__SIZE_BYTES 256 -#define EEPROM_HDR_START 0 -#define EEPROM_RECORD_START (EEPROM_HDR_START + EEPROM_TABLE_HEADER_SIZE) -#define EEPROM_MAX_RECORD_NUM ((EEPROM_SIZE_BYTES - EEPROM_TABLE_HEADER_SIZE) / EEPROM_TABLE_RECORD_SIZE) -#define EEPROM_ADDR_MSB_MASK GENMASK(17, 8) +#define RAS_NUM_RECS(_tbl_hdr) (((_tbl_hdr)->tbl_size - \ + RAS_TABLE_HEADER_SIZE) / RAS_TABLE_RECORD_SIZE) #define to_amdgpu_device(x) (container_of(x, struct amdgpu_ras, eeprom_control))->adev static bool __is_ras_eeprom_supported(struct amdgpu_device *adev) { - if ((adev->asic_type == CHIP_VEGA20) || - (adev->asic_type == CHIP_ARCTURUS) || - (adev->asic_type == CHIP_SIENNA_CICHLID) || - (adev->asic_type == CHIP_ALDEBARAN)) - return true; - - return false; + return adev->asic_type == CHIP_VEGA20 || + adev->asic_type == CHIP_ARCTURUS || + adev->asic_type == CHIP_SIENNA_CICHLID || + adev->asic_type == CHIP_ALDEBARAN; } static bool __get_eeprom_i2c_addr_arct(struct amdgpu_device *adev, - uint16_t *i2c_addr) + struct amdgpu_ras_eeprom_control *control) { struct atom_context *atom_ctx = adev->mode_info.atom_context; - if (!i2c_addr || !atom_ctx) + if (!control || !atom_ctx) return false; if (strnstr(atom_ctx->vbios_version, "D342", sizeof(atom_ctx->vbios_version))) - *i2c_addr = EEPROM_I2C_TARGET_ADDR_ARCTURUS_D342; + control->i2c_address = EEPROM_I2C_MADDR_ARCTURUS_D342; else - *i2c_addr = EEPROM_I2C_TARGET_ADDR_ARCTURUS; + control->i2c_address = EEPROM_I2C_MADDR_ARCTURUS; return true; } static bool __get_eeprom_i2c_addr(struct amdgpu_device *adev, - uint16_t *i2c_addr) + struct amdgpu_ras_eeprom_control *control) { - if (!i2c_addr) + uint8_t ras_rom_i2c_slave_addr; + + if (!control) return false; - if (amdgpu_atomfirmware_ras_rom_addr(adev, (uint8_t*)i2c_addr)) - return true; + control->i2c_address = 0; + + if (amdgpu_atomfirmware_ras_rom_addr(adev, &ras_rom_i2c_slave_addr)) + { + switch (ras_rom_i2c_slave_addr) { + case 0xA0: + control->i2c_address = 0; + return true; + case 0xA8: + control->i2c_address = 0x40000; + return true; + default: + dev_warn(adev->dev, "RAS EEPROM I2C slave address %02x not supported", + ras_rom_i2c_slave_addr); + return false; + } + } switch (adev->asic_type) { case CHIP_VEGA20: - *i2c_addr = EEPROM_I2C_TARGET_ADDR_VEGA20; + control->i2c_address = EEPROM_I2C_MADDR_VEGA20; break; case CHIP_ARCTURUS: - return __get_eeprom_i2c_addr_arct(adev, i2c_addr); + return 
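The renamed layout macros describe the whole on-EEPROM table: a 20-byte header at RAS_HDR_START, then 24-byte records filling the assumed 256 KiB part, giving RAS_MAX_RECORD_COUNT = (262144 - 20) / 24 = 10921 records. RAS_INDEX_TO_OFFSET() turns an absolute record index into an EEPROM offset, and RAS_RI_TO_AI() maps a relative index off of the first-record index (fri) around the ring. A few worked numbers using the same constants (the control struct is reduced to the fields the math touches):

#include <stdio.h>
#include <stdint.h>

#define RAS_TABLE_HEADER_SIZE 20
#define RAS_TABLE_RECORD_SIZE 24
#define RAS_TBL_SIZE_BYTES    (256 * 1024)
#define RAS_RECORD_START      RAS_TABLE_HEADER_SIZE
#define RAS_MAX_RECORD_COUNT  ((RAS_TBL_SIZE_BYTES - RAS_TABLE_HEADER_SIZE) \
                               / RAS_TABLE_RECORD_SIZE)

/* Same shape as the driver's macros, with the control struct reduced to
 * the two fields the arithmetic needs. */
struct ctl { uint32_t ras_record_offset, ras_max_record_count, ras_fri; };

#define RAS_INDEX_TO_OFFSET(_C, _N) ((_C)->ras_record_offset + \
                                     (_N) * RAS_TABLE_RECORD_SIZE)
#define RAS_RI_TO_AI(_C, _I)        (((_I) + (_C)->ras_fri) % \
                                     (_C)->ras_max_record_count)

int main(void)
{
        struct ctl c = {
                .ras_record_offset = RAS_RECORD_START,
                .ras_max_record_count = RAS_MAX_RECORD_COUNT,
                .ras_fri = RAS_MAX_RECORD_COUNT - 2,   /* pretend the ring is near the end */
        };

        printf("capacity: %u records\n", (unsigned)RAS_MAX_RECORD_COUNT);   /* 10921 */
        printf("record 0 lives at offset %u\n", RAS_INDEX_TO_OFFSET(&c, 0)); /* 20 */
        printf("record 3 lives at offset %u\n", RAS_INDEX_TO_OFFSET(&c, 3)); /* 92 */
        /* relative record 5 wraps past the end of the table back to index 3 */
        printf("relative 5 -> absolute %u\n", RAS_RI_TO_AI(&c, 5));
        return 0;
}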
__get_eeprom_i2c_addr_arct(adev, control); case CHIP_SIENNA_CICHLID: - *i2c_addr = EEPROM_I2C_TARGET_ADDR_SIENNA_CICHLID; + control->i2c_address = EEPROM_I2C_MADDR_SIENNA_CICHLID; break; case CHIP_ALDEBARAN: - *i2c_addr = EEPROM_I2C_TARGET_ADDR_ALDEBARAN; + control->i2c_address = EEPROM_I2C_MADDR_ALDEBARAN; break; default: @@ -123,10 +160,11 @@ static bool __get_eeprom_i2c_addr(struct amdgpu_device *adev, return true; } -static void __encode_table_header_to_buff(struct amdgpu_ras_eeprom_table_header *hdr, - unsigned char *buff) +static void +__encode_table_header_to_buf(struct amdgpu_ras_eeprom_table_header *hdr, + unsigned char *buf) { - uint32_t *pp = (uint32_t *) buff; + u32 *pp = (uint32_t *)buf; pp[0] = cpu_to_le32(hdr->header); pp[1] = cpu_to_le32(hdr->version); @@ -135,10 +173,11 @@ static void __encode_table_header_to_buff(struct amdgpu_ras_eeprom_table_header pp[4] = cpu_to_le32(hdr->checksum); } -static void __decode_table_header_from_buff(struct amdgpu_ras_eeprom_table_header *hdr, - unsigned char *buff) +static void +__decode_table_header_from_buf(struct amdgpu_ras_eeprom_table_header *hdr, + unsigned char *buf) { - uint32_t *pp = (uint32_t *)buff; + u32 *pp = (uint32_t *)buf; hdr->header = le32_to_cpu(pp[0]); hdr->version = le32_to_cpu(pp[1]); @@ -147,303 +186,168 @@ static void __decode_table_header_from_buff(struct amdgpu_ras_eeprom_table_heade hdr->checksum = le32_to_cpu(pp[4]); } -static int __update_table_header(struct amdgpu_ras_eeprom_control *control, - unsigned char *buff) +static int __write_table_header(struct amdgpu_ras_eeprom_control *control) { - int ret = 0; + u8 buf[RAS_TABLE_HEADER_SIZE]; struct amdgpu_device *adev = to_amdgpu_device(control); - struct i2c_msg msg = { - .addr = 0, - .flags = 0, - .len = EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE, - .buf = buff, - }; - + int res; - *(uint16_t *)buff = EEPROM_HDR_START; - __encode_table_header_to_buff(&control->tbl_hdr, buff + EEPROM_ADDRESS_SIZE); - - msg.addr = control->i2c_address; + memset(buf, 0, sizeof(buf)); + __encode_table_header_to_buf(&control->tbl_hdr, buf); /* i2c may be unstable in gpu reset */ down_read(&adev->reset_sem); - ret = i2c_transfer(&adev->pm.smu_i2c, &msg, 1); + res = amdgpu_eeprom_write(&adev->pm.smu_i2c, + control->i2c_address + + control->ras_header_offset, + buf, RAS_TABLE_HEADER_SIZE); up_read(&adev->reset_sem); - if (ret < 1) - DRM_ERROR("Failed to write EEPROM table header, ret:%d", ret); - - return ret; -} - -static uint32_t __calc_hdr_byte_sum(struct amdgpu_ras_eeprom_control *control) -{ - int i; - uint32_t tbl_sum = 0; - - /* Header checksum, skip checksum field in the calculation */ - for (i = 0; i < sizeof(control->tbl_hdr) - sizeof(control->tbl_hdr.checksum); i++) - tbl_sum += *(((unsigned char *)&control->tbl_hdr) + i); - - return tbl_sum; -} - -static uint32_t __calc_recs_byte_sum(struct eeprom_table_record *records, - int num) -{ - int i, j; - uint32_t tbl_sum = 0; - - /* Records checksum */ - for (i = 0; i < num; i++) { - struct eeprom_table_record *record = &records[i]; - - for (j = 0; j < sizeof(*record); j++) { - tbl_sum += *(((unsigned char *)record) + j); - } + if (res < 0) { + DRM_ERROR("Failed to write EEPROM table header:%d", res); + } else if (res < RAS_TABLE_HEADER_SIZE) { + DRM_ERROR("Short write:%d out of %d\n", + res, RAS_TABLE_HEADER_SIZE); + res = -EIO; + } else { + res = 0; } - return tbl_sum; -} - -static inline uint32_t __calc_tbl_byte_sum(struct amdgpu_ras_eeprom_control *control, - struct eeprom_table_record *records, int num) -{ - return 
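__write_table_header(), like the record read/write helpers later in this file, converts the byte count returned by amdgpu_eeprom_write() into the usual 0 / -errno convention and treats a short transfer as -EIO. A tiny sketch of that normalization, with the EEPROM call replaced by a stub that deliberately writes short:

#include <stdio.h>
#include <errno.h>

#define RAS_TABLE_HEADER_SIZE 20

/* Stand-in for amdgpu_eeprom_write(): returns bytes written or -errno. */
static int fake_eeprom_write(const void *buf, int len)
{
        (void)buf;
        return len - 4;                         /* simulate a short write for the demo */
}

static int write_table_header(const void *buf)
{
        int res = fake_eeprom_write(buf, RAS_TABLE_HEADER_SIZE);

        if (res < 0)
                return res;                     /* transfer failed outright */
        if (res < RAS_TABLE_HEADER_SIZE)
                return -EIO;                    /* short write: treat as an I/O error */
        return 0;                               /* full header written */
}

int main(void)
{
        char hdr[RAS_TABLE_HEADER_SIZE] = { 0 };

        printf("write_table_header() -> %d\n", write_table_header(hdr));   /* -EIO */
        return 0;
}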
__calc_hdr_byte_sum(control) + __calc_recs_byte_sum(records, num); -} - -/* Checksum = 256 -((sum of all table entries) mod 256) */ -static void __update_tbl_checksum(struct amdgpu_ras_eeprom_control *control, - struct eeprom_table_record *records, int num, - uint32_t old_hdr_byte_sum) -{ - /* - * This will update the table sum with new records. - * - * TODO: What happens when the EEPROM table is to be wrapped around - * and old records from start will get overridden. - */ - - /* need to recalculate updated header byte sum */ - control->tbl_byte_sum -= old_hdr_byte_sum; - control->tbl_byte_sum += __calc_tbl_byte_sum(control, records, num); - - control->tbl_hdr.checksum = 256 - (control->tbl_byte_sum % 256); + return res; } -/* table sum mod 256 + checksum must equals 256 */ -static bool __validate_tbl_checksum(struct amdgpu_ras_eeprom_control *control, - struct eeprom_table_record *records, int num) +static u8 __calc_hdr_byte_sum(const struct amdgpu_ras_eeprom_control *control) { - control->tbl_byte_sum = __calc_tbl_byte_sum(control, records, num); + int ii; + u8 *pp, csum; + size_t sz; - if (control->tbl_hdr.checksum + (control->tbl_byte_sum % 256) != 256) { - DRM_WARN("Checksum mismatch, checksum: %u ", control->tbl_hdr.checksum); - return false; - } + /* Header checksum, skip checksum field in the calculation */ + sz = sizeof(control->tbl_hdr) - sizeof(control->tbl_hdr.checksum); + pp = (u8 *) &control->tbl_hdr; + csum = 0; + for (ii = 0; ii < sz; ii++, pp++) + csum += *pp; - return true; + return csum; } static int amdgpu_ras_eeprom_correct_header_tag( - struct amdgpu_ras_eeprom_control *control, - uint32_t header) + struct amdgpu_ras_eeprom_control *control, + uint32_t header) { - unsigned char buff[EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE]; struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr; - int ret = 0; - - memset(buff, 0, EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE); - - mutex_lock(&control->tbl_mutex); + u8 *hh; + int res; + u8 csum; + + csum = -hdr->checksum; + + hh = (void *) &hdr->header; + csum -= (hh[0] + hh[1] + hh[2] + hh[3]); + hh = (void *) &header; + csum += hh[0] + hh[1] + hh[2] + hh[3]; + csum = -csum; + mutex_lock(&control->ras_tbl_mutex); hdr->header = header; - ret = __update_table_header(control, buff); - mutex_unlock(&control->tbl_mutex); + hdr->checksum = csum; + res = __write_table_header(control); + mutex_unlock(&control->ras_tbl_mutex); - return ret; + return res; } +/** + * amdgpu_ras_eeprom_reset_table -- Reset the RAS EEPROM table + * @control: pointer to control structure + * + * Reset the contents of the header of the RAS EEPROM table. + * Return 0 on success, -errno on error. 
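The checksum convention is unchanged: every byte of the table, checksum included, must sum to 0 mod 256, so the stored checksum is the negated byte sum. amdgpu_ras_eeprom_correct_header_tag() uses that to flip the 4-byte tag (for example RAS_TABLE_HDR_BAD back to RAS_TABLE_HDR_VAL) without re-reading any records: recover the running sum from the old checksum, subtract the old tag bytes, add the new ones, and negate again. A standalone check of that identity over just the 20-byte header (record bytes would participate in the sum the same way):

#include <stdio.h>
#include <stdint.h>

struct table_header {
        uint32_t header, version, first_rec_offset, tbl_size, checksum;
};

/* Sum every header byte except the checksum field itself. */
static uint8_t hdr_byte_sum(const struct table_header *hdr)
{
        const uint8_t *p = (const uint8_t *)hdr;
        size_t sz = sizeof(*hdr) - sizeof(hdr->checksum);
        uint8_t csum = 0;

        for (size_t i = 0; i < sz; i++)
                csum += p[i];
        return csum;
}

int main(void)
{
        struct table_header hdr = {
                .header = 0x42414447,           /* 'BADG' */
                .version = 0x00010000,
                .first_rec_offset = 20,
                .tbl_size = 20,
        };
        hdr.checksum = (uint8_t)-hdr_byte_sum(&hdr);   /* header bytes + checksum == 0 mod 256 */

        /* In-place tag correction: back out the old tag bytes, add the new ones. */
        uint32_t new_tag = 0x414d4452;          /* 'AMDR' */
        uint8_t csum = (uint8_t)-hdr.checksum;  /* recover the running byte sum */
        const uint8_t *oldp = (const uint8_t *)&hdr.header;
        const uint8_t *newp = (const uint8_t *)&new_tag;

        for (int i = 0; i < 4; i++)
                csum = csum - oldp[i] + newp[i];
        hdr.header = new_tag;
        hdr.checksum = (uint8_t)-csum;

        printf("recheck: %u (0 means consistent)\n",
               (unsigned)(uint8_t)(hdr_byte_sum(&hdr) + hdr.checksum));
        return 0;
}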
+ */ int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control) { - unsigned char buff[EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE] = { 0 }; struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr; - int ret = 0; - - mutex_lock(&control->tbl_mutex); - - hdr->header = EEPROM_TABLE_HDR_VAL; - hdr->version = EEPROM_TABLE_VER; - hdr->first_rec_offset = EEPROM_RECORD_START; - hdr->tbl_size = EEPROM_TABLE_HEADER_SIZE; - - control->tbl_byte_sum = 0; - __update_tbl_checksum(control, NULL, 0, 0); - control->next_addr = EEPROM_RECORD_START; - - ret = __update_table_header(control, buff); - - mutex_unlock(&control->tbl_mutex); - - return ret; - -} - -int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control, - bool *exceed_err_limit) -{ - int ret = 0; - struct amdgpu_device *adev = to_amdgpu_device(control); - unsigned char buff[EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE] = { 0 }; - struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr; - struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); - struct i2c_msg msg = { - .addr = 0, - .flags = I2C_M_RD, - .len = EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE, - .buf = buff, - }; - - *exceed_err_limit = false; - - if (!__is_ras_eeprom_supported(adev)) - return 0; - - /* Verify i2c adapter is initialized */ - if (!adev->pm.smu_i2c.algo) - return -ENOENT; + u8 csum; + int res; - if (!__get_eeprom_i2c_addr(adev, &control->i2c_address)) - return -EINVAL; + mutex_lock(&control->ras_tbl_mutex); - mutex_init(&control->tbl_mutex); + hdr->header = RAS_TABLE_HDR_VAL; + hdr->version = RAS_TABLE_VER; + hdr->first_rec_offset = RAS_RECORD_START; + hdr->tbl_size = RAS_TABLE_HEADER_SIZE; - msg.addr = control->i2c_address; - /* Read/Create table header from EEPROM address 0 */ - ret = i2c_transfer(&adev->pm.smu_i2c, &msg, 1); - if (ret < 1) { - DRM_ERROR("Failed to read EEPROM table header, ret:%d", ret); - return ret; - } + csum = __calc_hdr_byte_sum(control); + csum = -csum; + hdr->checksum = csum; + res = __write_table_header(control); - __decode_table_header_from_buff(hdr, &buff[2]); + control->ras_num_recs = 0; + control->ras_fri = 0; - if (hdr->header == EEPROM_TABLE_HDR_VAL) { - control->num_recs = (hdr->tbl_size - EEPROM_TABLE_HEADER_SIZE) / - EEPROM_TABLE_RECORD_SIZE; - control->tbl_byte_sum = __calc_hdr_byte_sum(control); - control->next_addr = EEPROM_RECORD_START; + amdgpu_ras_debugfs_set_ret_size(control); - DRM_DEBUG_DRIVER("Found existing EEPROM table with %d records", - control->num_recs); - - } else if ((hdr->header == EEPROM_TABLE_HDR_BAD) && - (amdgpu_bad_page_threshold != 0)) { - if (ras->bad_page_cnt_threshold > control->num_recs) { - dev_info(adev->dev, "Using one valid bigger bad page " - "threshold and correcting eeprom header tag.\n"); - ret = amdgpu_ras_eeprom_correct_header_tag(control, - EEPROM_TABLE_HDR_VAL); - } else { - *exceed_err_limit = true; - dev_err(adev->dev, "Exceeding the bad_page_threshold parameter, " - "disabling the GPU.\n"); - } - } else { - DRM_INFO("Creating new EEPROM table"); + mutex_unlock(&control->ras_tbl_mutex); - ret = amdgpu_ras_eeprom_reset_table(control); - } - - return ret == 1 ? 
0 : -EIO; + return res; } -static void __encode_table_record_to_buff(struct amdgpu_ras_eeprom_control *control, - struct eeprom_table_record *record, - unsigned char *buff) +static void +__encode_table_record_to_buf(struct amdgpu_ras_eeprom_control *control, + struct eeprom_table_record *record, + unsigned char *buf) { __le64 tmp = 0; int i = 0; /* Next are all record fields according to EEPROM page spec in LE foramt */ - buff[i++] = record->err_type; + buf[i++] = record->err_type; - buff[i++] = record->bank; + buf[i++] = record->bank; tmp = cpu_to_le64(record->ts); - memcpy(buff + i, &tmp, 8); + memcpy(buf + i, &tmp, 8); i += 8; tmp = cpu_to_le64((record->offset & 0xffffffffffff)); - memcpy(buff + i, &tmp, 6); + memcpy(buf + i, &tmp, 6); i += 6; - buff[i++] = record->mem_channel; - buff[i++] = record->mcumc_id; + buf[i++] = record->mem_channel; + buf[i++] = record->mcumc_id; tmp = cpu_to_le64((record->retired_page & 0xffffffffffff)); - memcpy(buff + i, &tmp, 6); + memcpy(buf + i, &tmp, 6); } -static void __decode_table_record_from_buff(struct amdgpu_ras_eeprom_control *control, - struct eeprom_table_record *record, - unsigned char *buff) +static void +__decode_table_record_from_buf(struct amdgpu_ras_eeprom_control *control, + struct eeprom_table_record *record, + unsigned char *buf) { __le64 tmp = 0; int i = 0; /* Next are all record fields according to EEPROM page spec in LE foramt */ - record->err_type = buff[i++]; + record->err_type = buf[i++]; - record->bank = buff[i++]; + record->bank = buf[i++]; - memcpy(&tmp, buff + i, 8); + memcpy(&tmp, buf + i, 8); record->ts = le64_to_cpu(tmp); i += 8; - memcpy(&tmp, buff + i, 6); + memcpy(&tmp, buf + i, 6); record->offset = (le64_to_cpu(tmp) & 0xffffffffffff); i += 6; - record->mem_channel = buff[i++]; - record->mcumc_id = buff[i++]; + record->mem_channel = buf[i++]; + record->mcumc_id = buf[i++]; - memcpy(&tmp, buff + i, 6); + memcpy(&tmp, buf + i, 6); record->retired_page = (le64_to_cpu(tmp) & 0xffffffffffff); } -/* - * When reaching end of EEPROM memory jump back to 0 record address - * When next record access will go beyond EEPROM page boundary modify bits A17/A8 - * in I2C selector to go to next page - */ -static uint32_t __correct_eeprom_dest_address(uint32_t curr_address) -{ - uint32_t next_address = curr_address + EEPROM_TABLE_RECORD_SIZE; - - /* When all EEPROM memory used jump back to 0 address */ - if (next_address > EEPROM_SIZE_BYTES) { - DRM_INFO("Reached end of EEPROM memory, jumping to 0 " - "and overriding old record"); - return EEPROM_RECORD_START; - } - - /* - * To check if we overflow page boundary compare next address with - * current and see if bits 17/8 of the EEPROM address will change - * If they do start from the next 256b page - * - * https://www.st.com/resource/en/datasheet/m24m02-dr.pdf sec. 
5.1.2 - */ - if ((curr_address & EEPROM_ADDR_MSB_MASK) != (next_address & EEPROM_ADDR_MSB_MASK)) { - DRM_DEBUG_DRIVER("Reached end of EEPROM memory page, jumping to next: %lx", - (next_address & EEPROM_ADDR_MSB_MASK)); - - return (next_address & EEPROM_ADDR_MSB_MASK); - } - - return curr_address; -} - bool amdgpu_ras_eeprom_check_err_threshold(struct amdgpu_device *adev) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); @@ -458,197 +362,756 @@ bool amdgpu_ras_eeprom_check_err_threshold(struct amdgpu_device *adev) if (!(con->features & BIT(AMDGPU_RAS_BLOCK__UMC))) return false; - if (con->eeprom_control.tbl_hdr.header == EEPROM_TABLE_HDR_BAD) { + if (con->eeprom_control.tbl_hdr.header == RAS_TABLE_HDR_BAD) { dev_warn(adev->dev, "This GPU is in BAD status."); - dev_warn(adev->dev, "Please retire it or setting one bigger " - "threshold value when reloading driver.\n"); + dev_warn(adev->dev, "Please retire it or set a larger " + "threshold value when reloading driver.\n"); return true; } return false; } -int amdgpu_ras_eeprom_process_recods(struct amdgpu_ras_eeprom_control *control, - struct eeprom_table_record *records, - bool write, - int num) +/** + * __amdgpu_ras_eeprom_write -- write indexed from buffer to EEPROM + * @control: pointer to control structure + * @buf: pointer to buffer containing data to write + * @fri: start writing at this index + * @num: number of records to write + * + * The caller must hold the table mutex in @control. + * Return 0 on success, -errno otherwise. + */ +static int __amdgpu_ras_eeprom_write(struct amdgpu_ras_eeprom_control *control, + u8 *buf, const u32 fri, const u32 num) { - int i, ret = 0; - struct i2c_msg *msgs, *msg; - unsigned char *buffs, *buff; - struct eeprom_table_record *record; struct amdgpu_device *adev = to_amdgpu_device(control); - struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + u32 buf_size; + int res; - if (!__is_ras_eeprom_supported(adev)) - return 0; + /* i2c may be unstable in gpu reset */ + down_read(&adev->reset_sem); + buf_size = num * RAS_TABLE_RECORD_SIZE; + res = amdgpu_eeprom_write(&adev->pm.smu_i2c, + control->i2c_address + + RAS_INDEX_TO_OFFSET(control, fri), + buf, buf_size); + up_read(&adev->reset_sem); + if (res < 0) { + DRM_ERROR("Writing %d EEPROM table records error:%d", + num, res); + } else if (res < buf_size) { + /* Short write, return error. + */ + DRM_ERROR("Wrote %d records out of %d", + res / RAS_TABLE_RECORD_SIZE, num); + res = -EIO; + } else { + res = 0; + } - buffs = kcalloc(num, EEPROM_ADDRESS_SIZE + EEPROM_TABLE_RECORD_SIZE, - GFP_KERNEL); - if (!buffs) - return -ENOMEM; + return res; +} - mutex_lock(&control->tbl_mutex); +static int +amdgpu_ras_eeprom_append_table(struct amdgpu_ras_eeprom_control *control, + struct eeprom_table_record *record, + const u32 num) +{ + u32 a, b, i; + u8 *buf, *pp; + int res; + + buf = kcalloc(num, RAS_TABLE_RECORD_SIZE, GFP_KERNEL); + if (!buf) + return -ENOMEM; - msgs = kcalloc(num, sizeof(*msgs), GFP_KERNEL); - if (!msgs) { - ret = -ENOMEM; - goto free_buff; + /* Encode all of them in one go. + */ + pp = buf; + for (i = 0; i < num; i++, pp += RAS_TABLE_RECORD_SIZE) + __encode_table_record_to_buf(control, &record[i], pp); + + /* a, first record index to write into. + * b, last record index to write into. + * a = first index to read (fri) + number of records in the table, + * b = a + @num - 1. 
+ * Let N = control->ras_max_num_record_count, then we have, + * case 0: 0 <= a <= b < N, + * just append @num records starting at a; + * case 1: 0 <= a < N <= b, + * append (N - a) records starting at a, and + * append the remainder, b % N + 1, starting at 0. + * case 2: 0 <= fri < N <= a <= b, then modulo N we get two subcases, + * case 2a: 0 <= a <= b < N + * append num records starting at a; and fix fri if b overwrote it, + * and since a <= b, if b overwrote it then a must've also, + * and if b didn't overwrite it, then a didn't also. + * case 2b: 0 <= b < a < N + * write num records starting at a, which wraps around 0=N + * and overwrite fri unconditionally. Now from case 2a, + * this means that b eclipsed fri to overwrite it and wrap + * around 0 again, i.e. b = 2N+r pre modulo N, so we unconditionally + * set fri = b + 1 (mod N). + * Now, since fri is updated in every case, except the trivial case 0, + * the number of records present in the table after writing, is, + * num_recs - 1 = b - fri (mod N), and we take the positive value, + * by adding an arbitrary multiple of N before taking the modulo N + * as shown below. + */ + a = control->ras_fri + control->ras_num_recs; + b = a + num - 1; + if (b < control->ras_max_record_count) { + res = __amdgpu_ras_eeprom_write(control, buf, a, num); + } else if (a < control->ras_max_record_count) { + u32 g0, g1; + + g0 = control->ras_max_record_count - a; + g1 = b % control->ras_max_record_count + 1; + res = __amdgpu_ras_eeprom_write(control, buf, a, g0); + if (res) + goto Out; + res = __amdgpu_ras_eeprom_write(control, + buf + g0 * RAS_TABLE_RECORD_SIZE, + 0, g1); + if (res) + goto Out; + if (g1 > control->ras_fri) + control->ras_fri = g1 % control->ras_max_record_count; + } else { + a %= control->ras_max_record_count; + b %= control->ras_max_record_count; + + if (a <= b) { + /* Note that, b - a + 1 = num. */ + res = __amdgpu_ras_eeprom_write(control, buf, a, num); + if (res) + goto Out; + if (b >= control->ras_fri) + control->ras_fri = (b + 1) % control->ras_max_record_count; + } else { + u32 g0, g1; + + /* b < a, which means, we write from + * a to the end of the table, and from + * the start of the table to b. + */ + g0 = control->ras_max_record_count - a; + g1 = b + 1; + res = __amdgpu_ras_eeprom_write(control, buf, a, g0); + if (res) + goto Out; + res = __amdgpu_ras_eeprom_write(control, + buf + g0 * RAS_TABLE_RECORD_SIZE, + 0, g1); + if (res) + goto Out; + control->ras_fri = g1 % control->ras_max_record_count; + } } + control->ras_num_recs = 1 + (control->ras_max_record_count + b + - control->ras_fri) + % control->ras_max_record_count; +Out: + kfree(buf); + return res; +} - /* - * If saved bad pages number exceeds the bad page threshold for - * the whole VRAM, update table header to mark the BAD GPU tag - * and schedule one ras recovery after eeprom write is done, - * this can avoid the missing for latest records. - * - * This new header will be picked up and checked in the bootup - * by ras recovery, which may break bootup process to notify - * user this GPU is in bad state and to retire such GPU for - * further check. +static int +amdgpu_ras_eeprom_update_header(struct amdgpu_ras_eeprom_control *control) +{ + struct amdgpu_device *adev = to_amdgpu_device(control); + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + u8 *buf, *pp, csum; + u32 buf_size; + int res; + + /* Modify the header if it exceeds. 
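A worked example of the wrap-around arithmetic described in the comment above, with illustrative numbers only:

/* Example: let N = 8, ras_fri = 6, ras_num_recs = 5, append num = 4.
 *   a = 6 + 5 = 11, b = 11 + 4 - 1 = 14        -> case 2 (N <= a <= b)
 *   a %= 8 -> 3, b %= 8 -> 6, and a <= b       -> case 2a
 *   write the 4 records at indices 3..6; b >= ras_fri, so the record
 *   at index 6 (the oldest) was overwritten and
 *   ras_fri = (6 + 1) % 8 = 7
 *   ras_num_recs = 1 + (8 + 6 - 7) % 8 = 8     -> the table is now full
 */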
*/ - if (write && (amdgpu_bad_page_threshold != 0) && - ((control->num_recs + num) >= ras->bad_page_cnt_threshold)) { + if (amdgpu_bad_page_threshold != 0 && + control->ras_num_recs >= ras->bad_page_cnt_threshold) { dev_warn(adev->dev, - "Saved bad pages(%d) reaches threshold value(%d).\n", - control->num_recs + num, ras->bad_page_cnt_threshold); - control->tbl_hdr.header = EEPROM_TABLE_HDR_BAD; + "Saved bad pages %d reaches threshold value %d\n", + control->ras_num_recs, ras->bad_page_cnt_threshold); + control->tbl_hdr.header = RAS_TABLE_HDR_BAD; + } + + control->tbl_hdr.version = RAS_TABLE_VER; + control->tbl_hdr.first_rec_offset = RAS_INDEX_TO_OFFSET(control, control->ras_fri); + control->tbl_hdr.tbl_size = RAS_TABLE_HEADER_SIZE + control->ras_num_recs * RAS_TABLE_RECORD_SIZE; + control->tbl_hdr.checksum = 0; + + buf_size = control->ras_num_recs * RAS_TABLE_RECORD_SIZE; + buf = kcalloc(control->ras_num_recs, RAS_TABLE_RECORD_SIZE, GFP_KERNEL); + if (!buf) { + DRM_ERROR("allocating memory for table of size %d bytes failed\n", + control->tbl_hdr.tbl_size); + res = -ENOMEM; + goto Out; } - /* In case of overflow just start from beginning to not lose newest records */ - if (write && (control->next_addr + EEPROM_TABLE_RECORD_SIZE * num > EEPROM_SIZE_BYTES)) - control->next_addr = EEPROM_RECORD_START; + down_read(&adev->reset_sem); + res = amdgpu_eeprom_read(&adev->pm.smu_i2c, + control->i2c_address + + control->ras_record_offset, + buf, buf_size); + up_read(&adev->reset_sem); + if (res < 0) { + DRM_ERROR("EEPROM failed reading records:%d\n", + res); + goto Out; + } else if (res < buf_size) { + DRM_ERROR("EEPROM read %d out of %d bytes\n", + res, buf_size); + res = -EIO; + goto Out; + } - /* - * TODO Currently makes EEPROM writes for each record, this creates - * internal fragmentation. Optimized the code to do full page write of - * 256b + /* Recalc the checksum. */ - for (i = 0; i < num; i++) { - buff = &buffs[i * (EEPROM_ADDRESS_SIZE + EEPROM_TABLE_RECORD_SIZE)]; - record = &records[i]; - msg = &msgs[i]; + csum = 0; + for (pp = buf; pp < buf + buf_size; pp++) + csum += *pp; + + csum += __calc_hdr_byte_sum(control); + /* avoid sign extension when assigning to "checksum" */ + csum = -csum; + control->tbl_hdr.checksum = csum; + res = __write_table_header(control); +Out: + kfree(buf); + return res; +} - control->next_addr = __correct_eeprom_dest_address(control->next_addr); +/** + * amdgpu_ras_eeprom_append -- append records to the EEPROM RAS table + * @control: pointer to control structure + * @record: array of records to append + * @num: number of records in @record array + * + * Append @num records to the table, calculate the checksum and write + * the table back to EEPROM. The maximum number of records that + * can be appended is between 1 and control->ras_max_record_count, + * regardless of how many records are already stored in the table. + * + * Return 0 on success or if EEPROM is not supported, -errno on error. + */ +int amdgpu_ras_eeprom_append(struct amdgpu_ras_eeprom_control *control, + struct eeprom_table_record *record, + const u32 num) +{ + struct amdgpu_device *adev = to_amdgpu_device(control); + int res; - /* - * Update bits 16,17 of EEPROM address in I2C address by setting them - * to bits 1,2 of Device address byte - */ - msg->addr = control->i2c_address | - ((control->next_addr & EEPROM_ADDR_MSB_MASK) >> 15); - msg->flags = write ? 
0 : I2C_M_RD; - msg->len = EEPROM_ADDRESS_SIZE + EEPROM_TABLE_RECORD_SIZE; - msg->buf = buff; - - /* Insert the EEPROM dest addess, bits 0-15 */ - buff[0] = ((control->next_addr >> 8) & 0xff); - buff[1] = (control->next_addr & 0xff); - - /* EEPROM table content is stored in LE format */ - if (write) - __encode_table_record_to_buff(control, record, buff + EEPROM_ADDRESS_SIZE); - - /* - * The destination EEPROM address might need to be corrected to account - * for page or entire memory wrapping - */ - control->next_addr += EEPROM_TABLE_RECORD_SIZE; + if (!__is_ras_eeprom_supported(adev)) + return 0; + + if (num == 0) { + DRM_ERROR("will not append 0 records\n"); + return -EINVAL; + } else if (num > control->ras_max_record_count) { + DRM_ERROR("cannot append %d records than the size of table %d\n", + num, control->ras_max_record_count); + return -EINVAL; } + mutex_lock(&control->ras_tbl_mutex); + + res = amdgpu_ras_eeprom_append_table(control, record, num); + if (!res) + res = amdgpu_ras_eeprom_update_header(control); + if (!res) + amdgpu_ras_debugfs_set_ret_size(control); + + mutex_unlock(&control->ras_tbl_mutex); + return res; +} + +/** + * __amdgpu_ras_eeprom_read -- read indexed from EEPROM into buffer + * @control: pointer to control structure + * @buf: pointer to buffer to read into + * @fri: first record index, start reading at this index, absolute index + * @num: number of records to read + * + * The caller must hold the table mutex in @control. + * Return 0 on success, -errno otherwise. + */ +static int __amdgpu_ras_eeprom_read(struct amdgpu_ras_eeprom_control *control, + u8 *buf, const u32 fri, const u32 num) +{ + struct amdgpu_device *adev = to_amdgpu_device(control); + u32 buf_size; + int res; + /* i2c may be unstable in gpu reset */ down_read(&adev->reset_sem); - ret = i2c_transfer(&adev->pm.smu_i2c, msgs, num); + buf_size = num * RAS_TABLE_RECORD_SIZE; + res = amdgpu_eeprom_read(&adev->pm.smu_i2c, + control->i2c_address + + RAS_INDEX_TO_OFFSET(control, fri), + buf, buf_size); up_read(&adev->reset_sem); + if (res < 0) { + DRM_ERROR("Reading %d EEPROM table records error:%d", + num, res); + } else if (res < buf_size) { + /* Short read, return error. + */ + DRM_ERROR("Read %d records out of %d", + res / RAS_TABLE_RECORD_SIZE, num); + res = -EIO; + } else { + res = 0; + } + + return res; +} + +/** + * amdgpu_ras_eeprom_read -- read EEPROM + * @control: pointer to control structure + * @record: array of records to read into + * @num: number of records in @record + * + * Reads num records from the RAS table in EEPROM and + * writes the data into @record array. + * + * Returns 0 on success, -errno on error. + */ +int amdgpu_ras_eeprom_read(struct amdgpu_ras_eeprom_control *control, + struct eeprom_table_record *record, + const u32 num) +{ + struct amdgpu_device *adev = to_amdgpu_device(control); + int i, res; + u8 *buf, *pp; + u32 g0, g1; - if (ret < 1) { - DRM_ERROR("Failed to process EEPROM table records, ret:%d", ret); + if (!__is_ras_eeprom_supported(adev)) + return 0; - /* TODO Restore prev next EEPROM address ? 
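A hypothetical caller sketch for the new append API; it is not taken from this patch, and con, bp and adev are assumed to exist in the caller's context. The record fields follow struct eeprom_table_record as encoded above.

/* Hypothetical caller (illustrative only): queue one retired page. */
struct eeprom_table_record rec = {
	.err_type     = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE,
	.retired_page = bp >> AMDGPU_GPU_PAGE_SHIFT,
	.ts           = ktime_get_real_seconds(),
};
int err;

err = amdgpu_ras_eeprom_append(&con->eeprom_control, &rec, 1);
if (err)
	dev_err(adev->dev, "failed to append RAS record: %d\n", err);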
*/ - goto free_msgs; + if (num == 0) { + DRM_ERROR("will not read 0 records\n"); + return -EINVAL; + } else if (num > control->ras_num_recs) { + DRM_ERROR("too many records to read:%d available:%d\n", + num, control->ras_num_recs); + return -EINVAL; } + buf = kcalloc(num, RAS_TABLE_RECORD_SIZE, GFP_KERNEL); + if (!buf) + return -ENOMEM; - if (!write) { - for (i = 0; i < num; i++) { - buff = &buffs[i*(EEPROM_ADDRESS_SIZE + EEPROM_TABLE_RECORD_SIZE)]; - record = &records[i]; + /* Determine how many records to read, from the first record + * index, fri, to the end of the table, and from the beginning + * of the table, such that the total number of records is + * @num, and we handle wrap around when fri > 0 and + * fri + num > RAS_MAX_RECORD_COUNT. + * + * First we compute the index of the last element + * which would be fetched from each region, + * g0 is in [fri, fri + num - 1], and + * g1 is in [0, RAS_MAX_RECORD_COUNT - 1]. + * Then, if g0 < RAS_MAX_RECORD_COUNT, the index of + * the last element to fetch, we set g0 to _the number_ + * of elements to fetch, @num, since we know that the last + * indexed to be fetched does not exceed the table. + * + * If, however, g0 >= RAS_MAX_RECORD_COUNT, then + * we set g0 to the number of elements to read + * until the end of the table, and g1 to the number of + * elements to read from the beginning of the table. + */ + g0 = control->ras_fri + num - 1; + g1 = g0 % control->ras_max_record_count; + if (g0 < control->ras_max_record_count) { + g0 = num; + g1 = 0; + } else { + g0 = control->ras_max_record_count - control->ras_fri; + g1 += 1; + } - __decode_table_record_from_buff(control, record, buff + EEPROM_ADDRESS_SIZE); - } + mutex_lock(&control->ras_tbl_mutex); + res = __amdgpu_ras_eeprom_read(control, buf, control->ras_fri, g0); + if (res) + goto Out; + if (g1) { + res = __amdgpu_ras_eeprom_read(control, + buf + g0 * RAS_TABLE_RECORD_SIZE, + 0, g1); + if (res) + goto Out; } - if (write) { - uint32_t old_hdr_byte_sum = __calc_hdr_byte_sum(control); + res = 0; + + /* Read up everything? Then transform. + */ + pp = buf; + for (i = 0; i < num; i++, pp += RAS_TABLE_RECORD_SIZE) + __decode_table_record_from_buf(control, &record[i], pp); +Out: + kfree(buf); + mutex_unlock(&control->ras_tbl_mutex); + + return res; +} + +inline uint32_t amdgpu_ras_eeprom_max_record_count(void) +{ + return RAS_MAX_RECORD_COUNT; +} + +static ssize_t +amdgpu_ras_debugfs_eeprom_size_read(struct file *f, char __user *buf, + size_t size, loff_t *pos) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private; + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + struct amdgpu_ras_eeprom_control *control = ras ? 
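A worked example of the g0/g1 split computed above for a wrapped read, with illustrative numbers only:

/* Example: N = 8, ras_fri = 6, read num = 4 records.
 *   g0 = 6 + 4 - 1 = 9 >= 8, so the read wraps:
 *   g0 = 8 - 6 = 2      -> read indices 6 and 7
 *   g1 = 9 % 8 + 1 = 2  -> read indices 0 and 1
 * giving all 4 records in stored order, starting at the first record
 * index.
 */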
&ras->eeprom_control : NULL; + u8 data[50]; + int res; + + if (!size) + return size; + + if (!ras || !control) { + res = snprintf(data, sizeof(data), "Not supported\n"); + } else { + res = snprintf(data, sizeof(data), "%d bytes or %d records\n", + RAS_TBL_SIZE_BYTES, control->ras_max_record_count); + } + + if (*pos >= res) + return 0; + + res -= *pos; + res = min_t(size_t, res, size); + + if (copy_to_user(buf, &data[*pos], res)) + return -EFAULT; + + *pos += res; - /* - * Update table header with size and CRC and account for table - * wrap around where the assumption is that we treat it as empty - * table - * - * TODO - Check the assumption is correct + return res; +} + +const struct file_operations amdgpu_ras_debugfs_eeprom_size_ops = { + .owner = THIS_MODULE, + .read = amdgpu_ras_debugfs_eeprom_size_read, + .write = NULL, + .llseek = default_llseek, +}; + +static const char *tbl_hdr_str = " Signature Version FirstOffs Size Checksum\n"; +static const char *tbl_hdr_fmt = "0x%08X 0x%08X 0x%08X 0x%08X 0x%08X\n"; +#define tbl_hdr_fmt_size (5 * (2+8) + 4 + 1) +static const char *rec_hdr_str = "Index Offset ErrType Bank/CU TimeStamp Offs/Addr MemChl MCUMCID RetiredPage\n"; +static const char *rec_hdr_fmt = "%5d 0x%05X %7s 0x%02X 0x%016llX 0x%012llX 0x%02X 0x%02X 0x%012llX\n"; +#define rec_hdr_fmt_size (5 + 1 + 7 + 1 + 7 + 1 + 7 + 1 + 18 + 1 + 14 + 1 + 6 + 1 + 7 + 1 + 14 + 1) + +static const char *record_err_type_str[AMDGPU_RAS_EEPROM_ERR_COUNT] = { + "ignore", + "re", + "ue", +}; + +static loff_t amdgpu_ras_debugfs_table_size(struct amdgpu_ras_eeprom_control *control) +{ + return strlen(tbl_hdr_str) + tbl_hdr_fmt_size + + strlen(rec_hdr_str) + rec_hdr_fmt_size * control->ras_num_recs; +} + +void amdgpu_ras_debugfs_set_ret_size(struct amdgpu_ras_eeprom_control *control) +{ + struct amdgpu_ras *ras = container_of(control, struct amdgpu_ras, + eeprom_control); + struct dentry *de = ras->de_ras_eeprom_table; + + if (de) + d_inode(de)->i_size = amdgpu_ras_debugfs_table_size(control); +} + +static ssize_t amdgpu_ras_debugfs_table_read(struct file *f, char __user *buf, + size_t size, loff_t *pos) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private; + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + struct amdgpu_ras_eeprom_control *control = &ras->eeprom_control; + const size_t orig_size = size; + int res = -EFAULT; + size_t data_len; + + mutex_lock(&control->ras_tbl_mutex); + + /* We want *pos - data_len > 0, which means there's + * bytes to be printed from data. 
+ */ + data_len = strlen(tbl_hdr_str); + if (*pos < data_len) { + data_len -= *pos; + data_len = min_t(size_t, data_len, size); + if (copy_to_user(buf, &tbl_hdr_str[*pos], data_len)) + goto Out; + buf += data_len; + size -= data_len; + *pos += data_len; + } + + data_len = strlen(tbl_hdr_str) + tbl_hdr_fmt_size; + if (*pos < data_len && size > 0) { + u8 data[tbl_hdr_fmt_size + 1]; + loff_t lpos; + + snprintf(data, sizeof(data), tbl_hdr_fmt, + control->tbl_hdr.header, + control->tbl_hdr.version, + control->tbl_hdr.first_rec_offset, + control->tbl_hdr.tbl_size, + control->tbl_hdr.checksum); + + data_len -= *pos; + data_len = min_t(size_t, data_len, size); + lpos = *pos - strlen(tbl_hdr_str); + if (copy_to_user(buf, &data[lpos], data_len)) + goto Out; + buf += data_len; + size -= data_len; + *pos += data_len; + } + + data_len = strlen(tbl_hdr_str) + tbl_hdr_fmt_size + strlen(rec_hdr_str); + if (*pos < data_len && size > 0) { + loff_t lpos; + + data_len -= *pos; + data_len = min_t(size_t, data_len, size); + lpos = *pos - strlen(tbl_hdr_str) - tbl_hdr_fmt_size; + if (copy_to_user(buf, &rec_hdr_str[lpos], data_len)) + goto Out; + buf += data_len; + size -= data_len; + *pos += data_len; + } + + data_len = amdgpu_ras_debugfs_table_size(control); + if (*pos < data_len && size > 0) { + u8 dare[RAS_TABLE_RECORD_SIZE]; + u8 data[rec_hdr_fmt_size + 1]; + struct eeprom_table_record record; + int s, r; + + /* Find the starting record index */ - control->num_recs += num; - control->num_recs %= EEPROM_MAX_RECORD_NUM; - control->tbl_hdr.tbl_size += EEPROM_TABLE_RECORD_SIZE * num; - if (control->tbl_hdr.tbl_size > EEPROM_SIZE_BYTES) - control->tbl_hdr.tbl_size = EEPROM_TABLE_HEADER_SIZE + - control->num_recs * EEPROM_TABLE_RECORD_SIZE; - - __update_tbl_checksum(control, records, num, old_hdr_byte_sum); - - __update_table_header(control, buffs); - } else if (!__validate_tbl_checksum(control, records, num)) { - DRM_WARN("EEPROM Table checksum mismatch!"); - /* TODO Uncomment when EEPROM read/write is relliable */ - /* ret = -EIO; */ + s = *pos - strlen(tbl_hdr_str) - tbl_hdr_fmt_size - + strlen(rec_hdr_str); + s = s / rec_hdr_fmt_size; + r = *pos - strlen(tbl_hdr_str) - tbl_hdr_fmt_size - + strlen(rec_hdr_str); + r = r % rec_hdr_fmt_size; + + for ( ; size > 0 && s < control->ras_num_recs; s++) { + u32 ai = RAS_RI_TO_AI(control, s); + /* Read a single record + */ + res = __amdgpu_ras_eeprom_read(control, dare, ai, 1); + if (res) + goto Out; + __decode_table_record_from_buf(control, &record, dare); + snprintf(data, sizeof(data), rec_hdr_fmt, + s, + RAS_INDEX_TO_OFFSET(control, ai), + record_err_type_str[record.err_type], + record.bank, + record.ts, + record.offset, + record.mem_channel, + record.mcumc_id, + record.retired_page); + + data_len = min_t(size_t, rec_hdr_fmt_size - r, size); + if (copy_to_user(buf, &data[r], data_len)) { + res = -EFAULT; + goto Out; + } + buf += data_len; + size -= data_len; + *pos += data_len; + r = 0; + } } + res = 0; +Out: + mutex_unlock(&control->ras_tbl_mutex); + return res < 0 ? res : orig_size - size; +} -free_msgs: - kfree(msgs); +static ssize_t +amdgpu_ras_debugfs_eeprom_table_read(struct file *f, char __user *buf, + size_t size, loff_t *pos) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private; + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + struct amdgpu_ras_eeprom_control *control = ras ? 
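A worked example of how the debugfs read above maps a resumed file offset back to a record index and an intra-line offset:

/* Example: each formatted record line is rec_hdr_fmt_size (94) bytes.
 * If a read resumes at an offset that lands 200 bytes into the record
 * region, then
 *   s = 200 / 94 = 2   -> resume at record index 2
 *   r = 200 % 94 = 12  -> 12 bytes into that record's line
 * so output continues mid-line without re-reading earlier records.
 */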
&ras->eeprom_control : NULL; + u8 data[81]; + int res; + + if (!size) + return size; + + if (!ras || !control) { + res = snprintf(data, sizeof(data), "Not supported\n"); + if (*pos >= res) + return 0; + + res -= *pos; + res = min_t(size_t, res, size); -free_buff: - kfree(buffs); + if (copy_to_user(buf, &data[*pos], res)) + return -EFAULT; - mutex_unlock(&control->tbl_mutex); + *pos += res; - return ret == num ? 0 : -EIO; + return res; + } else { + return amdgpu_ras_debugfs_table_read(f, buf, size, pos); + } } -inline uint32_t amdgpu_ras_eeprom_get_record_max_length(void) +const struct file_operations amdgpu_ras_debugfs_eeprom_table_ops = { + .owner = THIS_MODULE, + .read = amdgpu_ras_debugfs_eeprom_table_read, + .write = NULL, + .llseek = default_llseek, +}; + +/** + * __verify_ras_table_checksum -- verify the RAS EEPROM table checksum + * @control: pointer to control structure + * + * Check the checksum of the stored in EEPROM RAS table. + * + * Return 0 if the checksum is correct, + * positive if it is not correct, and + * -errno on I/O error. + */ +static int __verify_ras_table_checksum(struct amdgpu_ras_eeprom_control *control) { - return EEPROM_MAX_RECORD_NUM; + struct amdgpu_device *adev = to_amdgpu_device(control); + int buf_size, res; + u8 csum, *buf, *pp; + + buf_size = RAS_TABLE_HEADER_SIZE + + control->ras_num_recs * RAS_TABLE_RECORD_SIZE; + buf = kzalloc(buf_size, GFP_KERNEL); + if (!buf) { + DRM_ERROR("Out of memory checking RAS table checksum.\n"); + return -ENOMEM; + } + + res = amdgpu_eeprom_read(&adev->pm.smu_i2c, + control->i2c_address + + control->ras_header_offset, + buf, buf_size); + if (res < buf_size) { + DRM_ERROR("Partial read for checksum, res:%d\n", res); + /* On partial reads, return -EIO. + */ + if (res >= 0) + res = -EIO; + goto Out; + } + + csum = 0; + for (pp = buf; pp < buf + buf_size; pp++) + csum += *pp; +Out: + kfree(buf); + return res < 0 ? res : csum; } -/* Used for testing if bugs encountered */ -#if 0 -void amdgpu_ras_eeprom_test(struct amdgpu_ras_eeprom_control *control) +int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control, + bool *exceed_err_limit) { - int i; - struct eeprom_table_record *recs = kcalloc(1, sizeof(*recs), GFP_KERNEL); + struct amdgpu_device *adev = to_amdgpu_device(control); + unsigned char buf[RAS_TABLE_HEADER_SIZE] = { 0 }; + struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr; + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + int res; + + *exceed_err_limit = false; + + if (!__is_ras_eeprom_supported(adev)) + return 0; - if (!recs) - return; + /* Verify i2c adapter is initialized */ + if (!adev->pm.smu_i2c.algo) + return -ENOENT; - for (i = 0; i < 1 ; i++) { - recs[i].address = 0xdeadbeef; - recs[i].retired_page = i; + if (!__get_eeprom_i2c_addr(adev, control)) + return -EINVAL; + + control->ras_header_offset = RAS_HDR_START; + control->ras_record_offset = RAS_RECORD_START; + control->ras_max_record_count = RAS_MAX_RECORD_COUNT; + mutex_init(&control->ras_tbl_mutex); + + /* Read the table header from EEPROM address */ + res = amdgpu_eeprom_read(&adev->pm.smu_i2c, + control->i2c_address + control->ras_header_offset, + buf, RAS_TABLE_HEADER_SIZE); + if (res < RAS_TABLE_HEADER_SIZE) { + DRM_ERROR("Failed to read EEPROM table header, res:%d", res); + return res >= 0 ? 
-EIO : res; } - if (!amdgpu_ras_eeprom_process_recods(control, recs, true, 1)) { + __decode_table_header_from_buf(hdr, buf); - memset(recs, 0, sizeof(*recs) * 1); + control->ras_num_recs = RAS_NUM_RECS(hdr); + control->ras_fri = RAS_OFFSET_TO_INDEX(control, hdr->first_rec_offset); - control->next_addr = EEPROM_RECORD_START; + if (hdr->header == RAS_TABLE_HDR_VAL) { + DRM_DEBUG_DRIVER("Found existing EEPROM table with %d records", + control->ras_num_recs); + res = __verify_ras_table_checksum(control); + if (res) + DRM_ERROR("RAS table incorrect checksum or error:%d\n", + res); + } else if (hdr->header == RAS_TABLE_HDR_BAD && + amdgpu_bad_page_threshold != 0) { + res = __verify_ras_table_checksum(control); + if (res) + DRM_ERROR("RAS Table incorrect checksum or error:%d\n", + res); + if (ras->bad_page_cnt_threshold > control->ras_num_recs) { + /* This means that, the threshold was increased since + * the last time the system was booted, and now, + * ras->bad_page_cnt_threshold - control->num_recs > 0, + * so that at least one more record can be saved, + * before the page count threshold is reached. + */ + dev_info(adev->dev, + "records:%d threshold:%d, resetting " + "RAS table header signature", + control->ras_num_recs, + ras->bad_page_cnt_threshold); + res = amdgpu_ras_eeprom_correct_header_tag(control, + RAS_TABLE_HDR_VAL); + } else { + *exceed_err_limit = true; + dev_err(adev->dev, + "RAS records:%d exceed threshold:%d, " + "maybe retire this GPU?", + control->ras_num_recs, ras->bad_page_cnt_threshold); + } + } else { + DRM_INFO("Creating a new EEPROM table"); - if (!amdgpu_ras_eeprom_process_recods(control, recs, false, 1)) { - for (i = 0; i < 1; i++) - DRM_INFO("rec.address :0x%llx, rec.retired_page :%llu", - recs[i].address, recs[i].retired_page); - } else - DRM_ERROR("Failed in reading from table"); + res = amdgpu_ras_eeprom_reset_table(control); + } - } else - DRM_ERROR("Failed in writing to table"); + return res < 0 ? res : 0; } -#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h index 178721170974..f95fc61b3021 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h @@ -28,10 +28,11 @@ struct amdgpu_device; -enum amdgpu_ras_eeprom_err_type{ - AMDGPU_RAS_EEPROM_ERR_PLACE_HOLDER, +enum amdgpu_ras_eeprom_err_type { + AMDGPU_RAS_EEPROM_ERR_NA, AMDGPU_RAS_EEPROM_ERR_RECOVERABLE, - AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE + AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE, + AMDGPU_RAS_EEPROM_ERR_COUNT, }; struct amdgpu_ras_eeprom_table_header { @@ -40,15 +41,45 @@ struct amdgpu_ras_eeprom_table_header { uint32_t first_rec_offset; uint32_t tbl_size; uint32_t checksum; -}__attribute__((__packed__)); +} __packed; struct amdgpu_ras_eeprom_control { struct amdgpu_ras_eeprom_table_header tbl_hdr; - uint32_t next_addr; - unsigned int num_recs; - struct mutex tbl_mutex; - uint32_t tbl_byte_sum; - uint16_t i2c_address; // 8-bit represented address + + /* Base I2C EEPPROM 19-bit memory address, + * where the table is located. For more information, + * see top of amdgpu_eeprom.c. + */ + u32 i2c_address; + + /* The byte offset off of @i2c_address + * where the table header is found, + * and where the records start--always + * right after the header. + */ + u32 ras_header_offset; + u32 ras_record_offset; + + /* Number of records in the table. + */ + u32 ras_num_recs; + + /* First record index to read, 0-based. + * Range is [0, num_recs-1]. 
This is + * an absolute index, starting right after + * the table header. + */ + u32 ras_fri; + + /* Maximum possible number of records + * we could store, i.e. the maximum capacity + * of the table. + */ + u32 ras_max_record_count; + + /* Protect table access via this mutex. + */ + struct mutex ras_tbl_mutex; }; /* @@ -74,21 +105,26 @@ struct eeprom_table_record { unsigned char mem_channel; unsigned char mcumc_id; -}__attribute__((__packed__)); +} __packed; int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control, - bool *exceed_err_limit); + bool *exceed_err_limit); + int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control); bool amdgpu_ras_eeprom_check_err_threshold(struct amdgpu_device *adev); -int amdgpu_ras_eeprom_process_recods(struct amdgpu_ras_eeprom_control *control, - struct eeprom_table_record *records, - bool write, - int num); +int amdgpu_ras_eeprom_read(struct amdgpu_ras_eeprom_control *control, + struct eeprom_table_record *records, const u32 num); + +int amdgpu_ras_eeprom_append(struct amdgpu_ras_eeprom_control *control, + struct eeprom_table_record *records, const u32 num); + +inline uint32_t amdgpu_ras_eeprom_max_record_count(void); -inline uint32_t amdgpu_ras_eeprom_get_record_max_length(void); +void amdgpu_ras_debugfs_set_ret_size(struct amdgpu_ras_eeprom_control *control); -void amdgpu_ras_eeprom_test(struct amdgpu_ras_eeprom_control *control); +extern const struct file_operations amdgpu_ras_debugfs_eeprom_size_ops; +extern const struct file_operations amdgpu_ras_debugfs_eeprom_table_ops; #endif // _AMDGPU_RAS_EEPROM_H diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index e7d3d0dbdd96..e713d31619fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -48,6 +48,9 @@ #define AMDGPU_FENCE_FLAG_INT (1 << 1) #define AMDGPU_FENCE_FLAG_TC_WB_ONLY (1 << 2) +/* fence flag bit to indicate the face is embedded in job*/ +#define AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT (DMA_FENCE_FLAG_USER_BITS + 1) + #define to_amdgpu_ring(s) container_of((s), struct amdgpu_ring, sched) #define AMDGPU_IB_POOL_SIZE (1024 * 1024) @@ -106,9 +109,6 @@ struct amdgpu_fence_driver { struct dma_fence **fences; }; -int amdgpu_fence_driver_init(struct amdgpu_device *adev); -void amdgpu_fence_driver_fini_hw(struct amdgpu_device *adev); -void amdgpu_fence_driver_fini_sw(struct amdgpu_device *adev); void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring); int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring, @@ -117,9 +117,11 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring, int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring, struct amdgpu_irq_src *irq_src, unsigned irq_type); -void amdgpu_fence_driver_suspend(struct amdgpu_device *adev); -void amdgpu_fence_driver_resume(struct amdgpu_device *adev); -int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **fence, +void amdgpu_fence_driver_hw_init(struct amdgpu_device *adev); +void amdgpu_fence_driver_hw_fini(struct amdgpu_device *adev); +int amdgpu_fence_driver_sw_init(struct amdgpu_device *adev); +void amdgpu_fence_driver_sw_fini(struct amdgpu_device *adev); +int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **fence, struct amdgpu_job *job, unsigned flags); int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s, uint32_t timeout); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h index 7a4775ab6804..00afd0dcae86 
100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h @@ -127,8 +127,8 @@ struct amdgpu_rlc_funcs { void (*reset)(struct amdgpu_device *adev); void (*start)(struct amdgpu_device *adev); void (*update_spm_vmid)(struct amdgpu_device *adev, unsigned vmid); - void (*rlcg_wreg)(struct amdgpu_device *adev, u32 offset, u32 v, u32 acc_flags, u32 hwip); - u32 (*rlcg_rreg)(struct amdgpu_device *adev, u32 offset, u32 acc_flags, u32 hwip); + void (*sriov_wreg)(struct amdgpu_device *adev, u32 offset, u32 v, u32 acc_flags, u32 hwip); + u32 (*sriov_rreg)(struct amdgpu_device *adev, u32 offset, u32 acc_flags, u32 hwip); bool (*is_rlcg_access_range)(struct amdgpu_device *adev, uint32_t reg); }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c index de91d29c9d96..65debb65a5df 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c @@ -105,7 +105,6 @@ int amdgpu_sdma_ras_late_init(struct amdgpu_device *adev, adev->sdma.ras_if->block = AMDGPU_RAS_BLOCK__SDMA; adev->sdma.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; adev->sdma.ras_if->sub_block_index = 0; - strcpy(adev->sdma.ras_if->name, "sdma"); } fs_info.head = ih_info->head = *adev->sdma.ras_if; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c index 123453999093..cc7597a15fe9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c @@ -80,7 +80,7 @@ void psp_securedisplay_parse_resp_status(struct psp_context *psp, void psp_prep_securedisplay_cmd_buf(struct psp_context *psp, struct securedisplay_cmd **cmd, enum ta_securedisplay_command command_id) { - *cmd = (struct securedisplay_cmd *)psp->securedisplay_context.securedisplay_shared_buf; + *cmd = (struct securedisplay_cmd *)psp->securedisplay_context.context.mem_context.shared_buf; memset(*cmd, 0, sizeof(struct securedisplay_cmd)); (*cmd)->status = TA_SECUREDISPLAY_STATUS__GENERIC_FAILURE; (*cmd)->cmd_id = command_id; @@ -170,7 +170,7 @@ void amdgpu_securedisplay_debugfs_init(struct amdgpu_device *adev) { #if defined(CONFIG_DEBUG_FS) - if (!adev->psp.securedisplay_context.securedisplay_initialized) + if (!adev->psp.securedisplay_context.context.initialized) return; debugfs_create_file("securedisplay_test", S_IWUSR, adev_to_drm(adev)->primary->debugfs_root, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index 1b2ceccaf5b0..862eb3c1c4c5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -28,6 +28,8 @@ * Christian König <christian.koenig@amd.com> */ +#include <linux/dma-fence-chain.h> + #include "amdgpu.h" #include "amdgpu_trace.h" #include "amdgpu_amdkfd.h" @@ -186,6 +188,55 @@ int amdgpu_sync_vm_fence(struct amdgpu_sync *sync, struct dma_fence *fence) return amdgpu_sync_fence(sync, fence); } +/* Determine based on the owner and mode if we should sync to a fence or not */ +static bool amdgpu_sync_test_fence(struct amdgpu_device *adev, + enum amdgpu_sync_mode mode, + void *owner, struct dma_fence *f) +{ + void *fence_owner = amdgpu_sync_get_owner(f); + + /* Always sync to moves, no matter what */ + if (fence_owner == AMDGPU_FENCE_OWNER_UNDEFINED) + return true; + + /* We only want to trigger KFD eviction fences on + * evict or move jobs. Skip KFD fences otherwise. 
+ */ + if (fence_owner == AMDGPU_FENCE_OWNER_KFD && + owner != AMDGPU_FENCE_OWNER_UNDEFINED) + return false; + + /* Never sync to VM updates either. */ + if (fence_owner == AMDGPU_FENCE_OWNER_VM && + owner != AMDGPU_FENCE_OWNER_UNDEFINED) + return false; + + /* Ignore fences depending on the sync mode */ + switch (mode) { + case AMDGPU_SYNC_ALWAYS: + return true; + + case AMDGPU_SYNC_NE_OWNER: + if (amdgpu_sync_same_dev(adev, f) && + fence_owner == owner) + return false; + break; + + case AMDGPU_SYNC_EQ_OWNER: + if (amdgpu_sync_same_dev(adev, f) && + fence_owner != owner) + return false; + break; + + case AMDGPU_SYNC_EXPLICIT: + return false; + } + + WARN(debug_evictions && fence_owner == AMDGPU_FENCE_OWNER_KFD, + "Adding eviction fence to sync obj"); + return true; +} + /** * amdgpu_sync_resv - sync to a reservation object * @@ -211,67 +262,34 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, /* always sync to the exclusive fence */ f = dma_resv_excl_fence(resv); - r = amdgpu_sync_fence(sync, f); + dma_fence_chain_for_each(f, f) { + struct dma_fence_chain *chain = to_dma_fence_chain(f); + + if (amdgpu_sync_test_fence(adev, mode, owner, chain ? + chain->fence : f)) { + r = amdgpu_sync_fence(sync, f); + dma_fence_put(f); + if (r) + return r; + break; + } + } flist = dma_resv_shared_list(resv); - if (!flist || r) - return r; + if (!flist) + return 0; for (i = 0; i < flist->shared_count; ++i) { - void *fence_owner; - f = rcu_dereference_protected(flist->shared[i], dma_resv_held(resv)); - fence_owner = amdgpu_sync_get_owner(f); - - /* Always sync to moves, no matter what */ - if (fence_owner == AMDGPU_FENCE_OWNER_UNDEFINED) { + if (amdgpu_sync_test_fence(adev, mode, owner, f)) { r = amdgpu_sync_fence(sync, f); if (r) - break; - } - - /* We only want to trigger KFD eviction fences on - * evict or move jobs. Skip KFD fences otherwise. - */ - if (fence_owner == AMDGPU_FENCE_OWNER_KFD && - owner != AMDGPU_FENCE_OWNER_UNDEFINED) - continue; - - /* Never sync to VM updates either. 
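A hypothetical call site, not from this patch, showing the sync modes the helper above implements; bo, sync and vm are assumed to exist in the caller's context.

/* Hypothetical usage sketch: a VM page-table update only needs to wait
 * for fences emitted by other owners, so it filters with
 * AMDGPU_SYNC_NE_OWNER; AMDGPU_SYNC_EXPLICIT would skip all implicit
 * fences instead.
 */
int r = amdgpu_sync_resv(adev, sync, bo->tbo.base.resv,
			 AMDGPU_SYNC_NE_OWNER, vm);
if (r)
	return r;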
*/ - if (fence_owner == AMDGPU_FENCE_OWNER_VM && - owner != AMDGPU_FENCE_OWNER_UNDEFINED) - continue; - - /* Ignore fences depending on the sync mode */ - switch (mode) { - case AMDGPU_SYNC_ALWAYS: - break; - - case AMDGPU_SYNC_NE_OWNER: - if (amdgpu_sync_same_dev(adev, f) && - fence_owner == owner) - continue; - break; - - case AMDGPU_SYNC_EQ_OWNER: - if (amdgpu_sync_same_dev(adev, f) && - fence_owner != owner) - continue; - break; - - case AMDGPU_SYNC_EXPLICIT: - continue; + return r; } - - WARN(debug_evictions && fence_owner == AMDGPU_FENCE_OWNER_KFD, - "Adding eviction fence to sync obj"); - r = amdgpu_sync_fence(sync, f); - if (r) - break; } - return r; + return 0; } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 3a55f08e00e1..38dade421d46 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -149,14 +149,16 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo, * BOs to be evicted from VRAM */ amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM | - AMDGPU_GEM_DOMAIN_GTT); + AMDGPU_GEM_DOMAIN_GTT | + AMDGPU_GEM_DOMAIN_CPU); abo->placements[0].fpfn = adev->gmc.visible_vram_size >> PAGE_SHIFT; abo->placements[0].lpfn = 0; abo->placement.busy_placement = &abo->placements[1]; abo->placement.num_busy_placement = 1; } else { /* Move to GTT memory */ - amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_GTT); + amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_GTT | + AMDGPU_GEM_DOMAIN_CPU); } break; case TTM_PL_TT: @@ -521,7 +523,7 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, hop->fpfn = 0; hop->lpfn = 0; hop->mem_type = TTM_PL_TT; - hop->flags = 0; + hop->flags = TTM_PL_FLAG_TEMPORARY; return -EMULTIHOP; } @@ -1121,7 +1123,7 @@ static int amdgpu_ttm_tt_populate(struct ttm_device *bdev, struct amdgpu_ttm_tt *gtt = (void *)ttm; /* user pages are bound by amdgpu_ttm_tt_pin_userptr() */ - if (gtt && gtt->userptr) { + if (gtt->userptr) { ttm->sg = kzalloc(sizeof(struct sg_table), GFP_KERNEL); if (!ttm->sg) return -ENOMEM; @@ -1146,7 +1148,7 @@ static void amdgpu_ttm_tt_unpopulate(struct ttm_device *bdev, struct amdgpu_ttm_tt *gtt = (void *)ttm; struct amdgpu_device *adev; - if (gtt && gtt->userptr) { + if (gtt->userptr) { amdgpu_ttm_tt_set_user_pages(ttm, NULL); kfree(ttm->sg); ttm->sg = NULL; @@ -1394,6 +1396,41 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, return ttm_bo_eviction_valuable(bo, place); } +static void amdgpu_ttm_vram_mm_access(struct amdgpu_device *adev, loff_t pos, + void *buf, size_t size, bool write) +{ + while (size) { + uint64_t aligned_pos = ALIGN_DOWN(pos, 4); + uint64_t bytes = 4 - (pos & 0x3); + uint32_t shift = (pos & 0x3) * 8; + uint32_t mask = 0xffffffff << shift; + uint32_t value = 0; + + if (size < bytes) { + mask &= 0xffffffff >> (bytes - size) * 8; + bytes = size; + } + + if (mask != 0xffffffff) { + amdgpu_device_mm_access(adev, aligned_pos, &value, 4, false); + if (write) { + value &= ~mask; + value |= (*(uint32_t *)buf << shift) & mask; + amdgpu_device_mm_access(adev, aligned_pos, &value, 4, true); + } else { + value = (value & mask) >> shift; + memcpy(buf, &value, bytes); + } + } else { + amdgpu_device_mm_access(adev, aligned_pos, buf, 4, write); + } + + pos += bytes; + buf += bytes; + size -= bytes; + } +} + /** * amdgpu_ttm_access_memory - Read or Write memory that backs a buffer object. 
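A worked trace of the unaligned read-modify-write helper above; the addresses are illustrative only.

/* Example: a 2-byte write to VRAM at pos = 0x1003 via the MM window.
 *   pass 1: aligned_pos = 0x1000, bytes = 4 - 3 = 1, shift = 24,
 *           mask = 0xff000000 -> read the dword, merge byte 3, write
 *           it back
 *   pass 2: pos = 0x1004, bytes = 4, but size = 1 < bytes, so
 *           mask = 0x000000ff, bytes = 1 -> read the next dword,
 *           merge byte 0, write it back
 * Fully aligned dwords in the middle of a larger access skip the
 * read-modify-write and go straight through amdgpu_device_mm_access().
 */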
* @@ -1413,8 +1450,6 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo, struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo); struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev); struct amdgpu_res_cursor cursor; - unsigned long flags; - uint32_t value = 0; int ret = 0; if (bo->resource->mem_type != TTM_PL_VRAM) @@ -1422,41 +1457,21 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo, amdgpu_res_first(bo->resource, offset, len, &cursor); while (cursor.remaining) { - uint64_t aligned_pos = cursor.start & ~(uint64_t)3; - uint64_t bytes = 4 - (cursor.start & 3); - uint32_t shift = (cursor.start & 3) * 8; - uint32_t mask = 0xffffffff << shift; - - if (cursor.size < bytes) { - mask &= 0xffffffff >> (bytes - cursor.size) * 8; - bytes = cursor.size; - } - - if (mask != 0xffffffff) { - spin_lock_irqsave(&adev->mmio_idx_lock, flags); - WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)aligned_pos) | 0x80000000); - WREG32_NO_KIQ(mmMM_INDEX_HI, aligned_pos >> 31); - value = RREG32_NO_KIQ(mmMM_DATA); - if (write) { - value &= ~mask; - value |= (*(uint32_t *)buf << shift) & mask; - WREG32_NO_KIQ(mmMM_DATA, value); - } - spin_unlock_irqrestore(&adev->mmio_idx_lock, flags); - if (!write) { - value = (value & mask) >> shift; - memcpy(buf, &value, bytes); - } - } else { - bytes = cursor.size & ~0x3ULL; - amdgpu_device_vram_access(adev, cursor.start, - (uint32_t *)buf, bytes, - write); + size_t count, size = cursor.size; + loff_t pos = cursor.start; + + count = amdgpu_device_aper_access(adev, pos, buf, size, write); + size -= count; + if (size) { + /* using MM to access rest vram and handle un-aligned address */ + pos += count; + buf += count; + amdgpu_ttm_vram_mm_access(adev, pos, buf, size, write); } - ret += bytes; - buf = (uint8_t *)buf + bytes; - amdgpu_res_next(&cursor, bytes); + ret += cursor.size; + buf += cursor.size; + amdgpu_res_next(&cursor, cursor.size); } return ret; @@ -2146,7 +2161,6 @@ static ssize_t amdgpu_ttm_vram_write(struct file *f, const char __user *buf, return -ENXIO; while (size) { - unsigned long flags; uint32_t value; if (*pos >= adev->gmc.mc_vram_size) @@ -2156,11 +2170,7 @@ static ssize_t amdgpu_ttm_vram_write(struct file *f, const char __user *buf, if (r) return r; - spin_lock_irqsave(&adev->mmio_idx_lock, flags); - WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)*pos) | 0x80000000); - WREG32_NO_KIQ(mmMM_INDEX_HI, *pos >> 31); - WREG32_NO_KIQ(mmMM_DATA, value); - spin_unlock_irqrestore(&adev->mmio_idx_lock, flags); + amdgpu_device_mm_access(adev, *pos, &value, 4, true); result += 4; buf += 4; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index e69f3e8e06e5..3205fd520060 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -52,7 +52,7 @@ struct amdgpu_gtt_mgr { struct ttm_resource_manager manager; struct drm_mm mm; spinlock_t lock; - atomic64_t available; + atomic64_t used; }; struct amdgpu_preempt_mgr { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index 2834981f8c08..abd8469380e5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -409,6 +409,12 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type) return AMDGPU_FW_LOAD_DIRECT; else return AMDGPU_FW_LOAD_PSP; + case CHIP_CYAN_SKILLFISH: + if (!(load_type && + adev->apu_flags & AMD_APU_IS_CYAN_SKILLFISH2)) + return AMDGPU_FW_LOAD_DIRECT; + else + return AMDGPU_FW_LOAD_PSP; default: 
DRM_ERROR("Unknown firmware load type\n"); } @@ -416,6 +422,84 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type) return AMDGPU_FW_LOAD_DIRECT; } +const char *amdgpu_ucode_name(enum AMDGPU_UCODE_ID ucode_id) +{ + switch (ucode_id) { + case AMDGPU_UCODE_ID_SDMA0: + return "SDMA0"; + case AMDGPU_UCODE_ID_SDMA1: + return "SDMA1"; + case AMDGPU_UCODE_ID_SDMA2: + return "SDMA2"; + case AMDGPU_UCODE_ID_SDMA3: + return "SDMA3"; + case AMDGPU_UCODE_ID_SDMA4: + return "SDMA4"; + case AMDGPU_UCODE_ID_SDMA5: + return "SDMA5"; + case AMDGPU_UCODE_ID_SDMA6: + return "SDMA6"; + case AMDGPU_UCODE_ID_SDMA7: + return "SDMA7"; + case AMDGPU_UCODE_ID_CP_CE: + return "CP_CE"; + case AMDGPU_UCODE_ID_CP_PFP: + return "CP_PFP"; + case AMDGPU_UCODE_ID_CP_ME: + return "CP_ME"; + case AMDGPU_UCODE_ID_CP_MEC1: + return "CP_MEC1"; + case AMDGPU_UCODE_ID_CP_MEC1_JT: + return "CP_MEC1_JT"; + case AMDGPU_UCODE_ID_CP_MEC2: + return "CP_MEC2"; + case AMDGPU_UCODE_ID_CP_MEC2_JT: + return "CP_MEC2_JT"; + case AMDGPU_UCODE_ID_CP_MES: + return "CP_MES"; + case AMDGPU_UCODE_ID_CP_MES_DATA: + return "CP_MES_DATA"; + case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL: + return "RLC_RESTORE_LIST_CNTL"; + case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM: + return "RLC_RESTORE_LIST_GPM_MEM"; + case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM: + return "RLC_RESTORE_LIST_SRM_MEM"; + case AMDGPU_UCODE_ID_RLC_IRAM: + return "RLC_IRAM"; + case AMDGPU_UCODE_ID_RLC_DRAM: + return "RLC_DRAM"; + case AMDGPU_UCODE_ID_RLC_G: + return "RLC_G"; + case AMDGPU_UCODE_ID_STORAGE: + return "STORAGE"; + case AMDGPU_UCODE_ID_SMC: + return "SMC"; + case AMDGPU_UCODE_ID_UVD: + return "UVD"; + case AMDGPU_UCODE_ID_UVD1: + return "UVD1"; + case AMDGPU_UCODE_ID_VCE: + return "VCE"; + case AMDGPU_UCODE_ID_VCN: + return "VCN"; + case AMDGPU_UCODE_ID_VCN1: + return "VCN1"; + case AMDGPU_UCODE_ID_DMCU_ERAM: + return "DMCU_ERAM"; + case AMDGPU_UCODE_ID_DMCU_INTV: + return "DMCU_INTV"; + case AMDGPU_UCODE_ID_VCN0_RAM: + return "VCN0_RAM"; + case AMDGPU_UCODE_ID_VCN1_RAM: + return "VCN1_RAM"; + case AMDGPU_UCODE_ID_DMCUB: + return "DMCUB"; + default: + return "UNKNOWN UCODE"; + } +} + #define FW_VERSION_ATTR(name, mode, field) \ static ssize_t show_##name(struct device *dev, \ struct device_attribute *attr, \ @@ -440,10 +524,10 @@ FW_VERSION_ATTR(rlc_srlg_fw_version, 0444, gfx.rlc_srlg_fw_version); FW_VERSION_ATTR(rlc_srls_fw_version, 0444, gfx.rlc_srls_fw_version); FW_VERSION_ATTR(mec_fw_version, 0444, gfx.mec_fw_version); FW_VERSION_ATTR(mec2_fw_version, 0444, gfx.mec2_fw_version); -FW_VERSION_ATTR(sos_fw_version, 0444, psp.sos_fw_version); -FW_VERSION_ATTR(asd_fw_version, 0444, psp.asd_fw_version); -FW_VERSION_ATTR(ta_ras_fw_version, 0444, psp.ta_ras_ucode_version); -FW_VERSION_ATTR(ta_xgmi_fw_version, 0444, psp.ta_xgmi_ucode_version); +FW_VERSION_ATTR(sos_fw_version, 0444, psp.sos.fw_version); +FW_VERSION_ATTR(asd_fw_version, 0444, psp.asd.fw_version); +FW_VERSION_ATTR(ta_ras_fw_version, 0444, psp.ras.feature_version); +FW_VERSION_ATTR(ta_xgmi_fw_version, 0444, psp.xgmi.feature_version); FW_VERSION_ATTR(smc_fw_version, 0444, pm.fw_version); FW_VERSION_ATTR(sdma_fw_version, 0444, sdma.instance[0].fw_version); FW_VERSION_ATTR(sdma2_fw_version, 0444, sdma.instance[1].fw_version); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h index 270309e7f5f5..7c2538db3cd5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h @@ -71,7 +71,7 @@ struct 
smc_firmware_header_v2_1 { uint32_t pptable_entry_offset; }; -struct psp_fw_bin_desc { +struct psp_fw_legacy_bin_desc { uint32_t fw_version; uint32_t offset_bytes; uint32_t size_bytes; @@ -80,50 +80,67 @@ struct psp_fw_bin_desc { /* version_major=1, version_minor=0 */ struct psp_firmware_header_v1_0 { struct common_firmware_header header; - struct psp_fw_bin_desc sos; + struct psp_fw_legacy_bin_desc sos; }; /* version_major=1, version_minor=1 */ struct psp_firmware_header_v1_1 { struct psp_firmware_header_v1_0 v1_0; - struct psp_fw_bin_desc toc; - struct psp_fw_bin_desc kdb; + struct psp_fw_legacy_bin_desc toc; + struct psp_fw_legacy_bin_desc kdb; }; /* version_major=1, version_minor=2 */ struct psp_firmware_header_v1_2 { struct psp_firmware_header_v1_0 v1_0; - struct psp_fw_bin_desc res; - struct psp_fw_bin_desc kdb; + struct psp_fw_legacy_bin_desc res; + struct psp_fw_legacy_bin_desc kdb; }; /* version_major=1, version_minor=3 */ struct psp_firmware_header_v1_3 { struct psp_firmware_header_v1_1 v1_1; - struct psp_fw_bin_desc spl; - struct psp_fw_bin_desc rl; - struct psp_fw_bin_desc sys_drv_aux; - struct psp_fw_bin_desc sos_aux; + struct psp_fw_legacy_bin_desc spl; + struct psp_fw_legacy_bin_desc rl; + struct psp_fw_legacy_bin_desc sys_drv_aux; + struct psp_fw_legacy_bin_desc sos_aux; +}; + +struct psp_fw_bin_desc { + uint32_t fw_type; + uint32_t fw_version; + uint32_t offset_bytes; + uint32_t size_bytes; +}; + +enum psp_fw_type { + PSP_FW_TYPE_UNKOWN, + PSP_FW_TYPE_PSP_SOS, + PSP_FW_TYPE_PSP_SYS_DRV, + PSP_FW_TYPE_PSP_KDB, + PSP_FW_TYPE_PSP_TOC, + PSP_FW_TYPE_PSP_SPL, + PSP_FW_TYPE_PSP_RL, + PSP_FW_TYPE_PSP_SOC_DRV, + PSP_FW_TYPE_PSP_INTF_DRV, + PSP_FW_TYPE_PSP_DBG_DRV, +}; + +/* version_major=2, version_minor=0 */ +struct psp_firmware_header_v2_0 { + struct common_firmware_header header; + uint32_t psp_fw_bin_count; + struct psp_fw_bin_desc psp_fw_bin[]; }; /* version_major=1, version_minor=0 */ struct ta_firmware_header_v1_0 { struct common_firmware_header header; - uint32_t ta_xgmi_ucode_version; - uint32_t ta_xgmi_offset_bytes; - uint32_t ta_xgmi_size_bytes; - uint32_t ta_ras_ucode_version; - uint32_t ta_ras_offset_bytes; - uint32_t ta_ras_size_bytes; - uint32_t ta_hdcp_ucode_version; - uint32_t ta_hdcp_offset_bytes; - uint32_t ta_hdcp_size_bytes; - uint32_t ta_dtm_ucode_version; - uint32_t ta_dtm_offset_bytes; - uint32_t ta_dtm_size_bytes; - uint32_t ta_securedisplay_ucode_version; - uint32_t ta_securedisplay_offset_bytes; - uint32_t ta_securedisplay_size_bytes; + struct psp_fw_legacy_bin_desc xgmi; + struct psp_fw_legacy_bin_desc ras; + struct psp_fw_legacy_bin_desc hdcp; + struct psp_fw_legacy_bin_desc dtm; + struct psp_fw_legacy_bin_desc securedisplay; }; enum ta_fw_type { @@ -138,18 +155,11 @@ enum ta_fw_type { TA_FW_TYPE_MAX_INDEX, }; -struct ta_fw_bin_desc { - uint32_t fw_type; - uint32_t fw_version; - uint32_t offset_bytes; - uint32_t size_bytes; -}; - /* version_major=2, version_minor=0 */ struct ta_firmware_header_v2_0 { struct common_firmware_header header; uint32_t ta_fw_bin_count; - struct ta_fw_bin_desc ta_fw_bin[]; + struct psp_fw_bin_desc ta_fw_bin[]; }; /* version_major=1, version_minor=0 */ @@ -312,6 +322,7 @@ union amdgpu_firmware_header { struct psp_firmware_header_v1_0 psp; struct psp_firmware_header_v1_1 psp_v1_1; struct psp_firmware_header_v1_3 psp_v1_3; + struct psp_firmware_header_v2_0 psp_v2_0; struct ta_firmware_header_v1_0 ta; struct ta_firmware_header_v2_0 ta_v2_0; struct gfx_firmware_header_v1_0 gfx; @@ -326,7 +337,7 @@ union amdgpu_firmware_header { 
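A hypothetical sketch, not part of this patch, of how the v2.0 PSP descriptor array introduced here would be walked; fw_data is assumed to point at validated firmware header bytes, and the loop is bounded by the UCODE_MAX_PSP_PACKAGING limit defined further down.

/* Illustrative only: iterate the flexible psp_fw_bin[] array. */
const struct psp_firmware_header_v2_0 *hdr = fw_data;
uint32_t i, count = le32_to_cpu(hdr->psp_fw_bin_count);

for (i = 0; i < count && i < UCODE_MAX_PSP_PACKAGING; i++) {
	const struct psp_fw_bin_desc *desc = &hdr->psp_fw_bin[i];

	pr_debug("psp fw type %u, version 0x%x, offset %u, size %u\n",
		 le32_to_cpu(desc->fw_type), le32_to_cpu(desc->fw_version),
		 le32_to_cpu(desc->offset_bytes),
		 le32_to_cpu(desc->size_bytes));
}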
uint8_t raw[0x100]; }; -#define UCODE_MAX_TA_PACKAGING ((sizeof(union amdgpu_firmware_header) - sizeof(struct common_firmware_header) - 4) / sizeof(struct ta_fw_bin_desc)) +#define UCODE_MAX_PSP_PACKAGING ((sizeof(union amdgpu_firmware_header) - sizeof(struct common_firmware_header) - 4) / sizeof(struct psp_fw_bin_desc)) /* * fw loading support @@ -449,4 +460,6 @@ void amdgpu_ucode_sysfs_fini(struct amdgpu_device *adev); enum amdgpu_firmware_load_type amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type); +const char *amdgpu_ucode_name(enum AMDGPU_UCODE_ID ucode_id); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c index f4489773715e..a90029ee9733 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -41,7 +41,6 @@ int amdgpu_umc_ras_late_init(struct amdgpu_device *adev) adev->umc.ras_if->block = AMDGPU_RAS_BLOCK__UMC; adev->umc.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; adev->umc.ras_if->sub_block_index = 0; - strcpy(adev->umc.ras_if->name, "umc"); } ih_info.head = fs_info.head = *adev->umc.ras_if; @@ -134,7 +133,7 @@ int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev, amdgpu_ras_save_bad_pages(adev); if (adev->smu.ppt_funcs && adev->smu.ppt_funcs->send_hbm_bad_pages_num) - adev->smu.ppt_funcs->send_hbm_bad_pages_num(&adev->smu, con->eeprom_control.num_recs); + adev->smu.ppt_funcs->send_hbm_bad_pages_num(&adev->smu, con->eeprom_control.ras_num_recs); } amdgpu_ras_reset_gpu(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index 0f576f294d8a..d451c359606a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -326,7 +326,6 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev) { int i, j; - cancel_delayed_work_sync(&adev->uvd.idle_work); drm_sched_entity_destroy(&adev->uvd.entity); for (j = 0; j < adev->uvd.num_uvd_inst; ++j) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index 1ae7f824adc7..8e8dee9fac9f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -218,7 +218,6 @@ int amdgpu_vce_sw_fini(struct amdgpu_device *adev) if (adev->vce.vcpu_bo == NULL) return 0; - cancel_delayed_work_sync(&adev->vce.idle_work); drm_sched_entity_destroy(&adev->vce.entity); amdgpu_bo_free_kernel(&adev->vce.vcpu_bo, &adev->vce.gpu_addr, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index 6780df0fb265..008a308a4eca 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -258,8 +258,6 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev) { int i, j; - cancel_delayed_work_sync(&adev->vcn.idle_work); - for (j = 0; j < adev->vcn.num_vcn_inst; ++j) { if (adev->vcn.harvest_config & (1 << j)) continue; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index b71dd1deeb2d..ca058fbcccd4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -531,10 +531,10 @@ static void amdgpu_virt_populate_vf2pf_ucode_info(struct amdgpu_device *adev) POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_RLC_SRLS, adev->gfx.rlc_srls_fw_version); POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_MEC, adev->gfx.mec_fw_version); POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_MEC2, adev->gfx.mec2_fw_version); - 
POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_SOS, adev->psp.sos_fw_version); - POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_ASD, adev->psp.asd_fw_version); - POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_TA_RAS, adev->psp.ta_ras_ucode_version); - POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_TA_XGMI, adev->psp.ta_xgmi_ucode_version); + POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_SOS, adev->psp.sos.fw_version); + POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_ASD, adev->psp.asd.fw_version); + POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_TA_RAS, adev->psp.ras.feature_version); + POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_TA_XGMI, adev->psp.xgmi.feature_version); POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_SMC, adev->pm.fw_version); POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_SDMA, adev->sdma.instance[0].fw_version); POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_SDMA2, adev->sdma.instance[1].fw_version); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c new file mode 100644 index 000000000000..ce982afeff91 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c @@ -0,0 +1,643 @@ +// SPDX-License-Identifier: GPL-2.0+ + +#include <drm/drm_atomic_helper.h> +#include <drm/drm_simple_kms_helper.h> +#include <drm/drm_vblank.h> + +#include "amdgpu.h" +#ifdef CONFIG_DRM_AMDGPU_SI +#include "dce_v6_0.h" +#endif +#ifdef CONFIG_DRM_AMDGPU_CIK +#include "dce_v8_0.h" +#endif +#include "dce_v10_0.h" +#include "dce_v11_0.h" +#include "ivsrcid/ivsrcid_vislands30.h" +#include "amdgpu_vkms.h" +#include "amdgpu_display.h" + +/** + * DOC: amdgpu_vkms + * + * The amdgpu vkms interface provides a virtual KMS interface for several use + * cases: devices without display hardware, platforms where the actual display + * hardware is not useful (e.g., servers), SR-IOV virtual functions, device + * emulation/simulation, and device bring up prior to display hardware being + * usable. We previously emulated a legacy KMS interface, but there was a desire + * to move to the atomic KMS interface. The vkms driver did everything we + * needed, but we wanted KMS support natively in the driver without buffer + * sharing and the ability to support an instance of VKMS per device. We first + * looked at splitting vkms into a stub driver and a helper module that other + * drivers could use to implement a virtual display, but this strategy ended up + * being messy due to driver specific callbacks needed for buffer management. + * Ultimately, it proved easier to import the vkms code as it mostly used core + * drm helpers anyway. 
+ */ + +static const u32 amdgpu_vkms_formats[] = { + DRM_FORMAT_XRGB8888, +}; + +static enum hrtimer_restart amdgpu_vkms_vblank_simulate(struct hrtimer *timer) +{ + struct amdgpu_vkms_output *output = container_of(timer, + struct amdgpu_vkms_output, + vblank_hrtimer); + struct drm_crtc *crtc = &output->crtc; + u64 ret_overrun; + bool ret; + + ret_overrun = hrtimer_forward_now(&output->vblank_hrtimer, + output->period_ns); + WARN_ON(ret_overrun != 1); + + ret = drm_crtc_handle_vblank(crtc); + if (!ret) + DRM_ERROR("amdgpu_vkms failure on handling vblank"); + + return HRTIMER_RESTART; +} + +static int amdgpu_vkms_enable_vblank(struct drm_crtc *crtc) +{ + struct drm_device *dev = crtc->dev; + unsigned int pipe = drm_crtc_index(crtc); + struct drm_vblank_crtc *vblank = &dev->vblank[pipe]; + struct amdgpu_vkms_output *out = drm_crtc_to_amdgpu_vkms_output(crtc); + + drm_calc_timestamping_constants(crtc, &crtc->mode); + + hrtimer_init(&out->vblank_hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + out->vblank_hrtimer.function = &amdgpu_vkms_vblank_simulate; + out->period_ns = ktime_set(0, vblank->framedur_ns); + hrtimer_start(&out->vblank_hrtimer, out->period_ns, HRTIMER_MODE_REL); + + return 0; +} + +static void amdgpu_vkms_disable_vblank(struct drm_crtc *crtc) +{ + struct amdgpu_vkms_output *out = drm_crtc_to_amdgpu_vkms_output(crtc); + + hrtimer_cancel(&out->vblank_hrtimer); +} + +static bool amdgpu_vkms_get_vblank_timestamp(struct drm_crtc *crtc, + int *max_error, + ktime_t *vblank_time, + bool in_vblank_irq) +{ + struct drm_device *dev = crtc->dev; + unsigned int pipe = crtc->index; + struct amdgpu_vkms_output *output = drm_crtc_to_amdgpu_vkms_output(crtc); + struct drm_vblank_crtc *vblank = &dev->vblank[pipe]; + + if (!READ_ONCE(vblank->enabled)) { + *vblank_time = ktime_get(); + return true; + } + + *vblank_time = READ_ONCE(output->vblank_hrtimer.node.expires); + + if (WARN_ON(*vblank_time == vblank->time)) + return true; + + /* + * To prevent races we roll the hrtimer forward before we do any + * interrupt processing - this is how real hw works (the interrupt is + * only generated after all the vblank registers are updated) and what + * the vblank core expects. Therefore we need to always correct the + * timestampe by one frame. 
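The comment just above is the subtle part of the timer-backed vblank emulation in amdgpu_vkms_get_vblank_timestamp(): the hrtimer has already been forwarded past the event being reported, so the reported timestamp is pulled back by one period (the subtraction that follows this comment). A tiny standalone C sketch of that arithmetic; the nanosecond values are illustrative, not taken from the driver:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	int64_t period_ns = 16666667;         /* ~60 Hz frame duration (framedur_ns) */
	int64_t next_expiry_ns = 1000000000;  /* where hrtimer_forward_now() left the timer */

	/* The vblank being reported happened one full period before the next expiry. */
	int64_t vblank_time_ns = next_expiry_ns - period_ns;

	printf("reported vblank timestamp: %lld ns\n", (long long)vblank_time_ns);
	return 0;
}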
+ */ + *vblank_time -= output->period_ns; + + return true; +} + +static const struct drm_crtc_funcs amdgpu_vkms_crtc_funcs = { + .set_config = drm_atomic_helper_set_config, + .destroy = drm_crtc_cleanup, + .page_flip = drm_atomic_helper_page_flip, + .reset = drm_atomic_helper_crtc_reset, + .atomic_duplicate_state = drm_atomic_helper_crtc_duplicate_state, + .atomic_destroy_state = drm_atomic_helper_crtc_destroy_state, + .enable_vblank = amdgpu_vkms_enable_vblank, + .disable_vblank = amdgpu_vkms_disable_vblank, + .get_vblank_timestamp = amdgpu_vkms_get_vblank_timestamp, +}; + +static void amdgpu_vkms_crtc_atomic_enable(struct drm_crtc *crtc, + struct drm_atomic_state *state) +{ + drm_crtc_vblank_on(crtc); +} + +static void amdgpu_vkms_crtc_atomic_disable(struct drm_crtc *crtc, + struct drm_atomic_state *state) +{ + drm_crtc_vblank_off(crtc); +} + +static void amdgpu_vkms_crtc_atomic_flush(struct drm_crtc *crtc, + struct drm_atomic_state *state) +{ + if (crtc->state->event) { + spin_lock(&crtc->dev->event_lock); + + if (drm_crtc_vblank_get(crtc) != 0) + drm_crtc_send_vblank_event(crtc, crtc->state->event); + else + drm_crtc_arm_vblank_event(crtc, crtc->state->event); + + spin_unlock(&crtc->dev->event_lock); + + crtc->state->event = NULL; + } +} + +static const struct drm_crtc_helper_funcs amdgpu_vkms_crtc_helper_funcs = { + .atomic_flush = amdgpu_vkms_crtc_atomic_flush, + .atomic_enable = amdgpu_vkms_crtc_atomic_enable, + .atomic_disable = amdgpu_vkms_crtc_atomic_disable, +}; + +static int amdgpu_vkms_crtc_init(struct drm_device *dev, struct drm_crtc *crtc, + struct drm_plane *primary, struct drm_plane *cursor) +{ + int ret; + + ret = drm_crtc_init_with_planes(dev, crtc, primary, cursor, + &amdgpu_vkms_crtc_funcs, NULL); + if (ret) { + DRM_ERROR("Failed to init CRTC\n"); + return ret; + } + + drm_crtc_helper_add(crtc, &amdgpu_vkms_crtc_helper_funcs); + + return ret; +} + +static const struct drm_connector_funcs amdgpu_vkms_connector_funcs = { + .fill_modes = drm_helper_probe_single_connector_modes, + .destroy = drm_connector_cleanup, + .reset = drm_atomic_helper_connector_reset, + .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, + .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, +}; + +static int amdgpu_vkms_conn_get_modes(struct drm_connector *connector) +{ + struct drm_device *dev = connector->dev; + struct drm_display_mode *mode = NULL; + unsigned i; + static const struct mode_size { + int w; + int h; + } common_modes[] = { + { 640, 480}, + { 720, 480}, + { 800, 600}, + { 848, 480}, + {1024, 768}, + {1152, 768}, + {1280, 720}, + {1280, 800}, + {1280, 854}, + {1280, 960}, + {1280, 1024}, + {1440, 900}, + {1400, 1050}, + {1680, 1050}, + {1600, 1200}, + {1920, 1080}, + {1920, 1200}, + {2560, 1440}, + {4096, 3112}, + {3656, 2664}, + {3840, 2160}, + {4096, 2160}, + }; + + for (i = 0; i < ARRAY_SIZE(common_modes); i++) { + mode = drm_cvt_mode(dev, common_modes[i].w, common_modes[i].h, 60, false, false, false); + drm_mode_probed_add(connector, mode); + } + + drm_set_preferred_mode(connector, XRES_DEF, YRES_DEF); + + return ARRAY_SIZE(common_modes); +} + +static const struct drm_connector_helper_funcs amdgpu_vkms_conn_helper_funcs = { + .get_modes = amdgpu_vkms_conn_get_modes, +}; + +static const struct drm_plane_funcs amdgpu_vkms_plane_funcs = { + .update_plane = drm_atomic_helper_update_plane, + .disable_plane = drm_atomic_helper_disable_plane, + .destroy = drm_plane_cleanup, + .reset = drm_atomic_helper_plane_reset, + .atomic_duplicate_state = 
drm_atomic_helper_plane_duplicate_state, + .atomic_destroy_state = drm_atomic_helper_plane_destroy_state, +}; + +static void amdgpu_vkms_plane_atomic_update(struct drm_plane *plane, + struct drm_atomic_state *old_state) +{ + return; +} + +static int amdgpu_vkms_plane_atomic_check(struct drm_plane *plane, + struct drm_atomic_state *state) +{ + struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state, + plane); + struct drm_crtc_state *crtc_state; + int ret; + + if (!new_plane_state->fb || WARN_ON(!new_plane_state->crtc)) + return 0; + + crtc_state = drm_atomic_get_crtc_state(state, + new_plane_state->crtc); + if (IS_ERR(crtc_state)) + return PTR_ERR(crtc_state); + + ret = drm_atomic_helper_check_plane_state(new_plane_state, crtc_state, + DRM_PLANE_HELPER_NO_SCALING, + DRM_PLANE_HELPER_NO_SCALING, + false, true); + if (ret != 0) + return ret; + + /* for now primary plane must be visible and full screen */ + if (!new_plane_state->visible) + return -EINVAL; + + return 0; +} + +static int amdgpu_vkms_prepare_fb(struct drm_plane *plane, + struct drm_plane_state *new_state) +{ + struct amdgpu_framebuffer *afb; + struct drm_gem_object *obj; + struct amdgpu_device *adev; + struct amdgpu_bo *rbo; + struct list_head list; + struct ttm_validate_buffer tv; + struct ww_acquire_ctx ticket; + uint32_t domain; + int r; + + if (!new_state->fb) { + DRM_DEBUG_KMS("No FB bound\n"); + return 0; + } + afb = to_amdgpu_framebuffer(new_state->fb); + obj = new_state->fb->obj[0]; + rbo = gem_to_amdgpu_bo(obj); + adev = amdgpu_ttm_adev(rbo->tbo.bdev); + INIT_LIST_HEAD(&list); + + tv.bo = &rbo->tbo; + tv.num_shared = 1; + list_add(&tv.head, &list); + + r = ttm_eu_reserve_buffers(&ticket, &list, false, NULL); + if (r) { + dev_err(adev->dev, "fail to reserve bo (%d)\n", r); + return r; + } + + if (plane->type != DRM_PLANE_TYPE_CURSOR) + domain = amdgpu_display_supported_domains(adev, rbo->flags); + else + domain = AMDGPU_GEM_DOMAIN_VRAM; + + r = amdgpu_bo_pin(rbo, domain); + if (unlikely(r != 0)) { + if (r != -ERESTARTSYS) + DRM_ERROR("Failed to pin framebuffer with error %d\n", r); + ttm_eu_backoff_reservation(&ticket, &list); + return r; + } + + r = amdgpu_ttm_alloc_gart(&rbo->tbo); + if (unlikely(r != 0)) { + amdgpu_bo_unpin(rbo); + ttm_eu_backoff_reservation(&ticket, &list); + DRM_ERROR("%p bind failed\n", rbo); + return r; + } + + ttm_eu_backoff_reservation(&ticket, &list); + + afb->address = amdgpu_bo_gpu_offset(rbo); + + amdgpu_bo_ref(rbo); + + return 0; +} + +static void amdgpu_vkms_cleanup_fb(struct drm_plane *plane, + struct drm_plane_state *old_state) +{ + struct amdgpu_bo *rbo; + int r; + + if (!old_state->fb) + return; + + rbo = gem_to_amdgpu_bo(old_state->fb->obj[0]); + r = amdgpu_bo_reserve(rbo, false); + if (unlikely(r)) { + DRM_ERROR("failed to reserve rbo before unpin\n"); + return; + } + + amdgpu_bo_unpin(rbo); + amdgpu_bo_unreserve(rbo); + amdgpu_bo_unref(&rbo); +} + +static const struct drm_plane_helper_funcs amdgpu_vkms_primary_helper_funcs = { + .atomic_update = amdgpu_vkms_plane_atomic_update, + .atomic_check = amdgpu_vkms_plane_atomic_check, + .prepare_fb = amdgpu_vkms_prepare_fb, + .cleanup_fb = amdgpu_vkms_cleanup_fb, +}; + +static struct drm_plane *amdgpu_vkms_plane_init(struct drm_device *dev, + enum drm_plane_type type, + int index) +{ + struct drm_plane *plane; + int ret; + + plane = kzalloc(sizeof(*plane), GFP_KERNEL); + if (!plane) + return ERR_PTR(-ENOMEM); + + ret = drm_universal_plane_init(dev, plane, 1 << index, + &amdgpu_vkms_plane_funcs, + amdgpu_vkms_formats, 
+ ARRAY_SIZE(amdgpu_vkms_formats), + NULL, type, NULL); + if (ret) { + kfree(plane); + return ERR_PTR(ret); + } + + drm_plane_helper_add(plane, &amdgpu_vkms_primary_helper_funcs); + + return plane; +} + +int amdgpu_vkms_output_init(struct drm_device *dev, + struct amdgpu_vkms_output *output, int index) +{ + struct drm_connector *connector = &output->connector; + struct drm_encoder *encoder = &output->encoder; + struct drm_crtc *crtc = &output->crtc; + struct drm_plane *primary, *cursor = NULL; + int ret; + + primary = amdgpu_vkms_plane_init(dev, DRM_PLANE_TYPE_PRIMARY, index); + if (IS_ERR(primary)) + return PTR_ERR(primary); + + ret = amdgpu_vkms_crtc_init(dev, crtc, primary, cursor); + if (ret) + goto err_crtc; + + ret = drm_connector_init(dev, connector, &amdgpu_vkms_connector_funcs, + DRM_MODE_CONNECTOR_VIRTUAL); + if (ret) { + DRM_ERROR("Failed to init connector\n"); + goto err_connector; + } + + drm_connector_helper_add(connector, &amdgpu_vkms_conn_helper_funcs); + + ret = drm_simple_encoder_init(dev, encoder, DRM_MODE_ENCODER_VIRTUAL); + if (ret) { + DRM_ERROR("Failed to init encoder\n"); + goto err_encoder; + } + encoder->possible_crtcs = 1 << index; + + ret = drm_connector_attach_encoder(connector, encoder); + if (ret) { + DRM_ERROR("Failed to attach connector to encoder\n"); + goto err_attach; + } + + drm_mode_config_reset(dev); + + return 0; + +err_attach: + drm_encoder_cleanup(encoder); + +err_encoder: + drm_connector_cleanup(connector); + +err_connector: + drm_crtc_cleanup(crtc); + +err_crtc: + drm_plane_cleanup(primary); + + return ret; +} + +const struct drm_mode_config_funcs amdgpu_vkms_mode_funcs = { + .fb_create = amdgpu_display_user_framebuffer_create, + .atomic_check = drm_atomic_helper_check, + .atomic_commit = drm_atomic_helper_commit, +}; + +static int amdgpu_vkms_sw_init(void *handle) +{ + int r, i; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + adev_to_drm(adev)->max_vblank_count = 0; + + adev_to_drm(adev)->mode_config.funcs = &amdgpu_vkms_mode_funcs; + + adev_to_drm(adev)->mode_config.max_width = XRES_MAX; + adev_to_drm(adev)->mode_config.max_height = YRES_MAX; + + adev_to_drm(adev)->mode_config.preferred_depth = 24; + adev_to_drm(adev)->mode_config.prefer_shadow = 1; + + adev_to_drm(adev)->mode_config.fb_base = adev->gmc.aper_base; + + r = amdgpu_display_modeset_create_props(adev); + if (r) + return r; + + adev->amdgpu_vkms_output = kcalloc(adev->mode_info.num_crtc, sizeof(struct amdgpu_vkms_output), GFP_KERNEL); + if (!adev->amdgpu_vkms_output) + return -ENOMEM; + + /* allocate crtcs, encoders, connectors */ + for (i = 0; i < adev->mode_info.num_crtc; i++) { + r = amdgpu_vkms_output_init(adev_to_drm(adev), &adev->amdgpu_vkms_output[i], i); + if (r) + return r; + } + + drm_kms_helper_poll_init(adev_to_drm(adev)); + + adev->mode_info.mode_config_initialized = true; + return 0; +} + +static int amdgpu_vkms_sw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i = 0; + + for (i = 0; i < adev->mode_info.num_crtc; i++) + if (adev->mode_info.crtcs[i]) + hrtimer_cancel(&adev->mode_info.crtcs[i]->vblank_timer); + + kfree(adev->mode_info.bios_hardcoded_edid); + kfree(adev->amdgpu_vkms_output); + + drm_kms_helper_poll_fini(adev_to_drm(adev)); + + adev->mode_info.mode_config_initialized = false; + return 0; +} + +static int amdgpu_vkms_hw_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + switch (adev->asic_type) { +#ifdef CONFIG_DRM_AMDGPU_SI + case CHIP_TAHITI: + case 
CHIP_PITCAIRN: + case CHIP_VERDE: + case CHIP_OLAND: + dce_v6_0_disable_dce(adev); + break; +#endif +#ifdef CONFIG_DRM_AMDGPU_CIK + case CHIP_BONAIRE: + case CHIP_HAWAII: + case CHIP_KAVERI: + case CHIP_KABINI: + case CHIP_MULLINS: + dce_v8_0_disable_dce(adev); + break; +#endif + case CHIP_FIJI: + case CHIP_TONGA: + dce_v10_0_disable_dce(adev); + break; + case CHIP_CARRIZO: + case CHIP_STONEY: + case CHIP_POLARIS10: + case CHIP_POLARIS11: + case CHIP_VEGAM: + dce_v11_0_disable_dce(adev); + break; + case CHIP_TOPAZ: +#ifdef CONFIG_DRM_AMDGPU_SI + case CHIP_HAINAN: +#endif + /* no DCE */ + break; + default: + break; + } + return 0; +} + +static int amdgpu_vkms_hw_fini(void *handle) +{ + return 0; +} + +static int amdgpu_vkms_suspend(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int r; + + r = drm_mode_config_helper_suspend(adev_to_drm(adev)); + if (r) + return r; + return amdgpu_vkms_hw_fini(handle); +} + +static int amdgpu_vkms_resume(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int r; + + r = amdgpu_vkms_hw_init(handle); + if (r) + return r; + return drm_mode_config_helper_resume(adev_to_drm(adev)); +} + +static bool amdgpu_vkms_is_idle(void *handle) +{ + return true; +} + +static int amdgpu_vkms_wait_for_idle(void *handle) +{ + return 0; +} + +static int amdgpu_vkms_soft_reset(void *handle) +{ + return 0; +} + +static int amdgpu_vkms_set_clockgating_state(void *handle, + enum amd_clockgating_state state) +{ + return 0; +} + +static int amdgpu_vkms_set_powergating_state(void *handle, + enum amd_powergating_state state) +{ + return 0; +} + +static const struct amd_ip_funcs amdgpu_vkms_ip_funcs = { + .name = "amdgpu_vkms", + .early_init = NULL, + .late_init = NULL, + .sw_init = amdgpu_vkms_sw_init, + .sw_fini = amdgpu_vkms_sw_fini, + .hw_init = amdgpu_vkms_hw_init, + .hw_fini = amdgpu_vkms_hw_fini, + .suspend = amdgpu_vkms_suspend, + .resume = amdgpu_vkms_resume, + .is_idle = amdgpu_vkms_is_idle, + .wait_for_idle = amdgpu_vkms_wait_for_idle, + .soft_reset = amdgpu_vkms_soft_reset, + .set_clockgating_state = amdgpu_vkms_set_clockgating_state, + .set_powergating_state = amdgpu_vkms_set_powergating_state, +}; + +const struct amdgpu_ip_block_version amdgpu_vkms_ip_block = +{ + .type = AMD_IP_BLOCK_TYPE_DCE, + .major = 1, + .minor = 0, + .rev = 0, + .funcs = &amdgpu_vkms_ip_funcs, +}; + diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.h new file mode 100644 index 000000000000..97f1b79c0724 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ + +#ifndef _AMDGPU_VKMS_H_ +#define _AMDGPU_VKMS_H_ + +#define XRES_DEF 1024 +#define YRES_DEF 768 + +#define XRES_MAX 16384 +#define YRES_MAX 16384 + +#define drm_crtc_to_amdgpu_vkms_output(target) \ + container_of(target, struct amdgpu_vkms_output, crtc) + +extern const struct amdgpu_ip_block_version amdgpu_vkms_ip_block; + +struct amdgpu_vkms_output { + struct drm_crtc crtc; + struct drm_encoder encoder; + struct drm_connector connector; + struct hrtimer vblank_hrtimer; + ktime_t period_ns; + struct drm_pending_vblank_event *event; +}; + +#endif /* _AMDGPU_VKMS_H_ */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 078c068937fe..6b15cad78de9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -88,6 +88,46 @@ struct amdgpu_prt_cb { struct dma_fence_cb cb; }; +/** + * 
amdgpu_vm_set_pasid - manage pasid and vm ptr mapping + * + * @adev: amdgpu_device pointer + * @vm: amdgpu_vm pointer + * @pasid: the pasid the VM is using on this GPU + * + * Set the pasid this VM is using on this GPU, can also be used to remove the + * pasid by passing in zero. + * + */ +int amdgpu_vm_set_pasid(struct amdgpu_device *adev, struct amdgpu_vm *vm, + u32 pasid) +{ + int r; + + if (vm->pasid == pasid) + return 0; + + if (vm->pasid) { + r = xa_err(xa_erase_irq(&adev->vm_manager.pasids, vm->pasid)); + if (r < 0) + return r; + + vm->pasid = 0; + } + + if (pasid) { + r = xa_err(xa_store_irq(&adev->vm_manager.pasids, pasid, vm, + GFP_KERNEL)); + if (r < 0) + return r; + + vm->pasid = pasid; + } + + + return 0; +} + /* * vm eviction_lock can be taken in MMU notifiers. Make sure no reclaim-FS * happens while holding this lock anywhere to prevent deadlocks when @@ -886,7 +926,7 @@ static int amdgpu_vm_pt_create(struct amdgpu_device *adev, bp.size = amdgpu_vm_bo_size(adev, level); bp.byte_align = AMDGPU_GPU_PAGE_SIZE; bp.domain = AMDGPU_GEM_DOMAIN_VRAM; - bp.domain = amdgpu_bo_get_preferred_pin_domain(adev, bp.domain); + bp.domain = amdgpu_bo_get_preferred_domain(adev, bp.domain); bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | AMDGPU_GEM_CREATE_CPU_GTT_USWC; @@ -1178,7 +1218,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, amdgpu_gmc_emit_pasid_mapping(ring, job->vmid, job->pasid); if (vm_flush_needed || pasid_mapping_needed) { - r = amdgpu_fence_emit(ring, &fence, 0); + r = amdgpu_fence_emit(ring, &fence, NULL, 0); if (r) return r; } @@ -1758,7 +1798,7 @@ int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, r = vm->update_funcs->commit(¶ms, fence); if (table_freed) - *table_freed = params.table_freed; + *table_freed = *table_freed || params.table_freed; error_unlock: amdgpu_vm_eviction_unlock(vm); @@ -1816,6 +1856,7 @@ void amdgpu_vm_get_memory(struct amdgpu_vm *vm, uint64_t *vram_mem, * @adev: amdgpu_device pointer * @bo_va: requested BO and VM object * @clear: if true clear the entries + * @table_freed: return true if page table is freed * * Fill in the page table entries for @bo_va. * @@ -1823,7 +1864,7 @@ void amdgpu_vm_get_memory(struct amdgpu_vm *vm, uint64_t *vram_mem, * 0 for success, -EINVAL for failure. */ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, - bool clear) + bool clear, bool *table_freed) { struct amdgpu_bo *bo = bo_va->base.bo; struct amdgpu_vm *vm = bo_va->base.vm; @@ -1902,7 +1943,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, resv, mapping->start, mapping->last, update_flags, mapping->offset, mem, - pages_addr, last_update, NULL); + pages_addr, last_update, table_freed); if (r) return r; } @@ -2154,7 +2195,7 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev, list_for_each_entry_safe(bo_va, tmp, &vm->moved, base.vm_status) { /* Per VM BOs never need to bo cleared in the page tables */ - r = amdgpu_vm_bo_update(adev, bo_va, false); + r = amdgpu_vm_bo_update(adev, bo_va, false, NULL); if (r) return r; } @@ -2173,7 +2214,7 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev, else clear = true; - r = amdgpu_vm_bo_update(adev, bo_va, clear); + r = amdgpu_vm_bo_update(adev, bo_va, clear, NULL); if (r) return r; @@ -2863,14 +2904,13 @@ long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout) * * @adev: amdgpu_device pointer * @vm: requested vm - * @pasid: Process address space identifier * * Init @vm fields. 
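The amdgpu_vm_set_pasid() helper added at the top of this file's changes centralizes the pasid-to-VM mapping that the rest of the patch converts from an IDR to an xarray. The sketch below (kernel-style C, not a standalone buildable unit; the pasid_map_* wrappers are invented for illustration) shows the pattern: IRQ-safe store/erase when attaching or detaching a pasid, and a lookup under xa_lock_irqsave() in interrupt context, as in amdgpu_vm_get_task_info() and amdgpu_vm_handle_fault() further down:

/* Invented wrappers around the xarray usage introduced by this patch. */
static struct xarray pasids;

static void pasid_map_init(void)
{
	xa_init_flags(&pasids, XA_FLAGS_LOCK_IRQ);
}

static int pasid_map_attach(u32 pasid, struct amdgpu_vm *vm)
{
	/* xa_store_irq() may allocate; xa_err() folds failures into -errno. */
	return xa_err(xa_store_irq(&pasids, pasid, vm, GFP_KERNEL));
}

static void pasid_map_detach(u32 pasid)
{
	xa_erase_irq(&pasids, pasid);
}

static struct amdgpu_vm *pasid_map_find(u32 pasid)
{
	struct amdgpu_vm *vm;
	unsigned long flags;

	xa_lock_irqsave(&pasids, flags);
	vm = xa_load(&pasids, pasid);
	xa_unlock_irqrestore(&pasids, flags);
	return vm;
}

The conversion also drops the separate pasid_lock spinlock seen elsewhere in this patch, since the xarray carries its own IRQ-safe lock.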
* * Returns: * 0 for success, error for failure. */ -int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, u32 pasid) +int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) { struct amdgpu_bo *root_bo; struct amdgpu_bo_vm *root; @@ -2944,19 +2984,6 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, u32 pasid) amdgpu_bo_unreserve(vm->root.bo); - if (pasid) { - unsigned long flags; - - spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags); - r = idr_alloc(&adev->vm_manager.pasid_idr, vm, pasid, pasid + 1, - GFP_ATOMIC); - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags); - if (r < 0) - goto error_free_root; - - vm->pasid = pasid; - } - INIT_KFIFO(vm->faults); return 0; @@ -3012,7 +3039,6 @@ static int amdgpu_vm_check_clean_reserved(struct amdgpu_device *adev, * * @adev: amdgpu_device pointer * @vm: requested vm - * @pasid: pasid to use * * This only works on GFX VMs that don't have any BOs added and no * page tables allocated yet. @@ -3020,7 +3046,6 @@ static int amdgpu_vm_check_clean_reserved(struct amdgpu_device *adev, * Changes the following VM parameters: * - use_cpu_for_update * - pte_supports_ats - * - pasid (old PASID is released, because compute manages its own PASIDs) * * Reinitializes the page directory to reflect the changed ATS * setting. @@ -3028,8 +3053,7 @@ static int amdgpu_vm_check_clean_reserved(struct amdgpu_device *adev, * Returns: * 0 for success, -errno for errors. */ -int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, - u32 pasid) +int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm) { bool pte_support_ats = (adev->asic_type == CHIP_RAVEN); int r; @@ -3043,19 +3067,6 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, if (r) goto unreserve_bo; - if (pasid) { - unsigned long flags; - - spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags); - r = idr_alloc(&adev->vm_manager.pasid_idr, vm, pasid, pasid + 1, - GFP_ATOMIC); - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags); - - if (r == -ENOSPC) - goto unreserve_bo; - r = 0; - } - /* Check if PD needs to be reinitialized and do it before * changing any other state, in case it fails. 
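With the pasid parameter dropped from amdgpu_vm_init() and amdgpu_vm_make_compute(), callers are expected to create the VM first and attach a pasid as a separate step via amdgpu_vm_set_pasid() (and detach it again by passing 0). A hedged sketch of such a caller; example_open_vm() is hypothetical and not part of this patch:

/* Hypothetical caller showing the new two-step sequence. */
static int example_open_vm(struct amdgpu_device *adev, struct amdgpu_vm *vm,
			   u32 pasid)
{
	int r;

	r = amdgpu_vm_init(adev, vm);
	if (r)
		return r;

	r = amdgpu_vm_set_pasid(adev, vm, pasid);
	if (r)
		amdgpu_vm_fini(adev, vm);

	return r;
}

The same helper is what amdgpu_vm_release_compute() and amdgpu_vm_fini() now use to detach the pasid by passing 0.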
*/ @@ -3065,7 +3076,7 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, to_amdgpu_bo_vm(vm->root.bo), false); if (r) - goto free_idr; + goto unreserve_bo; } /* Update VM state */ @@ -3082,7 +3093,7 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, r = amdgpu_bo_sync_wait(vm->root.bo, AMDGPU_FENCE_OWNER_UNDEFINED, true); if (r) - goto free_idr; + goto unreserve_bo; vm->update_funcs = &amdgpu_vm_cpu_funcs; } else { @@ -3092,36 +3103,11 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, vm->last_update = NULL; vm->is_compute_context = true; - if (vm->pasid) { - unsigned long flags; - - spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags); - idr_remove(&adev->vm_manager.pasid_idr, vm->pasid); - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags); - - /* Free the original amdgpu allocated pasid - * Will be replaced with kfd allocated pasid - */ - amdgpu_pasid_free(vm->pasid); - vm->pasid = 0; - } - /* Free the shadow bo for compute VM */ amdgpu_bo_unref(&to_amdgpu_bo_vm(vm->root.bo)->shadow); - if (pasid) - vm->pasid = pasid; - goto unreserve_bo; -free_idr: - if (pasid) { - unsigned long flags; - - spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags); - idr_remove(&adev->vm_manager.pasid_idr, pasid); - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags); - } unreserve_bo: amdgpu_bo_unreserve(vm->root.bo); return r; @@ -3137,14 +3123,7 @@ unreserve_bo: */ void amdgpu_vm_release_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm) { - if (vm->pasid) { - unsigned long flags; - - spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags); - idr_remove(&adev->vm_manager.pasid_idr, vm->pasid); - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags); - } - vm->pasid = 0; + amdgpu_vm_set_pasid(adev, vm, 0); vm->is_compute_context = false; } @@ -3168,15 +3147,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) root = amdgpu_bo_ref(vm->root.bo); amdgpu_bo_reserve(root, true); - if (vm->pasid) { - unsigned long flags; - - spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags); - idr_remove(&adev->vm_manager.pasid_idr, vm->pasid); - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags); - vm->pasid = 0; - } - + amdgpu_vm_set_pasid(adev, vm, 0); dma_fence_wait(vm->last_unlocked, false); dma_fence_put(vm->last_unlocked); @@ -3258,8 +3229,7 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev) adev->vm_manager.vm_update_mode = 0; #endif - idr_init(&adev->vm_manager.pasid_idr); - spin_lock_init(&adev->vm_manager.pasid_lock); + xa_init_flags(&adev->vm_manager.pasids, XA_FLAGS_LOCK_IRQ); } /** @@ -3271,8 +3241,8 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev) */ void amdgpu_vm_manager_fini(struct amdgpu_device *adev) { - WARN_ON(!idr_is_empty(&adev->vm_manager.pasid_idr)); - idr_destroy(&adev->vm_manager.pasid_idr); + WARN_ON(!xa_empty(&adev->vm_manager.pasids)); + xa_destroy(&adev->vm_manager.pasids); amdgpu_vmid_mgr_fini(adev); } @@ -3341,13 +3311,13 @@ void amdgpu_vm_get_task_info(struct amdgpu_device *adev, u32 pasid, struct amdgpu_vm *vm; unsigned long flags; - spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags); + xa_lock_irqsave(&adev->vm_manager.pasids, flags); - vm = idr_find(&adev->vm_manager.pasid_idr, pasid); + vm = xa_load(&adev->vm_manager.pasids, pasid); if (vm) *task_info = vm->task_info; - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags); + xa_unlock_irqrestore(&adev->vm_manager.pasids, flags); } /** @@ -3375,12 +3345,13 @@ 
void amdgpu_vm_set_task_info(struct amdgpu_vm *vm) * @adev: amdgpu device pointer * @pasid: PASID of the VM * @addr: Address of the fault + * @write_fault: true is write fault, false is read fault * * Try to gracefully handle a VM fault. Return true if the fault was handled and * shouldn't be reported any more. */ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, - uint64_t addr) + uint64_t addr, bool write_fault) { bool is_compute_context = false; struct amdgpu_bo *root; @@ -3389,15 +3360,15 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, struct amdgpu_vm *vm; int r; - spin_lock_irqsave(&adev->vm_manager.pasid_lock, irqflags); - vm = idr_find(&adev->vm_manager.pasid_idr, pasid); + xa_lock_irqsave(&adev->vm_manager.pasids, irqflags); + vm = xa_load(&adev->vm_manager.pasids, pasid); if (vm) { root = amdgpu_bo_ref(vm->root.bo); is_compute_context = vm->is_compute_context; } else { root = NULL; } - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, irqflags); + xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags); if (!root) return false; @@ -3405,7 +3376,7 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, addr /= AMDGPU_GPU_PAGE_SIZE; if (is_compute_context && - !svm_range_restore_pages(adev, pasid, addr)) { + !svm_range_restore_pages(adev, pasid, addr, write_fault)) { amdgpu_bo_unref(&root); return true; } @@ -3415,11 +3386,11 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, goto error_unref; /* Double check that the VM still exists */ - spin_lock_irqsave(&adev->vm_manager.pasid_lock, irqflags); - vm = idr_find(&adev->vm_manager.pasid_idr, pasid); + xa_lock_irqsave(&adev->vm_manager.pasids, irqflags); + vm = xa_load(&adev->vm_manager.pasids, pasid); if (vm && vm->root.bo != root) vm = NULL; - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, irqflags); + xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags); if (!vm) goto error_unlock; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index f8fa653d4da7..85fcfb8c5efd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -359,8 +359,7 @@ struct amdgpu_vm_manager { /* PASID to VM mapping, will be used in interrupt context to * look up VM of a page fault */ - struct idr pasid_idr; - spinlock_t pasid_lock; + struct xarray pasids; }; struct amdgpu_bo_va_mapping; @@ -375,9 +374,12 @@ extern const struct amdgpu_vm_update_funcs amdgpu_vm_sdma_funcs; void amdgpu_vm_manager_init(struct amdgpu_device *adev); void amdgpu_vm_manager_fini(struct amdgpu_device *adev); +int amdgpu_vm_set_pasid(struct amdgpu_device *adev, struct amdgpu_vm *vm, + u32 pasid); + long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout); -int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, u32 pasid); -int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, u32 pasid); +int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm); +int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm); void amdgpu_vm_release_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm); void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm); void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm, @@ -406,7 +408,7 @@ int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, struct dma_fence **fence, bool *free_table); int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, - bool clear); + bool clear, bool 
*table_freed); bool amdgpu_vm_evictable(struct amdgpu_bo *bo); void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev, struct amdgpu_bo *bo, bool evicted); @@ -446,7 +448,7 @@ void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev); void amdgpu_vm_get_task_info(struct amdgpu_device *adev, u32 pasid, struct amdgpu_task_info *task_info); bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, - uint64_t addr); + uint64_t addr, bool write_fault); void amdgpu_vm_set_task_info(struct amdgpu_vm *vm); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 8567d5d77346..978ac927ac11 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -32,6 +32,10 @@ #include "wafl/wafl2_4_0_0_smn.h" #include "wafl/wafl2_4_0_0_sh_mask.h" +#define smnPCS_XGMI23_PCS_ERROR_STATUS 0x11a01210 +#define smnPCS_XGMI3X16_PCS_ERROR_STATUS 0x11a0020c +#define smnPCS_GOPX1_PCS_ERROR_STATUS 0x12200210 + static DEFINE_MUTEX(xgmi_mutex); #define AMDGPU_MAX_XGMI_DEVICE_PER_HIVE 4 @@ -63,6 +67,33 @@ static const int wafl_pcs_err_status_reg_arct[] = { smnPCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS + 0x100000, }; +static const int xgmi23_pcs_err_status_reg_aldebaran[] = { + smnPCS_XGMI23_PCS_ERROR_STATUS, + smnPCS_XGMI23_PCS_ERROR_STATUS + 0x100000, + smnPCS_XGMI23_PCS_ERROR_STATUS + 0x200000, + smnPCS_XGMI23_PCS_ERROR_STATUS + 0x300000, + smnPCS_XGMI23_PCS_ERROR_STATUS + 0x400000, + smnPCS_XGMI23_PCS_ERROR_STATUS + 0x500000, + smnPCS_XGMI23_PCS_ERROR_STATUS + 0x600000, + smnPCS_XGMI23_PCS_ERROR_STATUS + 0x700000 +}; + +static const int xgmi3x16_pcs_err_status_reg_aldebaran[] = { + smnPCS_XGMI3X16_PCS_ERROR_STATUS, + smnPCS_XGMI3X16_PCS_ERROR_STATUS + 0x100000, + smnPCS_XGMI3X16_PCS_ERROR_STATUS + 0x200000, + smnPCS_XGMI3X16_PCS_ERROR_STATUS + 0x300000, + smnPCS_XGMI3X16_PCS_ERROR_STATUS + 0x400000, + smnPCS_XGMI3X16_PCS_ERROR_STATUS + 0x500000, + smnPCS_XGMI3X16_PCS_ERROR_STATUS + 0x600000, + smnPCS_XGMI3X16_PCS_ERROR_STATUS + 0x700000 +}; + +static const int walf_pcs_err_status_reg_aldebaran[] = { + smnPCS_GOPX1_PCS_ERROR_STATUS, + smnPCS_GOPX1_PCS_ERROR_STATUS + 0x100000 +}; + static const struct amdgpu_pcs_ras_field xgmi_pcs_ras_fields[] = { {"XGMI PCS DataLossErr", SOC15_REG_FIELD(XGMI0_PCS_GOPX16_PCS_ERROR_STATUS, DataLossErr)}, @@ -486,6 +517,44 @@ int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev, return -EINVAL; } +int amdgpu_xgmi_get_num_links(struct amdgpu_device *adev, + struct amdgpu_device *peer_adev) +{ + struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info; + int i; + + for (i = 0 ; i < top->num_nodes; ++i) + if (top->nodes[i].node_id == peer_adev->gmc.xgmi.node_id) + return top->nodes[i].num_links; + return -EINVAL; +} + +/* + * Devices that support extended data require the entire hive to initialize with + * the shared memory buffer flag set. 
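The three aldebaran PCS error-status tables defined earlier in this hunk are simply a base SMN address replicated across link instances at a 0x100000 stride. A short standalone C snippet that regenerates the XGMI3X16 list from the #define above (instance count and stride taken from that table; the snippet is illustrative only):

#include <stdio.h>

#define smnPCS_XGMI3X16_PCS_ERROR_STATUS 0x11a0020c
#define XGMI3X16_INSTANCES 8
#define SMN_INSTANCE_STRIDE 0x100000

int main(void)
{
	unsigned int i;

	for (i = 0; i < XGMI3X16_INSTANCES; i++)
		printf("instance %u: 0x%08x\n", i,
		       smnPCS_XGMI3X16_PCS_ERROR_STATUS + i * SMN_INSTANCE_STRIDE);
	return 0;
}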
+ * + * Hive locks and conditions apply - see amdgpu_xgmi_add_device + */ +static int amdgpu_xgmi_initialize_hive_get_data_partition(struct amdgpu_hive_info *hive, + bool set_extended_data) +{ + struct amdgpu_device *tmp_adev; + int ret; + + list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { + ret = psp_xgmi_initialize(&tmp_adev->psp, set_extended_data, false); + if (ret) { + dev_err(tmp_adev->dev, + "XGMI: Failed to initialize xgmi session for data partition %i\n", + set_extended_data); + return ret; + } + + } + + return 0; +} + int amdgpu_xgmi_add_device(struct amdgpu_device *adev) { struct psp_xgmi_topology_info *top_info; @@ -500,7 +569,7 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev) if (!adev->gmc.xgmi.pending_reset && amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP)) { - ret = psp_xgmi_initialize(&adev->psp); + ret = psp_xgmi_initialize(&adev->psp, false, true); if (ret) { dev_err(adev->dev, "XGMI: Failed to initialize xgmi session\n"); @@ -563,7 +632,7 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev) /* get latest topology info for each device from psp */ list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count, - &tmp_adev->psp.xgmi_context.top_info); + &tmp_adev->psp.xgmi_context.top_info, false); if (ret) { dev_err(tmp_adev->dev, "XGMI: Get topology failure on device %llx, hive %llx, ret %d", @@ -573,6 +642,34 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev) goto exit_unlock; } } + + /* get topology again for hives that support extended data */ + if (adev->psp.xgmi_context.supports_extended_data) { + + /* initialize the hive to get extended data. */ + ret = amdgpu_xgmi_initialize_hive_get_data_partition(hive, true); + if (ret) + goto exit_unlock; + + /* get the extended data. */ + list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { + ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count, + &tmp_adev->psp.xgmi_context.top_info, true); + if (ret) { + dev_err(tmp_adev->dev, + "XGMI: Get topology for extended data failure on device %llx, hive %llx, ret %d", + tmp_adev->gmc.xgmi.node_id, + tmp_adev->gmc.xgmi.hive_id, ret); + goto exit_unlock; + } + } + + /* initialize the hive to get non-extended data for the next round. 
*/ + ret = amdgpu_xgmi_initialize_hive_get_data_partition(hive, false); + if (ret) + goto exit_unlock; + + } } if (!ret && !adev->gmc.xgmi.pending_reset) @@ -651,7 +748,6 @@ static int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev) adev->gmc.xgmi.ras_if->block = AMDGPU_RAS_BLOCK__XGMI_WAFL; adev->gmc.xgmi.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; adev->gmc.xgmi.ras_if->sub_block_index = 0; - strcpy(adev->gmc.xgmi.ras_if->name, "xgmi_wafl"); } ih_info.head = fs_info.head = *adev->gmc.xgmi.ras_if; r = amdgpu_ras_late_init(adev, adev->gmc.xgmi.ras_if, @@ -706,6 +802,17 @@ static void amdgpu_xgmi_reset_ras_error_count(struct amdgpu_device *adev) pcs_clear_status(adev, xgmi_pcs_err_status_reg_vg20[i]); break; + case CHIP_ALDEBARAN: + for (i = 0; i < ARRAY_SIZE(xgmi23_pcs_err_status_reg_aldebaran); i++) + pcs_clear_status(adev, + xgmi23_pcs_err_status_reg_aldebaran[i]); + for (i = 0; i < ARRAY_SIZE(xgmi23_pcs_err_status_reg_aldebaran); i++) + pcs_clear_status(adev, + xgmi23_pcs_err_status_reg_aldebaran[i]); + for (i = 0; i < ARRAY_SIZE(walf_pcs_err_status_reg_aldebaran); i++) + pcs_clear_status(adev, + walf_pcs_err_status_reg_aldebaran[i]); + break; default: break; } @@ -783,7 +890,6 @@ static int amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev, } break; case CHIP_VEGA20: - default: /* check xgmi pcs error */ for (i = 0; i < ARRAY_SIZE(xgmi_pcs_err_status_reg_vg20); i++) { data = RREG32_PCIE(xgmi_pcs_err_status_reg_vg20[i]); @@ -799,6 +905,32 @@ static int amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev, data, &ue_cnt, &ce_cnt, false); } break; + case CHIP_ALDEBARAN: + /* check xgmi23 pcs error */ + for (i = 0; i < ARRAY_SIZE(xgmi23_pcs_err_status_reg_aldebaran); i++) { + data = RREG32_PCIE(xgmi23_pcs_err_status_reg_aldebaran[i]); + if (data) + amdgpu_xgmi_query_pcs_error_status(adev, + data, &ue_cnt, &ce_cnt, true); + } + /* check xgmi3x16 pcs error */ + for (i = 0; i < ARRAY_SIZE(xgmi3x16_pcs_err_status_reg_aldebaran); i++) { + data = RREG32_PCIE(xgmi3x16_pcs_err_status_reg_aldebaran[i]); + if (data) + amdgpu_xgmi_query_pcs_error_status(adev, + data, &ue_cnt, &ce_cnt, true); + } + /* check wafl pcs error */ + for (i = 0; i < ARRAY_SIZE(walf_pcs_err_status_reg_aldebaran); i++) { + data = RREG32_PCIE(walf_pcs_err_status_reg_aldebaran[i]); + if (data) + amdgpu_xgmi_query_pcs_error_status(adev, + data, &ue_cnt, &ce_cnt, false); + } + break; + default: + dev_warn(adev->dev, "XGMI RAS error query not supported"); + break; } adev->gmc.xgmi.ras_funcs->reset_ras_error_count(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h index 12969c0830d5..d2189bf7d428 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h @@ -59,6 +59,8 @@ int amdgpu_xgmi_remove_device(struct amdgpu_device *adev); int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate); int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev, struct amdgpu_device *peer_adev); +int amdgpu_xgmi_get_num_links(struct amdgpu_device *adev, + struct amdgpu_device *peer_adev); uint64_t amdgpu_xgmi_get_relative_phy_addr(struct amdgpu_device *adev, uint64_t addr); static inline bool amdgpu_xgmi_same_hive(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c b/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c index 159a2a4385a1..afad094f84c2 100644 --- a/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c +++ b/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c @@ -851,7 +851,7 @@ void 
amdgpu_atombios_crtc_set_pll(struct drm_crtc *crtc, struct drm_display_mode pll->reference_div = amdgpu_crtc->pll_reference_div; pll->post_div = amdgpu_crtc->pll_post_div; - amdgpu_pll_compute(pll, amdgpu_crtc->adjusted_clock, &pll_clock, + amdgpu_pll_compute(adev, pll, amdgpu_crtc->adjusted_clock, &pll_clock, &fb_div, &frac_fb_div, &ref_div, &post_div); amdgpu_atombios_crtc_program_ss(adev, ATOM_DISABLE, amdgpu_crtc->pll_id, diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index c0fcc41ee574..54f28c075f21 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -70,7 +70,7 @@ #include "amdgpu_dm.h" #include "amdgpu_amdkfd.h" -#include "dce_virtual.h" +#include "amdgpu_vkms.h" static const struct amdgpu_video_codec_info cik_video_codecs_encode_array[] = { @@ -2259,7 +2259,7 @@ int cik_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &cik_sdma_ip_block); amdgpu_device_ip_block_add(adev, &pp_smu_ip_block); if (adev->enable_virtual_display) - amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); + amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block); #if defined(CONFIG_DRM_AMD_DC) else if (amdgpu_device_has_dc_support(adev)) amdgpu_device_ip_block_add(adev, &dm_ip_block); @@ -2277,7 +2277,7 @@ int cik_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &cik_sdma_ip_block); amdgpu_device_ip_block_add(adev, &pp_smu_ip_block); if (adev->enable_virtual_display) - amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); + amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block); #if defined(CONFIG_DRM_AMD_DC) else if (amdgpu_device_has_dc_support(adev)) amdgpu_device_ip_block_add(adev, &dm_ip_block); @@ -2295,7 +2295,7 @@ int cik_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &cik_sdma_ip_block); amdgpu_device_ip_block_add(adev, &kv_smu_ip_block); if (adev->enable_virtual_display) - amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); + amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block); #if defined(CONFIG_DRM_AMD_DC) else if (amdgpu_device_has_dc_support(adev)) amdgpu_device_ip_block_add(adev, &dm_ip_block); @@ -2315,7 +2315,7 @@ int cik_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &cik_sdma_ip_block); amdgpu_device_ip_block_add(adev, &kv_smu_ip_block); if (adev->enable_virtual_display) - amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); + amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block); #if defined(CONFIG_DRM_AMD_DC) else if (amdgpu_device_has_dc_support(adev)) amdgpu_device_ip_block_add(adev, &dm_ip_block); diff --git a/drivers/gpu/drm/amd/amdgpu/cyan_skillfish_reg_init.c b/drivers/gpu/drm/amd/amdgpu/cyan_skillfish_reg_init.c new file mode 100644 index 000000000000..58808814d8fb --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/cyan_skillfish_reg_init.c @@ -0,0 +1,51 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "amdgpu.h" +#include "nv.h" + +#include "soc15_common.h" +#include "soc15_hw_ip.h" +#include "cyan_skillfish_ip_offset.h" + +int cyan_skillfish_reg_base_init(struct amdgpu_device *adev) +{ + /* HW has more IP blocks, only initialized the blocke needed by driver */ + uint32_t i; + for (i = 0 ; i < MAX_INSTANCE ; ++i) { + adev->reg_offset[GC_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); + adev->reg_offset[HDP_HWIP][i] = (uint32_t *)(&(HDP_BASE.instance[i])); + adev->reg_offset[MMHUB_HWIP][i] = (uint32_t *)(&(MMHUB_BASE.instance[i])); + adev->reg_offset[ATHUB_HWIP][i] = (uint32_t *)(&(ATHUB_BASE.instance[i])); + adev->reg_offset[NBIO_HWIP][i] = (uint32_t *)(&(NBIO_BASE.instance[i])); + adev->reg_offset[MP0_HWIP][i] = (uint32_t *)(&(MP0_BASE.instance[i])); + adev->reg_offset[MP1_HWIP][i] = (uint32_t *)(&(MP1_BASE.instance[i])); + adev->reg_offset[VCN_HWIP][i] = (uint32_t *)(&(UVD0_BASE.instance[i])); + adev->reg_offset[DF_HWIP][i] = (uint32_t *)(&(DF_BASE.instance[i])); + adev->reg_offset[DCE_HWIP][i] = (uint32_t *)(&(DMU_BASE.instance[i])); + adev->reg_offset[OSSSYS_HWIP][i] = (uint32_t *)(&(OSSSYS_BASE.instance[i])); + adev->reg_offset[SDMA0_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); + adev->reg_offset[SDMA1_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); + adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i])); + } + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c deleted file mode 100644 index 7e0d8c092c7e..000000000000 --- a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c +++ /dev/null @@ -1,780 +0,0 @@ -/* - * Copyright 2014 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. 
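cyan_skillfish_reg_base_init() above only fills adev->reg_offset[...][instance] with pointers into per-IP base-address tables; later register accesses add a block-relative register offset to one of those base segments. Roughly that two-level lookup, as a standalone C toy; the enum, the table contents and the register index below are invented for illustration and do not match the real ip_offset header:

#include <stdio.h>

enum hwip { GC_HWIP, MMHUB_HWIP, HWIP_MAX };
#define MAX_INSTANCE 2
#define MAX_SEGMENT  4

/* Invented stand-in for the per-instance segment base addresses. */
static const unsigned int gc_base[MAX_INSTANCE][MAX_SEGMENT] = {
	{ 0x00001260, 0x0000a000, 0x02402c00, 0x0240a000 },
	{ 0x00001260, 0x0000a000, 0x02402c00, 0x0240a000 },
};

static const unsigned int *reg_offset[HWIP_MAX][MAX_INSTANCE];

int main(void)
{
	unsigned int i, base_idx = 1, reg = 0x22;  /* illustrative register */

	for (i = 0; i < MAX_INSTANCE; i++)
		reg_offset[GC_HWIP][i] = gc_base[i];

	/* A register address is "segment base + register-relative offset". */
	printf("reg addr: 0x%08x\n", reg_offset[GC_HWIP][0][base_idx] + reg);
	return 0;
}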
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - */ - -#include <drm/drm_vblank.h> - -#include "amdgpu.h" -#include "amdgpu_pm.h" -#include "amdgpu_i2c.h" -#include "atom.h" -#include "amdgpu_pll.h" -#include "amdgpu_connectors.h" -#ifdef CONFIG_DRM_AMDGPU_SI -#include "dce_v6_0.h" -#endif -#ifdef CONFIG_DRM_AMDGPU_CIK -#include "dce_v8_0.h" -#endif -#include "dce_v10_0.h" -#include "dce_v11_0.h" -#include "dce_virtual.h" -#include "ivsrcid/ivsrcid_vislands30.h" -#include "amdgpu_display.h" - -#define DCE_VIRTUAL_VBLANK_PERIOD 16666666 - - -static void dce_virtual_set_display_funcs(struct amdgpu_device *adev); -static void dce_virtual_set_irq_funcs(struct amdgpu_device *adev); -static int dce_virtual_connector_encoder_init(struct amdgpu_device *adev, - int index); -static int dce_virtual_pageflip(struct amdgpu_device *adev, - unsigned crtc_id); -static enum hrtimer_restart dce_virtual_vblank_timer_handle(struct hrtimer *vblank_timer); -static void dce_virtual_set_crtc_vblank_interrupt_state(struct amdgpu_device *adev, - int crtc, - enum amdgpu_interrupt_state state); - -static u32 dce_virtual_vblank_get_counter(struct amdgpu_device *adev, int crtc) -{ - return 0; -} - -static void dce_virtual_page_flip(struct amdgpu_device *adev, - int crtc_id, u64 crtc_base, bool async) -{ - return; -} - -static int dce_virtual_crtc_get_scanoutpos(struct amdgpu_device *adev, int crtc, - u32 *vbl, u32 *position) -{ - *vbl = 0; - *position = 0; - - return -EINVAL; -} - -static bool dce_virtual_hpd_sense(struct amdgpu_device *adev, - enum amdgpu_hpd_id hpd) -{ - return true; -} - -static void dce_virtual_hpd_set_polarity(struct amdgpu_device *adev, - enum amdgpu_hpd_id hpd) -{ - return; -} - -static u32 dce_virtual_hpd_get_gpio_reg(struct amdgpu_device *adev) -{ - return 0; -} - -/** - * dce_virtual_bandwidth_update - program display watermarks - * - * @adev: amdgpu_device pointer - * - * Calculate and program the display watermarks and line - * buffer allocation (CIK). 
- */ -static void dce_virtual_bandwidth_update(struct amdgpu_device *adev) -{ - return; -} - -static int dce_virtual_crtc_gamma_set(struct drm_crtc *crtc, u16 *red, - u16 *green, u16 *blue, uint32_t size, - struct drm_modeset_acquire_ctx *ctx) -{ - return 0; -} - -static void dce_virtual_crtc_destroy(struct drm_crtc *crtc) -{ - struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); - - drm_crtc_cleanup(crtc); - kfree(amdgpu_crtc); -} - -static const struct drm_crtc_funcs dce_virtual_crtc_funcs = { - .cursor_set2 = NULL, - .cursor_move = NULL, - .gamma_set = dce_virtual_crtc_gamma_set, - .set_config = amdgpu_display_crtc_set_config, - .destroy = dce_virtual_crtc_destroy, - .page_flip_target = amdgpu_display_crtc_page_flip_target, - .get_vblank_counter = amdgpu_get_vblank_counter_kms, - .enable_vblank = amdgpu_enable_vblank_kms, - .disable_vblank = amdgpu_disable_vblank_kms, - .get_vblank_timestamp = drm_crtc_vblank_helper_get_vblank_timestamp, -}; - -static void dce_virtual_crtc_dpms(struct drm_crtc *crtc, int mode) -{ - struct drm_device *dev = crtc->dev; - struct amdgpu_device *adev = drm_to_adev(dev); - struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); - unsigned type; - - switch (mode) { - case DRM_MODE_DPMS_ON: - amdgpu_crtc->enabled = true; - /* Make sure VBLANK interrupts are still enabled */ - type = amdgpu_display_crtc_idx_to_irq_type(adev, - amdgpu_crtc->crtc_id); - amdgpu_irq_update(adev, &adev->crtc_irq, type); - drm_crtc_vblank_on(crtc); - break; - case DRM_MODE_DPMS_STANDBY: - case DRM_MODE_DPMS_SUSPEND: - case DRM_MODE_DPMS_OFF: - drm_crtc_vblank_off(crtc); - amdgpu_crtc->enabled = false; - break; - } -} - - -static void dce_virtual_crtc_prepare(struct drm_crtc *crtc) -{ - dce_virtual_crtc_dpms(crtc, DRM_MODE_DPMS_OFF); -} - -static void dce_virtual_crtc_commit(struct drm_crtc *crtc) -{ - dce_virtual_crtc_dpms(crtc, DRM_MODE_DPMS_ON); -} - -static void dce_virtual_crtc_disable(struct drm_crtc *crtc) -{ - struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); - struct drm_device *dev = crtc->dev; - - if (dev->num_crtcs) - drm_crtc_vblank_off(crtc); - - amdgpu_crtc->enabled = false; - amdgpu_crtc->pll_id = ATOM_PPLL_INVALID; - amdgpu_crtc->encoder = NULL; - amdgpu_crtc->connector = NULL; -} - -static int dce_virtual_crtc_mode_set(struct drm_crtc *crtc, - struct drm_display_mode *mode, - struct drm_display_mode *adjusted_mode, - int x, int y, struct drm_framebuffer *old_fb) -{ - struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); - - /* update the hw version fpr dpm */ - amdgpu_crtc->hw_mode = *adjusted_mode; - - return 0; -} - -static bool dce_virtual_crtc_mode_fixup(struct drm_crtc *crtc, - const struct drm_display_mode *mode, - struct drm_display_mode *adjusted_mode) -{ - return true; -} - - -static int dce_virtual_crtc_set_base(struct drm_crtc *crtc, int x, int y, - struct drm_framebuffer *old_fb) -{ - return 0; -} - -static int dce_virtual_crtc_set_base_atomic(struct drm_crtc *crtc, - struct drm_framebuffer *fb, - int x, int y, enum mode_set_atomic state) -{ - return 0; -} - -static const struct drm_crtc_helper_funcs dce_virtual_crtc_helper_funcs = { - .dpms = dce_virtual_crtc_dpms, - .mode_fixup = dce_virtual_crtc_mode_fixup, - .mode_set = dce_virtual_crtc_mode_set, - .mode_set_base = dce_virtual_crtc_set_base, - .mode_set_base_atomic = dce_virtual_crtc_set_base_atomic, - .prepare = dce_virtual_crtc_prepare, - .commit = dce_virtual_crtc_commit, - .disable = dce_virtual_crtc_disable, - .get_scanout_position = amdgpu_crtc_get_scanout_position, -}; - -static int 
dce_virtual_crtc_init(struct amdgpu_device *adev, int index) -{ - struct amdgpu_crtc *amdgpu_crtc; - - amdgpu_crtc = kzalloc(sizeof(struct amdgpu_crtc) + - (AMDGPUFB_CONN_LIMIT * sizeof(struct drm_connector *)), GFP_KERNEL); - if (amdgpu_crtc == NULL) - return -ENOMEM; - - drm_crtc_init(adev_to_drm(adev), &amdgpu_crtc->base, &dce_virtual_crtc_funcs); - - drm_mode_crtc_set_gamma_size(&amdgpu_crtc->base, 256); - amdgpu_crtc->crtc_id = index; - adev->mode_info.crtcs[index] = amdgpu_crtc; - - amdgpu_crtc->pll_id = ATOM_PPLL_INVALID; - amdgpu_crtc->encoder = NULL; - amdgpu_crtc->connector = NULL; - amdgpu_crtc->vsync_timer_enabled = AMDGPU_IRQ_STATE_DISABLE; - drm_crtc_helper_add(&amdgpu_crtc->base, &dce_virtual_crtc_helper_funcs); - - hrtimer_init(&amdgpu_crtc->vblank_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - hrtimer_set_expires(&amdgpu_crtc->vblank_timer, DCE_VIRTUAL_VBLANK_PERIOD); - amdgpu_crtc->vblank_timer.function = dce_virtual_vblank_timer_handle; - hrtimer_start(&amdgpu_crtc->vblank_timer, - DCE_VIRTUAL_VBLANK_PERIOD, HRTIMER_MODE_REL); - return 0; -} - -static int dce_virtual_early_init(void *handle) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - dce_virtual_set_display_funcs(adev); - dce_virtual_set_irq_funcs(adev); - - adev->mode_info.num_hpd = 1; - adev->mode_info.num_dig = 1; - return 0; -} - -static struct drm_encoder * -dce_virtual_encoder(struct drm_connector *connector) -{ - struct drm_encoder *encoder; - - drm_connector_for_each_possible_encoder(connector, encoder) { - if (encoder->encoder_type == DRM_MODE_ENCODER_VIRTUAL) - return encoder; - } - - /* pick the first one */ - drm_connector_for_each_possible_encoder(connector, encoder) - return encoder; - - return NULL; -} - -static int dce_virtual_get_modes(struct drm_connector *connector) -{ - struct drm_device *dev = connector->dev; - struct drm_display_mode *mode = NULL; - unsigned i; - static const struct mode_size { - int w; - int h; - } common_modes[] = { - { 640, 480}, - { 720, 480}, - { 800, 600}, - { 848, 480}, - {1024, 768}, - {1152, 768}, - {1280, 720}, - {1280, 800}, - {1280, 854}, - {1280, 960}, - {1280, 1024}, - {1440, 900}, - {1400, 1050}, - {1680, 1050}, - {1600, 1200}, - {1920, 1080}, - {1920, 1200}, - {2560, 1440}, - {4096, 3112}, - {3656, 2664}, - {3840, 2160}, - {4096, 2160}, - }; - - for (i = 0; i < ARRAY_SIZE(common_modes); i++) { - mode = drm_cvt_mode(dev, common_modes[i].w, common_modes[i].h, 60, false, false, false); - drm_mode_probed_add(connector, mode); - } - - return 0; -} - -static enum drm_mode_status dce_virtual_mode_valid(struct drm_connector *connector, - struct drm_display_mode *mode) -{ - return MODE_OK; -} - -static int -dce_virtual_dpms(struct drm_connector *connector, int mode) -{ - return 0; -} - -static int -dce_virtual_set_property(struct drm_connector *connector, - struct drm_property *property, - uint64_t val) -{ - return 0; -} - -static void dce_virtual_destroy(struct drm_connector *connector) -{ - drm_connector_unregister(connector); - drm_connector_cleanup(connector); - kfree(connector); -} - -static void dce_virtual_force(struct drm_connector *connector) -{ - return; -} - -static const struct drm_connector_helper_funcs dce_virtual_connector_helper_funcs = { - .get_modes = dce_virtual_get_modes, - .mode_valid = dce_virtual_mode_valid, - .best_encoder = dce_virtual_encoder, -}; - -static const struct drm_connector_funcs dce_virtual_connector_funcs = { - .dpms = dce_virtual_dpms, - .fill_modes = drm_helper_probe_single_connector_modes, - .set_property = 
dce_virtual_set_property, - .destroy = dce_virtual_destroy, - .force = dce_virtual_force, -}; - -static int dce_virtual_sw_init(void *handle) -{ - int r, i; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SMU_DISP_TIMER2_TRIGGER, &adev->crtc_irq); - if (r) - return r; - - adev_to_drm(adev)->max_vblank_count = 0; - - adev_to_drm(adev)->mode_config.funcs = &amdgpu_mode_funcs; - - adev_to_drm(adev)->mode_config.max_width = 16384; - adev_to_drm(adev)->mode_config.max_height = 16384; - - adev_to_drm(adev)->mode_config.preferred_depth = 24; - adev_to_drm(adev)->mode_config.prefer_shadow = 1; - - adev_to_drm(adev)->mode_config.fb_base = adev->gmc.aper_base; - - r = amdgpu_display_modeset_create_props(adev); - if (r) - return r; - - adev_to_drm(adev)->mode_config.max_width = 16384; - adev_to_drm(adev)->mode_config.max_height = 16384; - - /* allocate crtcs, encoders, connectors */ - for (i = 0; i < adev->mode_info.num_crtc; i++) { - r = dce_virtual_crtc_init(adev, i); - if (r) - return r; - r = dce_virtual_connector_encoder_init(adev, i); - if (r) - return r; - } - - drm_kms_helper_poll_init(adev_to_drm(adev)); - - adev->mode_info.mode_config_initialized = true; - return 0; -} - -static int dce_virtual_sw_fini(void *handle) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - int i = 0; - - for (i = 0; i < adev->mode_info.num_crtc; i++) - if (adev->mode_info.crtcs[i]) - hrtimer_cancel(&adev->mode_info.crtcs[i]->vblank_timer); - - kfree(adev->mode_info.bios_hardcoded_edid); - - drm_kms_helper_poll_fini(adev_to_drm(adev)); - - drm_mode_config_cleanup(adev_to_drm(adev)); - /* clear crtcs pointer to avoid dce irq finish routine access freed data */ - memset(adev->mode_info.crtcs, 0, sizeof(adev->mode_info.crtcs[0]) * AMDGPU_MAX_CRTCS); - adev->mode_info.mode_config_initialized = false; - return 0; -} - -static int dce_virtual_hw_init(void *handle) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - switch (adev->asic_type) { -#ifdef CONFIG_DRM_AMDGPU_SI - case CHIP_TAHITI: - case CHIP_PITCAIRN: - case CHIP_VERDE: - case CHIP_OLAND: - dce_v6_0_disable_dce(adev); - break; -#endif -#ifdef CONFIG_DRM_AMDGPU_CIK - case CHIP_BONAIRE: - case CHIP_HAWAII: - case CHIP_KAVERI: - case CHIP_KABINI: - case CHIP_MULLINS: - dce_v8_0_disable_dce(adev); - break; -#endif - case CHIP_FIJI: - case CHIP_TONGA: - dce_v10_0_disable_dce(adev); - break; - case CHIP_CARRIZO: - case CHIP_STONEY: - case CHIP_POLARIS10: - case CHIP_POLARIS11: - case CHIP_VEGAM: - dce_v11_0_disable_dce(adev); - break; - case CHIP_TOPAZ: -#ifdef CONFIG_DRM_AMDGPU_SI - case CHIP_HAINAN: -#endif - /* no DCE */ - break; - default: - break; - } - return 0; -} - -static int dce_virtual_hw_fini(void *handle) -{ - return 0; -} - -static int dce_virtual_suspend(void *handle) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - int r; - - r = amdgpu_display_suspend_helper(adev); - if (r) - return r; - return dce_virtual_hw_fini(handle); -} - -static int dce_virtual_resume(void *handle) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - int r; - - r = dce_virtual_hw_init(handle); - if (r) - return r; - return amdgpu_display_resume_helper(adev); -} - -static bool dce_virtual_is_idle(void *handle) -{ - return true; -} - -static int dce_virtual_wait_for_idle(void *handle) -{ - return 0; -} - -static int dce_virtual_soft_reset(void *handle) -{ - return 0; -} - -static int 
dce_virtual_set_clockgating_state(void *handle, - enum amd_clockgating_state state) -{ - return 0; -} - -static int dce_virtual_set_powergating_state(void *handle, - enum amd_powergating_state state) -{ - return 0; -} - -static const struct amd_ip_funcs dce_virtual_ip_funcs = { - .name = "dce_virtual", - .early_init = dce_virtual_early_init, - .late_init = NULL, - .sw_init = dce_virtual_sw_init, - .sw_fini = dce_virtual_sw_fini, - .hw_init = dce_virtual_hw_init, - .hw_fini = dce_virtual_hw_fini, - .suspend = dce_virtual_suspend, - .resume = dce_virtual_resume, - .is_idle = dce_virtual_is_idle, - .wait_for_idle = dce_virtual_wait_for_idle, - .soft_reset = dce_virtual_soft_reset, - .set_clockgating_state = dce_virtual_set_clockgating_state, - .set_powergating_state = dce_virtual_set_powergating_state, -}; - -/* these are handled by the primary encoders */ -static void dce_virtual_encoder_prepare(struct drm_encoder *encoder) -{ - return; -} - -static void dce_virtual_encoder_commit(struct drm_encoder *encoder) -{ - return; -} - -static void -dce_virtual_encoder_mode_set(struct drm_encoder *encoder, - struct drm_display_mode *mode, - struct drm_display_mode *adjusted_mode) -{ - return; -} - -static void dce_virtual_encoder_disable(struct drm_encoder *encoder) -{ - return; -} - -static void -dce_virtual_encoder_dpms(struct drm_encoder *encoder, int mode) -{ - return; -} - -static bool dce_virtual_encoder_mode_fixup(struct drm_encoder *encoder, - const struct drm_display_mode *mode, - struct drm_display_mode *adjusted_mode) -{ - return true; -} - -static const struct drm_encoder_helper_funcs dce_virtual_encoder_helper_funcs = { - .dpms = dce_virtual_encoder_dpms, - .mode_fixup = dce_virtual_encoder_mode_fixup, - .prepare = dce_virtual_encoder_prepare, - .mode_set = dce_virtual_encoder_mode_set, - .commit = dce_virtual_encoder_commit, - .disable = dce_virtual_encoder_disable, -}; - -static void dce_virtual_encoder_destroy(struct drm_encoder *encoder) -{ - drm_encoder_cleanup(encoder); - kfree(encoder); -} - -static const struct drm_encoder_funcs dce_virtual_encoder_funcs = { - .destroy = dce_virtual_encoder_destroy, -}; - -static int dce_virtual_connector_encoder_init(struct amdgpu_device *adev, - int index) -{ - struct drm_encoder *encoder; - struct drm_connector *connector; - - /* add a new encoder */ - encoder = kzalloc(sizeof(struct drm_encoder), GFP_KERNEL); - if (!encoder) - return -ENOMEM; - encoder->possible_crtcs = 1 << index; - drm_encoder_init(adev_to_drm(adev), encoder, &dce_virtual_encoder_funcs, - DRM_MODE_ENCODER_VIRTUAL, NULL); - drm_encoder_helper_add(encoder, &dce_virtual_encoder_helper_funcs); - - connector = kzalloc(sizeof(struct drm_connector), GFP_KERNEL); - if (!connector) { - kfree(encoder); - return -ENOMEM; - } - - /* add a new connector */ - drm_connector_init(adev_to_drm(adev), connector, &dce_virtual_connector_funcs, - DRM_MODE_CONNECTOR_VIRTUAL); - drm_connector_helper_add(connector, &dce_virtual_connector_helper_funcs); - connector->display_info.subpixel_order = SubPixelHorizontalRGB; - connector->interlace_allowed = false; - connector->doublescan_allowed = false; - - /* link them */ - drm_connector_attach_encoder(connector, encoder); - - return 0; -} - -static const struct amdgpu_display_funcs dce_virtual_display_funcs = { - .bandwidth_update = &dce_virtual_bandwidth_update, - .vblank_get_counter = &dce_virtual_vblank_get_counter, - .backlight_set_level = NULL, - .backlight_get_level = NULL, - .hpd_sense = &dce_virtual_hpd_sense, - .hpd_set_polarity = 
&dce_virtual_hpd_set_polarity, - .hpd_get_gpio_reg = &dce_virtual_hpd_get_gpio_reg, - .page_flip = &dce_virtual_page_flip, - .page_flip_get_scanoutpos = &dce_virtual_crtc_get_scanoutpos, - .add_encoder = NULL, - .add_connector = NULL, -}; - -static void dce_virtual_set_display_funcs(struct amdgpu_device *adev) -{ - adev->mode_info.funcs = &dce_virtual_display_funcs; -} - -static int dce_virtual_pageflip(struct amdgpu_device *adev, - unsigned crtc_id) -{ - unsigned long flags; - struct amdgpu_crtc *amdgpu_crtc; - struct amdgpu_flip_work *works; - - amdgpu_crtc = adev->mode_info.crtcs[crtc_id]; - - if (crtc_id >= adev->mode_info.num_crtc) { - DRM_ERROR("invalid pageflip crtc %d\n", crtc_id); - return -EINVAL; - } - - /* IRQ could occur when in initial stage */ - if (amdgpu_crtc == NULL) - return 0; - - spin_lock_irqsave(&adev_to_drm(adev)->event_lock, flags); - works = amdgpu_crtc->pflip_works; - if (amdgpu_crtc->pflip_status != AMDGPU_FLIP_SUBMITTED) { - DRM_DEBUG_DRIVER("amdgpu_crtc->pflip_status = %d != " - "AMDGPU_FLIP_SUBMITTED(%d)\n", - amdgpu_crtc->pflip_status, - AMDGPU_FLIP_SUBMITTED); - spin_unlock_irqrestore(&adev_to_drm(adev)->event_lock, flags); - return 0; - } - - /* page flip completed. clean up */ - amdgpu_crtc->pflip_status = AMDGPU_FLIP_NONE; - amdgpu_crtc->pflip_works = NULL; - - /* wakeup usersapce */ - if (works->event) - drm_crtc_send_vblank_event(&amdgpu_crtc->base, works->event); - - spin_unlock_irqrestore(&adev_to_drm(adev)->event_lock, flags); - - drm_crtc_vblank_put(&amdgpu_crtc->base); - amdgpu_bo_unref(&works->old_abo); - kfree(works->shared); - kfree(works); - - return 0; -} - -static enum hrtimer_restart dce_virtual_vblank_timer_handle(struct hrtimer *vblank_timer) -{ - struct amdgpu_crtc *amdgpu_crtc = container_of(vblank_timer, - struct amdgpu_crtc, vblank_timer); - struct drm_device *ddev = amdgpu_crtc->base.dev; - struct amdgpu_device *adev = drm_to_adev(ddev); - struct amdgpu_irq_src *source = adev->irq.client[AMDGPU_IRQ_CLIENTID_LEGACY].sources - [VISLANDS30_IV_SRCID_SMU_DISP_TIMER2_TRIGGER]; - int irq_type = amdgpu_display_crtc_idx_to_irq_type(adev, - amdgpu_crtc->crtc_id); - - if (amdgpu_irq_enabled(adev, source, irq_type)) { - drm_handle_vblank(ddev, amdgpu_crtc->crtc_id); - dce_virtual_pageflip(adev, amdgpu_crtc->crtc_id); - } - hrtimer_start(vblank_timer, DCE_VIRTUAL_VBLANK_PERIOD, - HRTIMER_MODE_REL); - - return HRTIMER_NORESTART; -} - -static void dce_virtual_set_crtc_vblank_interrupt_state(struct amdgpu_device *adev, - int crtc, - enum amdgpu_interrupt_state state) -{ - if (crtc >= adev->mode_info.num_crtc || !adev->mode_info.crtcs[crtc]) { - DRM_DEBUG("invalid crtc %d\n", crtc); - return; - } - - adev->mode_info.crtcs[crtc]->vsync_timer_enabled = state; - DRM_DEBUG("[FM]set crtc %d vblank interrupt state %d\n", crtc, state); -} - - -static int dce_virtual_set_crtc_irq_state(struct amdgpu_device *adev, - struct amdgpu_irq_src *source, - unsigned type, - enum amdgpu_interrupt_state state) -{ - if (type > AMDGPU_CRTC_IRQ_VBLANK6) - return -EINVAL; - - dce_virtual_set_crtc_vblank_interrupt_state(adev, type, state); - - return 0; -} - -static const struct amdgpu_irq_src_funcs dce_virtual_crtc_irq_funcs = { - .set = dce_virtual_set_crtc_irq_state, - .process = NULL, -}; - -static void dce_virtual_set_irq_funcs(struct amdgpu_device *adev) -{ - adev->crtc_irq.num_types = adev->mode_info.num_crtc; - adev->crtc_irq.funcs = &dce_virtual_crtc_irq_funcs; -} - -const struct amdgpu_ip_block_version dce_virtual_ip_block = -{ - .type = AMD_IP_BLOCK_TYPE_DCE, - 
.major = 1, - .minor = 0, - .rev = 0, - .funcs = &dce_virtual_ip_funcs, -}; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index a64b2c706090..16dbe593cba2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -56,6 +56,10 @@ #define GFX10_NUM_GFX_RINGS_Sienna_Cichlid 1 #define GFX10_MEC_HPD_SIZE 2048 +#define RLCG_VFGATE_DISABLED 0x4000000 +#define RLCG_WRONG_OPERATION_TYPE 0x2000000 +#define RLCG_NOT_IN_RANGE 0x1000000 + #define F32_CE_PROGRAM_RAM_SIZE 65536 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L @@ -181,6 +185,9 @@ #define GFX_RLCG_GC_READ (0x1 << 28) #define GFX_RLCG_MMHUB_WRITE (0x2 << 28) +#define RLCG_ERROR_REPORT_ENABLED(adev) \ + (amdgpu_sriov_reg_indirect_mmhub(adev) || amdgpu_sriov_reg_indirect_gc(adev)) + MODULE_FIRMWARE("amdgpu/navi10_ce.bin"); MODULE_FIRMWARE("amdgpu/navi10_pfp.bin"); MODULE_FIRMWARE("amdgpu/navi10_me.bin"); @@ -249,6 +256,39 @@ MODULE_FIRMWARE("amdgpu/yellow_carp_mec.bin"); MODULE_FIRMWARE("amdgpu/yellow_carp_mec2.bin"); MODULE_FIRMWARE("amdgpu/yellow_carp_rlc.bin"); +MODULE_FIRMWARE("amdgpu/cyan_skillfish_ce.bin"); +MODULE_FIRMWARE("amdgpu/cyan_skillfish_pfp.bin"); +MODULE_FIRMWARE("amdgpu/cyan_skillfish_me.bin"); +MODULE_FIRMWARE("amdgpu/cyan_skillfish_mec.bin"); +MODULE_FIRMWARE("amdgpu/cyan_skillfish_mec2.bin"); +MODULE_FIRMWARE("amdgpu/cyan_skillfish_rlc.bin"); + +MODULE_FIRMWARE("amdgpu/cyan_skillfish2_ce.bin"); +MODULE_FIRMWARE("amdgpu/cyan_skillfish2_pfp.bin"); +MODULE_FIRMWARE("amdgpu/cyan_skillfish2_me.bin"); +MODULE_FIRMWARE("amdgpu/cyan_skillfish2_mec.bin"); +MODULE_FIRMWARE("amdgpu/cyan_skillfish2_mec2.bin"); +MODULE_FIRMWARE("amdgpu/cyan_skillfish2_rlc.bin"); + +static const struct soc15_reg_golden golden_settings_gc_10_0[] = +{ + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000), + /* TA_GRAD_ADJ_UCONFIG -> TA_GRAD_ADJ */ + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382), + /* VGT_TF_RING_SIZE_UMD -> VGT_TF_RING_SIZE */ + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2262c24e), + /* VGT_HS_OFFCHIP_PARAM_UMD -> VGT_HS_OFFCHIP_PARAM */ + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x226cc24f), + /* VGT_TF_MEMORY_BASE_UMD -> VGT_TF_MEMORY_BASE */ + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x226ec250), + /* VGT_TF_MEMORY_BASE_HI_UMD -> VGT_TF_MEMORY_BASE_HI */ + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2278c261), + /* VGT_ESGS_RING_SIZE_UMD -> VGT_ESGS_RING_SIZE */ + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2232c240), + /* VGT_GSVS_RING_SIZE_UMD -> VGT_GSVS_RING_SIZE */ + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2233c241), +}; + static const struct soc15_reg_golden golden_settings_gc_10_1[] = { SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0xffffffff, 0x00400014), @@ -1486,6 +1526,7 @@ static u32 gfx_v10_rlcg_rw(struct amdgpu_device *adev, u32 offset, u32 v, uint32 uint32_t i = 0; uint32_t retries = 50000; u32 ret = 0; + u32 tmp; scratch_reg0 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0) * 4; @@ -1519,9 +1560,8 @@ static u32 gfx_v10_rlcg_rw(struct amdgpu_device *adev, u32 offset, u32 v, uint32 writel(v, scratch_reg0); writel(offset | flag, scratch_reg1); writel(1, spare_int); - for (i = 0; i < retries; i++) { - u32 tmp; + for (i = 0; i < retries; i++) { tmp = readl(scratch_reg1); if (!(tmp & flag)) break; @@ -1529,8 +1569,19 @@ static u32 
gfx_v10_rlcg_rw(struct amdgpu_device *adev, u32 offset, u32 v, uint32 udelay(10); } - if (i >= retries) - pr_err("timeout: rlcg program reg:0x%05x failed !\n", offset); + if (i >= retries) { + if (RLCG_ERROR_REPORT_ENABLED(adev)) { + if (tmp & RLCG_VFGATE_DISABLED) + pr_err("The vfgate is disabled, program reg:0x%05x failed!\n", offset); + else if (tmp & RLCG_WRONG_OPERATION_TYPE) + pr_err("Wrong operation type, program reg:0x%05x failed!\n", offset); + else if (tmp & RLCG_NOT_IN_RANGE) + pr_err("The register is not in range, program reg:0x%05x failed!\n", offset); + else + pr_err("Unknown error type, program reg:0x%05x failed!\n", offset); + } else + pr_err("timeout: rlcg program reg:0x%05x failed!\n", offset); + } } ret = readl(scratch_reg0); @@ -1538,7 +1589,7 @@ static u32 gfx_v10_rlcg_rw(struct amdgpu_device *adev, u32 offset, u32 v, uint32 return ret; } -static void gfx_v10_rlcg_wreg(struct amdgpu_device *adev, u32 offset, u32 value, u32 acc_flags, u32 hwip) +static void gfx_v10_sriov_wreg(struct amdgpu_device *adev, u32 offset, u32 value, u32 acc_flags, u32 hwip) { u32 rlcg_flag; @@ -1554,7 +1605,7 @@ static void gfx_v10_rlcg_wreg(struct amdgpu_device *adev, u32 offset, u32 value, WREG32(offset, value); } -static u32 gfx_v10_rlcg_rreg(struct amdgpu_device *adev, u32 offset, u32 acc_flags, u32 hwip) +static u32 gfx_v10_sriov_rreg(struct amdgpu_device *adev, u32 offset, u32 acc_flags, u32 hwip) { u32 rlcg_flag; @@ -3488,12 +3539,51 @@ static const struct soc15_reg_golden golden_settings_gc_10_3_5[] = { SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffbfffff, 0x00a00000) }; +static const struct soc15_reg_golden golden_settings_gc_10_0_cyan_skillfish[] = { + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGE_FAST_CLKS, 0x3fffffff, 0x0000493e), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_CPF_CLK_CTRL, 0xfcff8fff, 0xf8000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xff7f0fff, 0x3c000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0xa0000000, 0xa0000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0x00008000, 0x003c8014), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_DRAM_BURST_CTRL, 0x00000010, 0x00000017), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_PIPE_STEER, 0xffffffff, 0xd8d8d8d8), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_VC5_ENABLE, 0x00000003, 0x00000003), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0x800007ff, 0x000005ff), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG, 0xffffffff, 0x20000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000200), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x04800000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_LAST_OF_BURST_CONFIG, 0xffffffff, 0x03860210), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0x0c1800ff, 0x00000044), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL, 0x00009d00, 0x00008500), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCMC_VM_CACHEABLE_DRAM_ADDRESS_END, 0xffffffff, 0x000fffff), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_DRAM_BURST_CTRL, 0x00000010, 0x00000017), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0xfcfcfcfc, 0xd8d8d8d8), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_0, 0x77707770, 0x21302130), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_1, 0x77707770, 0x21302130), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CGTT_SCLK_CTRL, 0x10000000, 0x10000100), + 
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL2, 0xfc02002f, 0x9402002f), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0x00002188, 0x00000188), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x08000009, 0x08000009), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_0, 0xcc3fcc03, 0x842a4c02), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000000f, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_SPARE, 0xffff3109, 0xffff3101), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ARB_CONFIG, 0x00000100, 0x00000130), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_LDS_CLK_CTRL, 0xffffffff, 0xffffffff), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x00030008, 0x01030000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x00800000, 0x00800000) +}; + #define DEFAULT_SH_MEM_CONFIG \ ((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \ (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \ (SH_MEM_RETRY_MODE_ALL << SH_MEM_CONFIG__RETRY_MODE__SHIFT) | \ (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT)) +/* TODO: pending on golden setting value of gb address config */ +#define CYAN_SKILLFISH_GB_ADDR_CONFIG_GOLDEN 0x00100044 static void gfx_v10_0_set_ring_funcs(struct amdgpu_device *adev); static void gfx_v10_0_set_irq_funcs(struct amdgpu_device *adev); @@ -3718,6 +3808,14 @@ static void gfx_v10_0_init_golden_registers(struct amdgpu_device *adev) golden_settings_gc_10_3_5, (const u32)ARRAY_SIZE(golden_settings_gc_10_3_5)); break; + case CHIP_CYAN_SKILLFISH: + soc15_program_register_sequence(adev, + golden_settings_gc_10_0, + (const u32)ARRAY_SIZE(golden_settings_gc_10_0)); + soc15_program_register_sequence(adev, + golden_settings_gc_10_0_cyan_skillfish, + (const u32)ARRAY_SIZE(golden_settings_gc_10_0_cyan_skillfish)); + break; default: break; } @@ -3891,6 +3989,7 @@ static void gfx_v10_0_check_fw_write_wait(struct amdgpu_device *adev) case CHIP_NAVI10: case CHIP_NAVI12: case CHIP_NAVI14: + case CHIP_CYAN_SKILLFISH: if ((adev->gfx.me_fw_version >= 0x00000046) && (adev->gfx.me_feature_version >= 27) && (adev->gfx.pfp_fw_version >= 0x00000068) && @@ -4025,6 +4124,12 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) case CHIP_YELLOW_CARP: chip_name = "yellow_carp"; break; + case CHIP_CYAN_SKILLFISH: + if (adev->apu_flags & AMD_APU_IS_CYAN_SKILLFISH2) + chip_name = "cyan_skillfish2"; + else + chip_name = "cyan_skillfish"; + break; default: BUG(); } @@ -4516,6 +4621,7 @@ static void gfx_v10_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS2); dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_DBG1); dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_M0); + dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_MODE); } static void gfx_v10_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, @@ -4604,6 +4710,14 @@ static void gfx_v10_0_gpu_early_init(struct amdgpu_device *adev) adev->gfx.config.gb_addr_config_fields.num_pkrs = 1 << REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG, NUM_PKRS); break; + case CHIP_CYAN_SKILLFISH: + adev->gfx.config.max_hw_contexts = 8; + adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; + adev->gfx.config.sc_prim_fifo_size_backend = 0x100; + adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; + adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; + gb_addr_config = CYAN_SKILLFISH_GB_ADDR_CONFIG_GOLDEN; + break; default: BUG(); break; @@ -4708,6 +4822,7 @@ static int gfx_v10_0_sw_init(void *handle) case CHIP_NAVI10: case 
CHIP_NAVI14: case CHIP_NAVI12: + case CHIP_CYAN_SKILLFISH: adev->gfx.me.num_me = 1; adev->gfx.me.num_pipe_per_me = 1; adev->gfx.me.num_queue_per_pipe = 1; @@ -5319,7 +5434,8 @@ static int gfx_v10_0_rlc_resume(struct amdgpu_device *adev) { int r; - if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP && + adev->psp.autoload_supported) { r = gfx_v10_0_wait_for_rlc_autoload_complete(adev); if (r) @@ -5379,7 +5495,7 @@ static int gfx_v10_0_parse_rlc_toc(struct amdgpu_device *adev) int ret; RLC_TABLE_OF_CONTENT *rlc_toc; - ret = amdgpu_bo_create_reserved(adev, adev->psp.toc_bin_size, PAGE_SIZE, + ret = amdgpu_bo_create_reserved(adev, adev->psp.toc.size_bytes, PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, &adev->gfx.rlc.rlc_toc_bo, &adev->gfx.rlc.rlc_toc_gpu_addr, @@ -5390,7 +5506,7 @@ static int gfx_v10_0_parse_rlc_toc(struct amdgpu_device *adev) } /* Copy toc from psp sos fw to rlc toc buffer */ - memcpy(adev->gfx.rlc.rlc_toc_buf, adev->psp.toc_start_addr, adev->psp.toc_bin_size); + memcpy(adev->gfx.rlc.rlc_toc_buf, adev->psp.toc.start_addr, adev->psp.toc.size_bytes); rlc_toc = (RLC_TABLE_OF_CONTENT *)adev->gfx.rlc.rlc_toc_buf; while (rlc_toc && (rlc_toc->id > FIRMWARE_ID_INVALID) && @@ -7608,10 +7724,8 @@ static int gfx_v10_0_soft_reset(void *handle) static uint64_t gfx_v10_0_get_gpu_clock_counter(struct amdgpu_device *adev) { - uint64_t clock; + uint64_t clock, clock_lo, clock_hi, hi_check; - amdgpu_gfx_off_ctrl(adev, false); - mutex_lock(&adev->gfx.gpu_clock_mutex); switch (adev->asic_type) { case CHIP_VANGOGH: case CHIP_YELLOW_CARP: @@ -7619,12 +7733,21 @@ static uint64_t gfx_v10_0_get_gpu_clock_counter(struct amdgpu_device *adev) ((uint64_t)RREG32_SOC15(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Vangogh) << 32ULL); break; default: - clock = (uint64_t)RREG32_SOC15(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER) | - ((uint64_t)RREG32_SOC15(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER) << 32ULL); + preempt_disable(); + clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER); + clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER); + hi_check = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER); + /* The SMUIO TSC clock frequency is 100MHz, which sets 32-bit carry over + * roughly every 42 seconds. 
+ */ + if (hi_check != clock_hi) { + clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER); + clock_hi = hi_check; + } + preempt_enable(); + clock = clock_lo | (clock_hi << 32ULL); break; } - mutex_unlock(&adev->gfx.gpu_clock_mutex); - amdgpu_gfx_off_ctrl(adev, true); return clock; } @@ -7665,6 +7788,7 @@ static int gfx_v10_0_early_init(void *handle) case CHIP_NAVI10: case CHIP_NAVI14: case CHIP_NAVI12: + case CHIP_CYAN_SKILLFISH: adev->gfx.num_gfx_rings = GFX10_NUM_GFX_RINGS_NV1X; break; case CHIP_SIENNA_CICHLID: @@ -8261,8 +8385,8 @@ static const struct amdgpu_rlc_funcs gfx_v10_0_rlc_funcs_sriov = { .reset = gfx_v10_0_rlc_reset, .start = gfx_v10_0_rlc_start, .update_spm_vmid = gfx_v10_0_update_spm_vmid, - .rlcg_wreg = gfx_v10_rlcg_wreg, - .rlcg_rreg = gfx_v10_rlcg_rreg, + .sriov_wreg = gfx_v10_sriov_wreg, + .sriov_rreg = gfx_v10_sriov_rreg, .is_rlcg_access_range = gfx_v10_0_is_rlcg_access_range, }; @@ -9425,6 +9549,7 @@ static void gfx_v10_0_set_rlc_funcs(struct amdgpu_device *adev) case CHIP_DIMGREY_CAVEFISH: case CHIP_BEIGE_GOBY: case CHIP_YELLOW_CARP: + case CHIP_CYAN_SKILLFISH: adev->gfx.rlc.funcs = &gfx_v10_0_rlc_funcs; break; case CHIP_NAVI12: diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c index 3a8d52a54873..6a8dadea40f9 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c @@ -3027,6 +3027,7 @@ static void gfx_v6_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, u dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI); dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0); dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0); + dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE); } static void gfx_v6_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 685212c3ddae..37b4a3db6360 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -4198,6 +4198,7 @@ static void gfx_v7_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, u dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI); dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0); dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0); + dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE); } static void gfx_v7_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index c26e06059466..e0302c23e9a7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -5279,6 +5279,7 @@ static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, u dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI); dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0); dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0); + dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE); } static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 6a23c6826e12..603c259b073b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -787,7 +787,7 @@ static void gfx_v9_0_rlcg_w(struct amdgpu_device 
*adev, u32 offset, u32 v, u32 f } -static void gfx_v9_0_rlcg_wreg(struct amdgpu_device *adev, u32 offset, +static void gfx_v9_0_sriov_wreg(struct amdgpu_device *adev, u32 offset, u32 v, u32 acc_flags, u32 hwip) { if ((acc_flags & AMDGPU_REGS_RLC) && @@ -2090,6 +2090,7 @@ static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, u dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS); dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0); dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0); + dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE); } static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, @@ -5150,7 +5151,7 @@ static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = { .reset = gfx_v9_0_rlc_reset, .start = gfx_v9_0_rlc_start, .update_spm_vmid = gfx_v9_0_update_spm_vmid, - .rlcg_wreg = gfx_v9_0_rlcg_wreg, + .sriov_wreg = gfx_v9_0_sriov_wreg, .is_rlcg_access_range = gfx_v9_0_is_rlcg_access_range, }; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c index 1769c4cba2ad..00a2b36a24b3 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c @@ -85,7 +85,7 @@ static const struct soc15_reg_golden golden_settings_gc_9_4_2_alde[] = { SOC15_REG_GOLDEN_VALUE(GC, 0, regTCI_CNTL_3, 0xff, 0x20), }; -/** +/* * This shader is used to clear VGPRS and LDS, and also write the input * pattern into the write back buffer, which will be used by driver to * check whether all SIMDs have been covered. @@ -206,7 +206,7 @@ const struct soc15_reg_entry vgpr_init_regs_aldebaran[] = { { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff }, }; -/** +/* * The below shaders are used to clear SGPRS, and also write the input * pattern into the write back buffer. The first two dispatch should be * scheduled simultaneously which make sure that all SGPRS could be @@ -302,7 +302,7 @@ const struct soc15_reg_entry sgpr96_init_regs_aldebaran[] = { { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff }, }; -/** +/* * This shader is used to clear the uninitiated sgprs after the above * two dispatches, because of hardware feature, dispath 0 couldn't clear * top hole sgprs. 
Therefore need 4 waves per SIMD to cover these sgprs diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.c index 8fca72ebd11c..497b86c376c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.c @@ -75,9 +75,8 @@ int gfxhub_v1_1_get_xgmi_info(struct amdgpu_device *adev) max_physical_node_id = 7; break; case CHIP_ALDEBARAN: - /* just using duplicates for Aldebaran support, revisit later */ - max_num_physical_nodes = 8; - max_physical_node_id = 7; + max_num_physical_nodes = 16; + max_physical_node_id = 15; break; default: return -EINVAL; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 4523df2785d6..41c3a0d70b7c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -93,6 +93,7 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry) { bool retry_fault = !!(entry->src_data[1] & 0x80); + bool write_fault = !!(entry->src_data[1] & 0x20); struct amdgpu_vmhub *hub = &adev->vmhub[entry->vmid_src]; struct amdgpu_task_info task_info; uint32_t status = 0; @@ -121,7 +122,7 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev, /* Try to handle the recoverable page faults by filling page * tables */ - if (amdgpu_vm_handle_fault(adev, entry->pasid, addr)) + if (amdgpu_vm_handle_fault(adev, entry->pasid, addr, write_fault)) return 1; } @@ -810,6 +811,7 @@ static int gmc_v10_0_mc_init(struct amdgpu_device *adev) case CHIP_DIMGREY_CAVEFISH: case CHIP_BEIGE_GOBY: case CHIP_YELLOW_CARP: + case CHIP_CYAN_SKILLFISH: default: adev->gmc.gart_size = 512ULL << 20; break; @@ -879,6 +881,7 @@ static int gmc_v10_0_sw_init(void *handle) case CHIP_DIMGREY_CAVEFISH: case CHIP_BEIGE_GOBY: case CHIP_YELLOW_CARP: + case CHIP_CYAN_SKILLFISH: adev->num_vmhubs = 2; /* * To fulfill 4-level page support, @@ -996,6 +999,7 @@ static void gmc_v10_0_init_golden_registers(struct amdgpu_device *adev) case CHIP_DIMGREY_CAVEFISH: case CHIP_BEIGE_GOBY: case CHIP_YELLOW_CARP: + case CHIP_CYAN_SKILLFISH: break; default: break; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 7eb70d69f760..d90c16a6b2b8 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -53,7 +53,9 @@ #include "mmhub_v1_7.h" #include "umc_v6_1.h" #include "umc_v6_0.h" +#include "umc_v6_7.h" #include "hdp_v4_0.h" +#include "mca_v3_0.h" #include "ivsrcid/vmc/irqsrcs_vmc_1_0.h" @@ -505,6 +507,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry) { bool retry_fault = !!(entry->src_data[1] & 0x80); + bool write_fault = !!(entry->src_data[1] & 0x20); uint32_t status = 0, cid = 0, rw = 0; struct amdgpu_task_info task_info; struct amdgpu_vmhub *hub; @@ -535,7 +538,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, /* Try to handle the recoverable page faults by filling page * tables */ - if (amdgpu_vm_handle_fault(adev, entry->pasid, addr)) + if (amdgpu_vm_handle_fault(adev, entry->pasid, addr, write_fault)) return 1; } @@ -1168,6 +1171,18 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev) adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0]; adev->umc.ras_funcs = &umc_v6_1_ras_funcs; break; + case CHIP_ALDEBARAN: + adev->umc.max_ras_err_cnt_per_query = UMC_V6_7_TOTAL_CHANNEL_NUM; + adev->umc.channel_inst_num = UMC_V6_7_CHANNEL_INSTANCE_NUM; + 
adev->umc.umc_inst_num = UMC_V6_7_UMC_INSTANCE_NUM; + adev->umc.channel_offs = UMC_V6_7_PER_CHANNEL_OFFSET; + if (!adev->gmc.xgmi.connected_to_cpu) + adev->umc.ras_funcs = &umc_v6_7_ras_funcs; + if (1 & adev->smuio.funcs->get_die_id(adev)) + adev->umc.channel_idx_tbl = &umc_v6_7_channel_idx_tbl_first[0][0]; + else + adev->umc.channel_idx_tbl = &umc_v6_7_channel_idx_tbl_second[0][0]; + break; default: break; } @@ -1216,6 +1231,18 @@ static void gmc_v9_0_set_hdp_ras_funcs(struct amdgpu_device *adev) adev->hdp.ras_funcs = &hdp_v4_0_ras_funcs; } +static void gmc_v9_0_set_mca_funcs(struct amdgpu_device *adev) +{ + switch (adev->asic_type) { + case CHIP_ALDEBARAN: + if (!adev->gmc.xgmi.connected_to_cpu) + adev->mca.funcs = &mca_v3_0_funcs; + break; + default: + break; + } +} + static int gmc_v9_0_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -1237,6 +1264,7 @@ static int gmc_v9_0_early_init(void *handle) gmc_v9_0_set_mmhub_ras_funcs(adev); gmc_v9_0_set_gfxhub_funcs(adev); gmc_v9_0_set_hdp_ras_funcs(adev); + gmc_v9_0_set_mca_funcs(adev); adev->gmc.shared_aperture_start = 0x2000000000000000ULL; adev->gmc.shared_aperture_end = @@ -1448,6 +1476,8 @@ static int gmc_v9_0_sw_init(void *handle) adev->gfxhub.funcs->init(adev); adev->mmhub.funcs->init(adev); + if (adev->mca.funcs) + adev->mca.funcs->init(adev); spin_lock_init(&adev->gmc.invalidate_lock); diff --git a/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c b/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c new file mode 100644 index 000000000000..058b65730a84 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c @@ -0,0 +1,125 @@ +/* + * Copyright 2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ +#include "amdgpu_ras.h" +#include "amdgpu.h" +#include "amdgpu_mca.h" + +#define smnMCMP0_STATUST0 0x03830408 +#define smnMCMP1_STATUST0 0x03b30408 +#define smnMCMPIO_STATUST0 0x0c930408 + + +static void mca_v3_0_mp0_query_ras_error_count(struct amdgpu_device *adev, + void *ras_error_status) +{ + amdgpu_mca_query_ras_error_count(adev, + smnMCMP0_STATUST0, + ras_error_status); +} + +static int mca_v3_0_mp0_ras_late_init(struct amdgpu_device *adev) +{ + return amdgpu_mca_ras_late_init(adev, &adev->mca.mp0); +} + +static void mca_v3_0_mp0_ras_fini(struct amdgpu_device *adev) +{ + amdgpu_mca_ras_fini(adev, &adev->mca.mp0); +} + +const struct amdgpu_mca_ras_funcs mca_v3_0_mp0_ras_funcs = { + .ras_late_init = mca_v3_0_mp0_ras_late_init, + .ras_fini = mca_v3_0_mp0_ras_fini, + .query_ras_error_count = mca_v3_0_mp0_query_ras_error_count, + .query_ras_error_address = NULL, + .ras_block = AMDGPU_RAS_BLOCK__MP0, + .sysfs_name = "mp0_err_count", +}; + +static void mca_v3_0_mp1_query_ras_error_count(struct amdgpu_device *adev, + void *ras_error_status) +{ + amdgpu_mca_query_ras_error_count(adev, + smnMCMP1_STATUST0, + ras_error_status); +} + +static int mca_v3_0_mp1_ras_late_init(struct amdgpu_device *adev) +{ + return amdgpu_mca_ras_late_init(adev, &adev->mca.mp1); +} + +static void mca_v3_0_mp1_ras_fini(struct amdgpu_device *adev) +{ + amdgpu_mca_ras_fini(adev, &adev->mca.mp1); +} + +const struct amdgpu_mca_ras_funcs mca_v3_0_mp1_ras_funcs = { + .ras_late_init = mca_v3_0_mp1_ras_late_init, + .ras_fini = mca_v3_0_mp1_ras_fini, + .query_ras_error_count = mca_v3_0_mp1_query_ras_error_count, + .query_ras_error_address = NULL, + .ras_block = AMDGPU_RAS_BLOCK__MP1, + .sysfs_name = "mp1_err_count", +}; + +static void mca_v3_0_mpio_query_ras_error_count(struct amdgpu_device *adev, + void *ras_error_status) +{ + amdgpu_mca_query_ras_error_count(adev, + smnMCMPIO_STATUST0, + ras_error_status); +} + +static int mca_v3_0_mpio_ras_late_init(struct amdgpu_device *adev) +{ + return amdgpu_mca_ras_late_init(adev, &adev->mca.mpio); +} + +static void mca_v3_0_mpio_ras_fini(struct amdgpu_device *adev) +{ + amdgpu_mca_ras_fini(adev, &adev->mca.mpio); +} + +const struct amdgpu_mca_ras_funcs mca_v3_0_mpio_ras_funcs = { + .ras_late_init = mca_v3_0_mpio_ras_late_init, + .ras_fini = mca_v3_0_mpio_ras_fini, + .query_ras_error_count = mca_v3_0_mpio_query_ras_error_count, + .query_ras_error_address = NULL, + .ras_block = AMDGPU_RAS_BLOCK__MPIO, + .sysfs_name = "mpio_err_count", +}; + + +static void mca_v3_0_init(struct amdgpu_device *adev) +{ + struct amdgpu_mca *mca = &adev->mca; + + mca->mp0.ras_funcs = &mca_v3_0_mp0_ras_funcs; + mca->mp1.ras_funcs = &mca_v3_0_mp1_ras_funcs; + mca->mpio.ras_funcs = &mca_v3_0_mpio_ras_funcs; +} + +const struct amdgpu_mca_funcs mca_v3_0_funcs = { + .init = mca_v3_0_init, +};
\ No newline at end of file diff --git a/drivers/gpu/drm/amd/amdgpu/mca_v3_0.h b/drivers/gpu/drm/amd/amdgpu/mca_v3_0.h new file mode 100644 index 000000000000..b899b86194c2 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/mca_v3_0.h @@ -0,0 +1,26 @@ +/* + * Copyright (C) 2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef __MCA_V3_0_H__ +#define __MCA_V3_0_H__ + +extern const struct amdgpu_mca_funcs mca_v3_0_funcs; + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/mmsch_v1_0.h b/drivers/gpu/drm/amd/amdgpu/mmsch_v1_0.h index 20958639b601..2cdab8062c86 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmsch_v1_0.h +++ b/drivers/gpu/drm/amd/amdgpu/mmsch_v1_0.h @@ -24,9 +24,7 @@ #ifndef __MMSCH_V1_0_H__ #define __MMSCH_V1_0_H__ -#define MMSCH_VERSION_MAJOR 1 -#define MMSCH_VERSION_MINOR 0 -#define MMSCH_VERSION (MMSCH_VERSION_MAJOR << 16 | MMSCH_VERSION_MINOR) +#define MMSCH_VERSION 0x1 enum mmsch_v1_0_command_type { MMSCH_COMMAND__DIRECT_REG_WRITE = 0, diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c index 9f7aac435d69..a35e6d87e537 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c @@ -96,7 +96,11 @@ static int xgpu_nv_poll_ack(struct amdgpu_device *adev) static int xgpu_nv_poll_msg(struct amdgpu_device *adev, enum idh_event event) { - int r, timeout = NV_MAILBOX_POLL_MSG_TIMEDOUT; + int r; + uint64_t timeout, now; + + now = (uint64_t)ktime_to_ms(ktime_get()); + timeout = now + NV_MAILBOX_POLL_MSG_TIMEDOUT; do { r = xgpu_nv_mailbox_rcv_msg(adev, event); @@ -104,8 +108,8 @@ static int xgpu_nv_poll_msg(struct amdgpu_device *adev, enum idh_event event) return 0; msleep(10); - timeout -= 10; - } while (timeout > 1); + now = (uint64_t)ktime_to_ms(ktime_get()); + } while (timeout > now); return -ETIME; @@ -149,9 +153,10 @@ static void xgpu_nv_mailbox_trans_msg (struct amdgpu_device *adev, static int xgpu_nv_send_access_requests(struct amdgpu_device *adev, enum idh_request req) { - int r; + int r, retry = 1; enum idh_event event = -1; +send_request: xgpu_nv_mailbox_trans_msg(adev, req, 0, 0, 0); switch (req) { @@ -170,6 +175,9 @@ static int xgpu_nv_send_access_requests(struct amdgpu_device *adev, if (event != -1) { r = xgpu_nv_poll_msg(adev, event); if (r) { + if (retry++ < 2) + goto send_request; + if (req != IDH_REQ_GPU_INIT_DATA) { pr_err("Doesn't get msg:%d from pf, error=%d\n", event, r); return r; @@ -279,6 +287,8 @@ static void xgpu_nv_mailbox_flr_work(struct 
work_struct *work) amdgpu_virt_fini_data_exchange(adev); atomic_set(&adev->in_gpu_reset, 1); + xgpu_nv_mailbox_trans_msg(adev, IDH_READY_TO_RESET, 0, 0, 0); + do { if (xgpu_nv_mailbox_peek_msg(adev) == IDH_FLR_NOTIFICATION_CMPL) goto flr_done; diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h index 9f5808616174..73887b0aa1d6 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h @@ -37,7 +37,8 @@ enum idh_request { IDH_REQ_GPU_RESET_ACCESS, IDH_REQ_GPU_INIT_DATA, - IDH_LOG_VF_ERROR = 200, + IDH_LOG_VF_ERROR = 200, + IDH_READY_TO_RESET = 201, }; enum idh_event { diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c index 7b79eeaa88aa..b184b656b9b6 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c @@ -508,6 +508,26 @@ static void nbio_v2_3_apply_l1_link_width_reconfig_wa(struct amdgpu_device *adev WREG32_PCIE(smnPCIE_LC_LINK_WIDTH_CNTL, reg_data); } +static void nbio_v2_3_clear_doorbell_interrupt(struct amdgpu_device *adev) +{ + uint32_t reg, reg_data; + + if (adev->asic_type != CHIP_SIENNA_CICHLID) + return; + + reg = RREG32_SOC15(NBIO, 0, mmBIF_RB_CNTL); + + /* Clear Interrupt Status + */ + if ((reg & BIF_RB_CNTL__RB_ENABLE_MASK) == 0) { + reg = RREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL); + if (reg & BIF_DOORBELL_INT_CNTL__DOORBELL_INTERRUPT_STATUS_MASK) { + reg_data = 1 << BIF_DOORBELL_INT_CNTL__DOORBELL_INTERRUPT_CLEAR__SHIFT; + WREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL, reg_data); + } + } +} + const struct amdgpu_nbio_funcs nbio_v2_3_funcs = { .get_hdp_flush_req_offset = nbio_v2_3_get_hdp_flush_req_offset, .get_hdp_flush_done_offset = nbio_v2_3_get_hdp_flush_done_offset, @@ -531,4 +551,5 @@ const struct amdgpu_nbio_funcs nbio_v2_3_funcs = { .program_aspm = nbio_v2_3_program_aspm, .apply_lc_spc_mode_wa = nbio_v2_3_apply_lc_spc_mode_wa, .apply_l1_link_width_reconfig_wa = nbio_v2_3_apply_l1_link_width_reconfig_wa, + .clear_doorbell_interrupt = nbio_v2_3_clear_doorbell_interrupt, }; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c index cef929746739..ba1d3ab869c1 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c @@ -85,6 +85,11 @@ #define mmRCC_DEV0_EPF0_STRAP0_ALDE 0x0015 #define mmRCC_DEV0_EPF0_STRAP0_ALDE_BASE_IDX 2 +#define mmBIF_DOORBELL_INT_CNTL_ALDE 0x3878 +#define mmBIF_DOORBELL_INT_CNTL_ALDE_BASE_IDX 2 +#define BIF_DOORBELL_INT_CNTL_ALDE__DOORBELL_INTERRUPT_DISABLE__SHIFT 0x18 +#define BIF_DOORBELL_INT_CNTL_ALDE__DOORBELL_INTERRUPT_DISABLE_MASK 0x01000000L + static void nbio_v7_4_query_ras_error_count(struct amdgpu_device *adev, void *ras_error_status); @@ -346,14 +351,21 @@ static void nbio_v7_4_handle_ras_controller_intr_no_bifring(struct amdgpu_device struct ras_err_data err_data = {0, 0, 0, NULL}; struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); - bif_doorbell_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL); + if (adev->asic_type == CHIP_ALDEBARAN) + bif_doorbell_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL_ALDE); + else + bif_doorbell_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL); + if (REG_GET_FIELD(bif_doorbell_intr_cntl, BIF_DOORBELL_INT_CNTL, RAS_CNTLR_INTERRUPT_STATUS)) { /* driver has to clear the interrupt status when bif ring is disabled */ bif_doorbell_intr_cntl = REG_SET_FIELD(bif_doorbell_intr_cntl, BIF_DOORBELL_INT_CNTL, RAS_CNTLR_INTERRUPT_CLEAR, 1); - 
WREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL, bif_doorbell_intr_cntl); + if (adev->asic_type == CHIP_ALDEBARAN) + WREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL_ALDE, bif_doorbell_intr_cntl); + else + WREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL, bif_doorbell_intr_cntl); if (!ras->disable_ras_err_cnt_harvest) { /* @@ -372,13 +384,13 @@ static void nbio_v7_4_handle_ras_controller_intr_no_bifring(struct amdgpu_device "errors detected in %s block, " "no user action is needed.\n", obj->err_data.ce_count, - adev->nbio.ras_if->name); + ras_block_str(adev->nbio.ras_if->block)); if (err_data.ue_count) dev_info(adev->dev, "%ld uncorrectable hardware " "errors detected in %s block\n", obj->err_data.ue_count, - adev->nbio.ras_if->name); + ras_block_str(adev->nbio.ras_if->block)); } dev_info(adev->dev, "RAS controller interrupt triggered " @@ -395,14 +407,22 @@ static void nbio_v7_4_handle_ras_err_event_athub_intr_no_bifring(struct amdgpu_d { uint32_t bif_doorbell_intr_cntl; - bif_doorbell_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL); + if (adev->asic_type == CHIP_ALDEBARAN) + bif_doorbell_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL_ALDE); + else + bif_doorbell_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL); + if (REG_GET_FIELD(bif_doorbell_intr_cntl, BIF_DOORBELL_INT_CNTL, RAS_ATHUB_ERR_EVENT_INTERRUPT_STATUS)) { /* driver has to clear the interrupt status when bif ring is disabled */ bif_doorbell_intr_cntl = REG_SET_FIELD(bif_doorbell_intr_cntl, BIF_DOORBELL_INT_CNTL, RAS_ATHUB_ERR_EVENT_INTERRUPT_CLEAR, 1); - WREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL, bif_doorbell_intr_cntl); + + if (adev->asic_type == CHIP_ALDEBARAN) + WREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL_ALDE, bif_doorbell_intr_cntl); + else + WREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL, bif_doorbell_intr_cntl); amdgpu_ras_global_ras_isr(adev); } @@ -572,7 +592,11 @@ static void nbio_v7_4_query_ras_error_count(struct amdgpu_device *adev, static void nbio_v7_4_enable_doorbell_interrupt(struct amdgpu_device *adev, bool enable) { - WREG32_FIELD15(NBIO, 0, BIF_DOORBELL_INT_CNTL, + if (adev->asic_type == CHIP_ALDEBARAN) + WREG32_FIELD15(NBIO, 0, BIF_DOORBELL_INT_CNTL_ALDE, + DOORBELL_INTERRUPT_DISABLE, enable ? 0 : 1); + else + WREG32_FIELD15(NBIO, 0, BIF_DOORBELL_INT_CNTL, DOORBELL_INTERRUPT_DISABLE, enable ? 
0 : 1); } diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 94d029dbf30d..ff80786e3918 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -58,7 +58,7 @@ #include "jpeg_v2_0.h" #include "vcn_v3_0.h" #include "jpeg_v3_0.h" -#include "dce_virtual.h" +#include "amdgpu_vkms.h" #include "mes_v10_1.h" #include "mxgpu_nv.h" #include "smuio_v11_0.h" @@ -666,6 +666,9 @@ legacy_init: case CHIP_YELLOW_CARP: yellow_carp_reg_base_init(adev); break; + case CHIP_CYAN_SKILLFISH: + cyan_skillfish_reg_base_init(adev); + break; default: return -EINVAL; } @@ -682,7 +685,10 @@ int nv_set_ip_blocks(struct amdgpu_device *adev) { int r; - if (adev->flags & AMD_IS_APU) { + if (adev->asic_type == CHIP_CYAN_SKILLFISH) { + adev->nbio.funcs = &nbio_v2_3_funcs; + adev->nbio.hdp_flush_reg = &nbio_v2_3_hdp_flush_reg; + } else if (adev->flags & AMD_IS_APU) { adev->nbio.funcs = &nbio_v7_2_funcs; adev->nbio.hdp_flush_reg = &nbio_v7_2_hdp_flush_reg; } else { @@ -715,7 +721,7 @@ int nv_set_ip_blocks(struct amdgpu_device *adev) !amdgpu_sriov_vf(adev)) amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) - amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); + amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block); #if defined(CONFIG_DRM_AMD_DC) else if (amdgpu_device_has_dc_support(adev)) amdgpu_device_ip_block_add(adev, &dm_ip_block); @@ -743,7 +749,7 @@ int nv_set_ip_blocks(struct amdgpu_device *adev) if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) - amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); + amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block); #if defined(CONFIG_DRM_AMD_DC) else if (amdgpu_device_has_dc_support(adev)) amdgpu_device_ip_block_add(adev, &dm_ip_block); @@ -773,7 +779,7 @@ int nv_set_ip_blocks(struct amdgpu_device *adev) is_support_sw_smu(adev)) amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) - amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); + amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block); #if defined(CONFIG_DRM_AMD_DC) else if (amdgpu_device_has_dc_support(adev)) amdgpu_device_ip_block_add(adev, &dm_ip_block); @@ -796,7 +802,7 @@ int nv_set_ip_blocks(struct amdgpu_device *adev) is_support_sw_smu(adev)) amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) - amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); + amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block); #if defined(CONFIG_DRM_AMD_DC) else if (amdgpu_device_has_dc_support(adev)) amdgpu_device_ip_block_add(adev, &dm_ip_block); @@ -817,7 +823,7 @@ int nv_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) - amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); + amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block); #if defined(CONFIG_DRM_AMD_DC) else if (amdgpu_device_has_dc_support(adev)) amdgpu_device_ip_block_add(adev, &dm_ip_block); @@ -837,7 +843,7 @@ int nv_set_ip_blocks(struct amdgpu_device *adev) is_support_sw_smu(adev)) amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) - amdgpu_device_ip_block_add(adev, 
&dce_virtual_ip_block); + amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block); #if defined(CONFIG_DRM_AMD_DC) else if (amdgpu_device_has_dc_support(adev)) amdgpu_device_ip_block_add(adev, &dm_ip_block); @@ -859,7 +865,7 @@ int nv_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block); amdgpu_device_ip_block_add(adev, &sdma_v5_2_ip_block); if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) - amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); + amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block); #if defined(CONFIG_DRM_AMD_DC) else if (amdgpu_device_has_dc_support(adev)) amdgpu_device_ip_block_add(adev, &dm_ip_block); @@ -877,11 +883,11 @@ int nv_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &psp_v13_0_ip_block); amdgpu_device_ip_block_add(adev, &smu_v13_0_ip_block); if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) - amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); + amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block); amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block); amdgpu_device_ip_block_add(adev, &sdma_v5_2_ip_block); if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) - amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); + amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block); #if defined(CONFIG_DRM_AMD_DC) else if (amdgpu_device_has_dc_support(adev)) amdgpu_device_ip_block_add(adev, &dm_ip_block); @@ -889,6 +895,20 @@ int nv_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &vcn_v3_0_ip_block); amdgpu_device_ip_block_add(adev, &jpeg_v3_0_ip_block); break; + case CHIP_CYAN_SKILLFISH: + amdgpu_device_ip_block_add(adev, &nv_common_ip_block); + amdgpu_device_ip_block_add(adev, &gmc_v10_0_ip_block); + amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block); + if (adev->apu_flags & AMD_APU_IS_CYAN_SKILLFISH2) { + if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) + amdgpu_device_ip_block_add(adev, &psp_v11_0_8_ip_block); + amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); + } + if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) + amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block); + amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block); + amdgpu_device_ip_block_add(adev, &sdma_v5_0_ip_block); + break; default: return -EINVAL; } @@ -1241,6 +1261,11 @@ static int nv_common_early_init(void *handle) else adev->external_rev_id = adev->rev_id + 0x01; break; + case CHIP_CYAN_SKILLFISH: + adev->cg_flags = 0; + adev->pg_flags = 0; + adev->external_rev_id = adev->rev_id + 0x82; + break; default: /* FIXME: not supported yet */ return -EINVAL; diff --git a/drivers/gpu/drm/amd/amdgpu/nv.h b/drivers/gpu/drm/amd/amdgpu/nv.h index eb9aff1e7253..1f40ba3b0460 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.h +++ b/drivers/gpu/drm/amd/amdgpu/nv.h @@ -38,5 +38,6 @@ void vangogh_reg_base_init(struct amdgpu_device *adev); int dimgrey_cavefish_reg_base_init(struct amdgpu_device *adev); int beige_goby_reg_base_init(struct amdgpu_device *adev); int yellow_carp_reg_base_init(struct amdgpu_device *adev); +int cyan_skillfish_reg_base_init(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c index 4b1cc5e9ee92..5872d68ed13d 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c @@ -84,29 +84,29 @@ static int psp_v10_0_init_microcode(struct psp_context *psp) ta_hdr = (const struct ta_firmware_header_v1_0 *) adev->psp.ta_fw->data; 
- adev->psp.ta_hdcp_ucode_version = - le32_to_cpu(ta_hdr->ta_hdcp_ucode_version); - adev->psp.ta_hdcp_ucode_size = - le32_to_cpu(ta_hdr->ta_hdcp_size_bytes); - adev->psp.ta_hdcp_start_addr = + adev->psp.hdcp.feature_version = + le32_to_cpu(ta_hdr->hdcp.fw_version); + adev->psp.hdcp.size_bytes = + le32_to_cpu(ta_hdr->hdcp.size_bytes); + adev->psp.hdcp.start_addr = (uint8_t *)ta_hdr + le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes); - adev->psp.ta_dtm_ucode_version = - le32_to_cpu(ta_hdr->ta_dtm_ucode_version); - adev->psp.ta_dtm_ucode_size = - le32_to_cpu(ta_hdr->ta_dtm_size_bytes); - adev->psp.ta_dtm_start_addr = - (uint8_t *)adev->psp.ta_hdcp_start_addr + - le32_to_cpu(ta_hdr->ta_dtm_offset_bytes); - - adev->psp.ta_securedisplay_ucode_version = - le32_to_cpu(ta_hdr->ta_securedisplay_ucode_version); - adev->psp.ta_securedisplay_ucode_size = - le32_to_cpu(ta_hdr->ta_securedisplay_size_bytes); - adev->psp.ta_securedisplay_start_addr = - (uint8_t *)adev->psp.ta_hdcp_start_addr + - le32_to_cpu(ta_hdr->ta_securedisplay_offset_bytes); + adev->psp.dtm.feature_version = + le32_to_cpu(ta_hdr->dtm.fw_version); + adev->psp.dtm.size_bytes = + le32_to_cpu(ta_hdr->dtm.size_bytes); + adev->psp.dtm.start_addr = + (uint8_t *)adev->psp.hdcp.start_addr + + le32_to_cpu(ta_hdr->dtm.offset_bytes); + + adev->psp.securedisplay.feature_version = + le32_to_cpu(ta_hdr->securedisplay.fw_version); + adev->psp.securedisplay.size_bytes = + le32_to_cpu(ta_hdr->securedisplay.size_bytes); + adev->psp.securedisplay.start_addr = + (uint8_t *)adev->psp.hdcp.start_addr + + le32_to_cpu(ta_hdr->securedisplay.offset_bytes); adev->psp.ta_fw_version = le32_to_cpu(ta_hdr->header.ucode_version); } diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c index bc133db2d538..29bf9f09944b 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c @@ -80,6 +80,9 @@ MODULE_FIRMWARE("amdgpu/beige_goby_ta.bin"); /* For large FW files the time to complete can be very long */ #define USBC_PD_POLLING_LIMIT_S 240 +/* Read USB-PD from LFB */ +#define GFX_CMD_USB_PD_USE_LFB 0x480 + static int psp_v11_0_init_microcode(struct psp_context *psp) { struct amdgpu_device *adev = psp->adev; @@ -148,15 +151,15 @@ static int psp_v11_0_init_microcode(struct psp_context *psp) goto out2; ta_hdr = (const struct ta_firmware_header_v1_0 *)adev->psp.ta_fw->data; - adev->psp.ta_xgmi_ucode_version = le32_to_cpu(ta_hdr->ta_xgmi_ucode_version); - adev->psp.ta_xgmi_ucode_size = le32_to_cpu(ta_hdr->ta_xgmi_size_bytes); - adev->psp.ta_xgmi_start_addr = (uint8_t *)ta_hdr + + adev->psp.xgmi.feature_version = le32_to_cpu(ta_hdr->xgmi.fw_version); + adev->psp.xgmi.size_bytes = le32_to_cpu(ta_hdr->xgmi.size_bytes); + adev->psp.xgmi.start_addr = (uint8_t *)ta_hdr + le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes); adev->psp.ta_fw_version = le32_to_cpu(ta_hdr->header.ucode_version); - adev->psp.ta_ras_ucode_version = le32_to_cpu(ta_hdr->ta_ras_ucode_version); - adev->psp.ta_ras_ucode_size = le32_to_cpu(ta_hdr->ta_ras_size_bytes); - adev->psp.ta_ras_start_addr = (uint8_t *)adev->psp.ta_xgmi_start_addr + - le32_to_cpu(ta_hdr->ta_ras_offset_bytes); + adev->psp.ras.feature_version = le32_to_cpu(ta_hdr->ras.fw_version); + adev->psp.ras.size_bytes = le32_to_cpu(ta_hdr->ras.size_bytes); + adev->psp.ras.start_addr = (uint8_t *)adev->psp.xgmi.start_addr + + le32_to_cpu(ta_hdr->ras.offset_bytes); } break; case CHIP_NAVI10: @@ -183,17 +186,17 @@ static int psp_v11_0_init_microcode(struct psp_context *psp) 
goto out2; ta_hdr = (const struct ta_firmware_header_v1_0 *)adev->psp.ta_fw->data; - adev->psp.ta_hdcp_ucode_version = le32_to_cpu(ta_hdr->ta_hdcp_ucode_version); - adev->psp.ta_hdcp_ucode_size = le32_to_cpu(ta_hdr->ta_hdcp_size_bytes); - adev->psp.ta_hdcp_start_addr = (uint8_t *)ta_hdr + + adev->psp.hdcp.feature_version = le32_to_cpu(ta_hdr->hdcp.fw_version); + adev->psp.hdcp.size_bytes = le32_to_cpu(ta_hdr->hdcp.size_bytes); + adev->psp.hdcp.start_addr = (uint8_t *)ta_hdr + le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes); adev->psp.ta_fw_version = le32_to_cpu(ta_hdr->header.ucode_version); - adev->psp.ta_dtm_ucode_version = le32_to_cpu(ta_hdr->ta_dtm_ucode_version); - adev->psp.ta_dtm_ucode_size = le32_to_cpu(ta_hdr->ta_dtm_size_bytes); - adev->psp.ta_dtm_start_addr = (uint8_t *)adev->psp.ta_hdcp_start_addr + - le32_to_cpu(ta_hdr->ta_dtm_offset_bytes); + adev->psp.dtm.feature_version = le32_to_cpu(ta_hdr->dtm.fw_version); + adev->psp.dtm.size_bytes = le32_to_cpu(ta_hdr->dtm.size_bytes); + adev->psp.dtm.start_addr = (uint8_t *)adev->psp.hdcp.start_addr + + le32_to_cpu(ta_hdr->dtm.offset_bytes); } break; case CHIP_SIENNA_CICHLID: @@ -284,7 +287,7 @@ static int psp_v11_0_bootloader_load_kdb(struct psp_context *psp) return ret; /* Copy PSP KDB binary to memory */ - psp_copy_fw(psp, psp->kdb_start_addr, psp->kdb_bin_size); + psp_copy_fw(psp, psp->kdb.start_addr, psp->kdb.size_bytes); /* Provide the PSP KDB to bootloader */ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, @@ -315,7 +318,7 @@ static int psp_v11_0_bootloader_load_spl(struct psp_context *psp) return ret; /* Copy PSP SPL binary to memory */ - psp_copy_fw(psp, psp->spl_start_addr, psp->spl_bin_size); + psp_copy_fw(psp, psp->spl.start_addr, psp->spl.size_bytes); /* Provide the PSP SPL to bootloader */ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, @@ -346,7 +349,7 @@ static int psp_v11_0_bootloader_load_sysdrv(struct psp_context *psp) return ret; /* Copy PSP System Driver binary to memory */ - psp_copy_fw(psp, psp->sys_start_addr, psp->sys_bin_size); + psp_copy_fw(psp, psp->sys.start_addr, psp->sys.size_bytes); /* Provide the sys driver to bootloader */ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, @@ -380,7 +383,7 @@ static int psp_v11_0_bootloader_load_sos(struct psp_context *psp) return ret; /* Copy Secure OS binary to PSP memory */ - psp_copy_fw(psp, psp->sos_start_addr, psp->sos_bin_size); + psp_copy_fw(psp, psp->sos.start_addr, psp->sos.size_bytes); /* Provide the PSP secure OS to bootloader */ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, @@ -753,44 +756,26 @@ static void psp_v11_0_ring_set_wptr(struct psp_context *psp, uint32_t value) WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, value); } -static int psp_v11_0_load_usbc_pd_fw(struct psp_context *psp, dma_addr_t dma_addr) +static int psp_v11_0_load_usbc_pd_fw(struct psp_context *psp, uint64_t fw_pri_mc_addr) { struct amdgpu_device *adev = psp->adev; uint32_t reg_status; int ret, i = 0; - /* Write lower 32-bit address of the PD Controller FW */ - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, lower_32_bits(dma_addr)); - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), - 0x80000000, 0x80000000, false); - if (ret) - return ret; - - /* Fireup interrupt so PSP can pick up the lower address */ - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35, 0x800000); - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), - 0x80000000, 0x80000000, false); - if (ret) - return ret; - - reg_status = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35); - - if ((reg_status & 0xFFFF) != 0) { - 
DRM_ERROR("Lower address load failed - MP0_SMN_C2PMSG_35.Bits [15:0] = %02x...\n", - reg_status & 0xFFFF); - return -EIO; - } - - /* Write upper 32-bit address of the PD Controller FW */ - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, upper_32_bits(dma_addr)); + /* + * LFB address which is aligned to 1MB address and has to be + * right-shifted by 20 so that LFB address can be passed on a 32-bit C2P + * register + */ + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, (fw_pri_mc_addr >> 20)); ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), 0x80000000, 0x80000000, false); if (ret) return ret; - /* Fireup interrupt so PSP can pick up the upper address */ - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35, 0x4000000); + /* Fireup interrupt so PSP can pick up the address */ + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35, (GFX_CMD_USB_PD_USE_LFB << 16)); /* FW load takes very long time */ do { @@ -806,7 +791,7 @@ static int psp_v11_0_load_usbc_pd_fw(struct psp_context *psp, dma_addr_t dma_add done: if ((reg_status & 0xFFFF) != 0) { - DRM_ERROR("Upper address load failed - MP0_SMN_C2PMSG_35.Bits [15:0] = x%04x\n", + DRM_ERROR("Address load failed - MP0_SMN_C2PMSG_35.Bits [15:0] = 0x%04x\n", reg_status & 0xFFFF); return -EIO; } diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0_8.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0_8.c new file mode 100644 index 000000000000..ff13e1beb49b --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0_8.c @@ -0,0 +1,208 @@ +/* + * Copyright 2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ +#include "amdgpu.h" +#include "amdgpu_psp.h" +#include "amdgpu_ucode.h" +#include "soc15_common.h" +#include "psp_v11_0_8.h" + +#include "mp/mp_11_0_8_offset.h" + +static int psp_v11_0_8_ring_init(struct psp_context *psp, + enum psp_ring_type ring_type) +{ + int ret = 0; + struct psp_ring *ring; + struct amdgpu_device *adev = psp->adev; + + ring = &psp->km_ring; + + ring->ring_type = ring_type; + + /* allocate 4k Page of Local Frame Buffer memory for ring */ + ring->ring_size = 0x1000; + ret = amdgpu_bo_create_kernel(adev, ring->ring_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &adev->firmware.rbuf, + &ring->ring_mem_mc_addr, + (void **)&ring->ring_mem); + if (ret) { + ring->ring_size = 0; + return ret; + } + + return 0; +} + +static int psp_v11_0_8_ring_stop(struct psp_context *psp, + enum psp_ring_type ring_type) +{ + int ret = 0; + struct amdgpu_device *adev = psp->adev; + + if (amdgpu_sriov_vf(adev)) { + /* Write the ring destroy command*/ + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, + GFX_CTRL_CMD_ID_DESTROY_GPCOM_RING); + /* there might be handshake issue with hardware which needs delay */ + mdelay(20); + /* Wait for response flag (bit 31) */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101), + 0x80000000, 0x80000000, false); + } else { + /* Write the ring destroy command*/ + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, + GFX_CTRL_CMD_ID_DESTROY_RINGS); + /* there might be handshake issue with hardware which needs delay */ + mdelay(20); + /* Wait for response flag (bit 31) */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), + 0x80000000, 0x80000000, false); + } + + return ret; +} + +static int psp_v11_0_8_ring_create(struct psp_context *psp, + enum psp_ring_type ring_type) +{ + int ret = 0; + unsigned int psp_ring_reg = 0; + struct psp_ring *ring = &psp->km_ring; + struct amdgpu_device *adev = psp->adev; + + if (amdgpu_sriov_vf(adev)) { + ret = psp_v11_0_8_ring_stop(psp, ring_type); + if (ret) { + DRM_ERROR("psp_v11_0_8_ring_stop_sriov failed!\n"); + return ret; + } + + /* Write low address of the ring to C2PMSG_102 */ + psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_ring_reg); + /* Write high address of the ring to C2PMSG_103 */ + psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_103, psp_ring_reg); + + /* Write the ring initialization command to C2PMSG_101 */ + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, + GFX_CTRL_CMD_ID_INIT_GPCOM_RING); + + /* there might be handshake issue with hardware which needs delay */ + mdelay(20); + + /* Wait for response flag (bit 31) in C2PMSG_101 */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101), + 0x80000000, 0x8000FFFF, false); + + } else { + /* Wait for sOS ready for ring creation */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), + 0x80000000, 0x80000000, false); + if (ret) { + DRM_ERROR("Failed to wait for trust OS ready for ring creation\n"); + return ret; + } + + /* Write low address of the ring to C2PMSG_69 */ + psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg); + /* Write high address of the ring to C2PMSG_70 */ + psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, psp_ring_reg); + /* Write size of ring to C2PMSG_71 */ + psp_ring_reg = ring->ring_size; + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_71, psp_ring_reg); + /* Write the ring 
initialization command to C2PMSG_64 */ + psp_ring_reg = ring_type; + psp_ring_reg = psp_ring_reg << 16; + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg); + + /* there might be handshake issue with hardware which needs delay */ + mdelay(20); + + /* Wait for response flag (bit 31) in C2PMSG_64 */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), + 0x80000000, 0x8000FFFF, false); + } + + return ret; +} + +static int psp_v11_0_8_ring_destroy(struct psp_context *psp, + enum psp_ring_type ring_type) +{ + int ret = 0; + struct psp_ring *ring = &psp->km_ring; + struct amdgpu_device *adev = psp->adev; + + ret = psp_v11_0_8_ring_stop(psp, ring_type); + if (ret) + DRM_ERROR("Fail to stop psp ring\n"); + + amdgpu_bo_free_kernel(&adev->firmware.rbuf, + &ring->ring_mem_mc_addr, + (void **)&ring->ring_mem); + + return ret; +} + +static uint32_t psp_v11_0_8_ring_get_wptr(struct psp_context *psp) +{ + uint32_t data; + struct amdgpu_device *adev = psp->adev; + + if (amdgpu_sriov_vf(adev)) + data = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102); + else + data = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67); + + return data; +} + +static void psp_v11_0_8_ring_set_wptr(struct psp_context *psp, uint32_t value) +{ + struct amdgpu_device *adev = psp->adev; + + if (amdgpu_sriov_vf(adev)) { + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, value); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, + GFX_CTRL_CMD_ID_CONSUME_CMD); + } else + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, value); +} + +static const struct psp_funcs psp_v11_0_8_funcs = { + .ring_init = psp_v11_0_8_ring_init, + .ring_create = psp_v11_0_8_ring_create, + .ring_stop = psp_v11_0_8_ring_stop, + .ring_destroy = psp_v11_0_8_ring_destroy, + .ring_get_wptr = psp_v11_0_8_ring_get_wptr, + .ring_set_wptr = psp_v11_0_8_ring_set_wptr, +}; + +void psp_v11_0_8_set_psp_funcs(struct psp_context *psp) +{ + psp->funcs = &psp_v11_0_8_funcs; +} diff --git a/drivers/gpu/drm/amd/amdgpu/dce_virtual.h b/drivers/gpu/drm/amd/amdgpu/psp_v11_0_8.h index ed422012c8c6..890377a5afe0 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_virtual.h +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0_8.h @@ -1,5 +1,5 @@ /* - * Copyright 2014 Advanced Micro Devices, Inc. + * Copyright 2021 Advanced Micro Devices, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -20,11 +20,11 @@ * OTHER DEALINGS IN THE SOFTWARE. 
* */ +#ifndef __PSP_V11_0_8_H__ +#define __PSP_V11_0_8_H__ -#ifndef __DCE_VIRTUAL_H__ -#define __DCE_VIRTUAL_H__ +#include "amdgpu_psp.h" -extern const struct amdgpu_ip_block_version dce_virtual_ip_block; +void psp_v11_0_8_set_psp_funcs(struct psp_context *psp); #endif - diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c index 536d41f327c1..cc649406234b 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c @@ -84,23 +84,23 @@ static int psp_v12_0_init_microcode(struct psp_context *psp) ta_hdr = (const struct ta_firmware_header_v1_0 *) adev->psp.ta_fw->data; - adev->psp.ta_hdcp_ucode_version = - le32_to_cpu(ta_hdr->ta_hdcp_ucode_version); - adev->psp.ta_hdcp_ucode_size = - le32_to_cpu(ta_hdr->ta_hdcp_size_bytes); - adev->psp.ta_hdcp_start_addr = + adev->psp.hdcp.feature_version = + le32_to_cpu(ta_hdr->hdcp.fw_version); + adev->psp.hdcp.size_bytes = + le32_to_cpu(ta_hdr->hdcp.size_bytes); + adev->psp.hdcp.start_addr = (uint8_t *)ta_hdr + le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes); adev->psp.ta_fw_version = le32_to_cpu(ta_hdr->header.ucode_version); - adev->psp.ta_dtm_ucode_version = - le32_to_cpu(ta_hdr->ta_dtm_ucode_version); - adev->psp.ta_dtm_ucode_size = - le32_to_cpu(ta_hdr->ta_dtm_size_bytes); - adev->psp.ta_dtm_start_addr = - (uint8_t *)adev->psp.ta_hdcp_start_addr + - le32_to_cpu(ta_hdr->ta_dtm_offset_bytes); + adev->psp.dtm.feature_version = + le32_to_cpu(ta_hdr->dtm.fw_version); + adev->psp.dtm.size_bytes = + le32_to_cpu(ta_hdr->dtm.size_bytes); + adev->psp.dtm.start_addr = + (uint8_t *)adev->psp.hdcp.start_addr + + le32_to_cpu(ta_hdr->dtm.offset_bytes); } return 0; @@ -138,7 +138,7 @@ static int psp_v12_0_bootloader_load_sysdrv(struct psp_context *psp) return ret; /* Copy PSP System Driver binary to memory */ - psp_copy_fw(psp, psp->sys_start_addr, psp->sys_bin_size); + psp_copy_fw(psp, psp->sys.start_addr, psp->sys.size_bytes); /* Provide the sys driver to bootloader */ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, @@ -177,7 +177,7 @@ static int psp_v12_0_bootloader_load_sos(struct psp_context *psp) return ret; /* Copy Secure OS binary to PSP memory */ - psp_copy_fw(psp, psp->sos_start_addr, psp->sos_bin_size); + psp_copy_fw(psp, psp->sos.start_addr, psp->sos.size_bytes); /* Provide the PSP secure OS to bootloader */ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c index c8949add88f3..47a500f64db2 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c @@ -35,6 +35,12 @@ MODULE_FIRMWARE("amdgpu/yellow_carp_asd.bin"); MODULE_FIRMWARE("amdgpu/yellow_carp_toc.bin"); MODULE_FIRMWARE("amdgpu/yellow_carp_ta.bin"); +/* For large FW files the time to complete can be very long */ +#define USBC_PD_POLLING_LIMIT_S 240 + +/* Read USB-PD from LFB */ +#define GFX_CMD_USB_PD_USE_LFB 0x480 + static int psp_v13_0_init_microcode(struct psp_context *psp) { struct amdgpu_device *adev = psp->adev; @@ -111,7 +117,9 @@ static int psp_v13_0_wait_for_bootloader(struct psp_context *psp) return ret; } -static int psp_v13_0_bootloader_load_kdb(struct psp_context *psp) +static int psp_v13_0_bootloader_load_component(struct psp_context *psp, + struct psp_bin_desc *bin_desc, + enum psp_bootloader_cmd bl_cmd) { int ret; uint32_t psp_gfxdrv_command_reg = 0; @@ -130,12 +138,12 @@ static int psp_v13_0_bootloader_load_kdb(struct psp_context *psp) memset(psp->fw_pri_buf, 0, PSP_1_MEG); /* Copy PSP KDB 
binary to memory */ - memcpy(psp->fw_pri_buf, psp->kdb_start_addr, psp->kdb_bin_size); + memcpy(psp->fw_pri_buf, bin_desc->start_addr, bin_desc->size_bytes); /* Provide the PSP KDB to bootloader */ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_36, (uint32_t)(psp->fw_pri_mc_addr >> 20)); - psp_gfxdrv_command_reg = PSP_BL__LOAD_KEY_DATABASE; + psp_gfxdrv_command_reg = bl_cmd; WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_35, psp_gfxdrv_command_reg); @@ -144,40 +152,29 @@ static int psp_v13_0_bootloader_load_kdb(struct psp_context *psp) return ret; } -static int psp_v13_0_bootloader_load_sysdrv(struct psp_context *psp) +static int psp_v13_0_bootloader_load_kdb(struct psp_context *psp) { - int ret; - uint32_t psp_gfxdrv_command_reg = 0; - struct amdgpu_device *adev = psp->adev; - - /* Check sOS sign of life register to confirm sys driver and sOS - * are already been loaded. - */ - if (psp_v13_0_is_sos_alive(psp)) - return 0; - - ret = psp_v13_0_wait_for_bootloader(psp); - if (ret) - return ret; - - memset(psp->fw_pri_buf, 0, PSP_1_MEG); - - /* Copy PSP System Driver binary to memory */ - memcpy(psp->fw_pri_buf, psp->sys_start_addr, psp->sys_bin_size); + return psp_v13_0_bootloader_load_component(psp, &psp->kdb, PSP_BL__LOAD_KEY_DATABASE); +} - /* Provide the sys driver to bootloader */ - WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_36, - (uint32_t)(psp->fw_pri_mc_addr >> 20)); - psp_gfxdrv_command_reg = PSP_BL__LOAD_SYSDRV; - WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_35, - psp_gfxdrv_command_reg); +static int psp_v13_0_bootloader_load_sysdrv(struct psp_context *psp) +{ + return psp_v13_0_bootloader_load_component(psp, &psp->sys, PSP_BL__LOAD_SYSDRV); +} - /* there might be handshake issue with hardware which needs delay */ - mdelay(20); +static int psp_v13_0_bootloader_load_soc_drv(struct psp_context *psp) +{ + return psp_v13_0_bootloader_load_component(psp, &psp->soc_drv, PSP_BL__LOAD_SOCDRV); +} - ret = psp_v13_0_wait_for_bootloader(psp); +static int psp_v13_0_bootloader_load_intf_drv(struct psp_context *psp) +{ + return psp_v13_0_bootloader_load_component(psp, &psp->intf_drv, PSP_BL__LOAD_INTFDRV); +} - return ret; +static int psp_v13_0_bootloader_load_dbg_drv(struct psp_context *psp) +{ + return psp_v13_0_bootloader_load_component(psp, &psp->dbg_drv, PSP_BL__LOAD_DBGDRV); } static int psp_v13_0_bootloader_load_sos(struct psp_context *psp) @@ -199,7 +196,7 @@ static int psp_v13_0_bootloader_load_sos(struct psp_context *psp) memset(psp->fw_pri_buf, 0, PSP_1_MEG); /* Copy Secure OS binary to PSP memory */ - memcpy(psp->fw_pri_buf, psp->sos_start_addr, psp->sos_bin_size); + memcpy(psp->fw_pri_buf, psp->sos.start_addr, psp->sos.size_bytes); /* Provide the PSP secure OS to bootloader */ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_36, @@ -382,10 +379,71 @@ static void psp_v13_0_ring_set_wptr(struct psp_context *psp, uint32_t value) WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_67, value); } +static int psp_v13_0_load_usbc_pd_fw(struct psp_context *psp, uint64_t fw_pri_mc_addr) +{ + struct amdgpu_device *adev = psp->adev; + uint32_t reg_status; + int ret, i = 0; + + /* + * LFB address which is aligned to 1MB address and has to be + * right-shifted by 20 so that LFB address can be passed on a 32-bit C2P + * register + */ + WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_36, (fw_pri_mc_addr >> 20)); + + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_35), + 0x80000000, 0x80000000, false); + if (ret) + return ret; + + /* Fireup interrupt so PSP can pick up the address */ + WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_35, 
(GFX_CMD_USB_PD_USE_LFB << 16)); + + /* FW load takes very long time */ + do { + msleep(1000); + reg_status = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_35); + + if (reg_status & 0x80000000) + goto done; + + } while (++i < USBC_PD_POLLING_LIMIT_S); + + return -ETIME; +done: + + if ((reg_status & 0xFFFF) != 0) { + DRM_ERROR("Address load failed - MP0_SMN_C2PMSG_35.Bits [15:0] = %04x\n", + reg_status & 0xFFFF); + return -EIO; + } + + return 0; +} + +static int psp_v13_0_read_usbc_pd_fw(struct psp_context *psp, uint32_t *fw_ver) +{ + struct amdgpu_device *adev = psp->adev; + int ret; + + WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_35, C2PMSG_CMD_GFX_USB_PD_FW_VER); + + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_35), + 0x80000000, 0x80000000, false); + if (!ret) + *fw_ver = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_36); + + return ret; +} + static const struct psp_funcs psp_v13_0_funcs = { .init_microcode = psp_v13_0_init_microcode, .bootloader_load_kdb = psp_v13_0_bootloader_load_kdb, .bootloader_load_sysdrv = psp_v13_0_bootloader_load_sysdrv, + .bootloader_load_soc_drv = psp_v13_0_bootloader_load_soc_drv, + .bootloader_load_intf_drv = psp_v13_0_bootloader_load_intf_drv, + .bootloader_load_dbg_drv = psp_v13_0_bootloader_load_dbg_drv, .bootloader_load_sos = psp_v13_0_bootloader_load_sos, .ring_init = psp_v13_0_ring_init, .ring_create = psp_v13_0_ring_create, @@ -393,6 +451,8 @@ static const struct psp_funcs psp_v13_0_funcs = { .ring_destroy = psp_v13_0_ring_destroy, .ring_get_wptr = psp_v13_0_ring_get_wptr, .ring_set_wptr = psp_v13_0_ring_set_wptr, + .load_usbc_pd_fw = psp_v13_0_load_usbc_pd_fw, + .read_usbc_pd_fw = psp_v13_0_read_usbc_pd_fw }; void psp_v13_0_set_psp_funcs(struct psp_context *psp) diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c index b86dcbabb635..1ed357cb0f49 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c @@ -103,7 +103,7 @@ static int psp_v3_1_bootloader_load_sysdrv(struct psp_context *psp) return ret; /* Copy PSP System Driver binary to memory */ - psp_copy_fw(psp, psp->sys_start_addr, psp->sys_bin_size); + psp_copy_fw(psp, psp->sys.start_addr, psp->sys.size_bytes); /* Provide the sys driver to bootloader */ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, @@ -142,7 +142,7 @@ static int psp_v3_1_bootloader_load_sos(struct psp_context *psp) return ret; /* Copy Secure OS binary to PSP memory */ - psp_copy_fw(psp, psp->sos_start_addr, psp->sos_bin_size); + psp_copy_fw(psp, psp->sos.start_addr, psp->sos.size_bytes); /* Provide the PSP secure OS to bootloader */ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index 6117ba8a4c3f..50bf3b71bc93 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -51,6 +51,12 @@ MODULE_FIRMWARE("amdgpu/navi14_sdma1.bin"); MODULE_FIRMWARE("amdgpu/navi12_sdma.bin"); MODULE_FIRMWARE("amdgpu/navi12_sdma1.bin"); +MODULE_FIRMWARE("amdgpu/cyan_skillfish_sdma.bin"); +MODULE_FIRMWARE("amdgpu/cyan_skillfish_sdma1.bin"); + +MODULE_FIRMWARE("amdgpu/cyan_skillfish2_sdma.bin"); +MODULE_FIRMWARE("amdgpu/cyan_skillfish2_sdma1.bin"); + #define SDMA1_REG_OFFSET 0x600 #define SDMA0_HYP_DEC_REG_START 0x5880 #define SDMA0_HYP_DEC_REG_END 0x5893 @@ -130,6 +136,37 @@ static const struct soc15_reg_golden golden_settings_sdma_nv12[] = { SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), }; +static const struct 
soc15_reg_golden golden_settings_sdma_cyan_skillfish[] = { + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_CHICKEN_BITS, 0xffbf1f0f, 0x03ab0107), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_GB_ADDR_CONFIG, 0x001877ff, 0x00000044), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x001877ff, 0x00000044), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_PAGE_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC2_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC4_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC5_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC6_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC7_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_UTCL1_PAGE, 0x007fffff, 0x004c5c00), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_CHICKEN_BITS, 0xffbf1f0f, 0x03ab0107), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_GB_ADDR_CONFIG, 0x001877ff, 0x00000044), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x001877ff, 0x00000044), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_PAGE_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC2_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC4_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC5_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC6_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC7_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_UTCL1_PAGE, 0x007fffff, 0x004c5c00) +}; + static u32 sdma_v5_0_get_reg_offset(struct amdgpu_device *adev, u32 instance, u32 internal_offset) { u32 base; @@ -180,6 +217,11 @@ static void sdma_v5_0_init_golden_registers(struct amdgpu_device *adev) golden_settings_sdma_nv12, (const u32)ARRAY_SIZE(golden_settings_sdma_nv12)); break; + case CHIP_CYAN_SKILLFISH: + soc15_program_register_sequence(adev, + golden_settings_sdma_cyan_skillfish, + (const u32)ARRAY_SIZE(golden_settings_sdma_cyan_skillfish)); + break; default: break; } @@ -200,7 +242,7 @@ static void sdma_v5_0_init_golden_registers(struct amdgpu_device *adev) static int sdma_v5_0_init_microcode(struct amdgpu_device *adev) { const char *chip_name; - char fw_name[30]; + char fw_name[40]; int err = 0, i; struct amdgpu_firmware_info *info = NULL; const struct common_firmware_header *header = NULL; @@ -221,6 +263,12 @@ static int sdma_v5_0_init_microcode(struct amdgpu_device *adev) case CHIP_NAVI12: chip_name = "navi12"; break; + case CHIP_CYAN_SKILLFISH: + if (adev->apu_flags & AMD_APU_IS_CYAN_SKILLFISH2) + chip_name = "cyan_skillfish2"; + 
else + chip_name = "cyan_skillfish"; + break; default: BUG(); } diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index 7486e5306786..779f5c911e11 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -87,21 +87,6 @@ static u32 sdma_v5_2_get_reg_offset(struct amdgpu_device *adev, u32 instance, u3 return base + internal_offset; } -static void sdma_v5_2_init_golden_registers(struct amdgpu_device *adev) -{ - switch (adev->asic_type) { - case CHIP_SIENNA_CICHLID: - case CHIP_NAVY_FLOUNDER: - case CHIP_VANGOGH: - case CHIP_DIMGREY_CAVEFISH: - case CHIP_BEIGE_GOBY: - case CHIP_YELLOW_CARP: - break; - default: - break; - } -} - static int sdma_v5_2_init_inst_ctx(struct amdgpu_sdma_instance *sdma_inst) { int err = 0; @@ -1345,8 +1330,6 @@ static int sdma_v5_2_hw_init(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - sdma_v5_2_init_golden_registers(adev); - r = sdma_v5_2_start(adev); return r; diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c index 7cbc2bb03bc6..e6d2f74a7976 100644 --- a/drivers/gpu/drm/amd/amdgpu/si.c +++ b/drivers/gpu/drm/amd/amdgpu/si.c @@ -44,7 +44,7 @@ #include "dce_v6_0.h" #include "si.h" #include "uvd_v3_1.h" -#include "dce_virtual.h" +#include "amdgpu_vkms.h" #include "gca/gfx_6_0_d.h" #include "oss/oss_1_0_d.h" #include "oss/oss_1_0_sh_mask.h" @@ -2759,7 +2759,7 @@ int si_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &si_dma_ip_block); amdgpu_device_ip_block_add(adev, &si_smu_ip_block); if (adev->enable_virtual_display) - amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); + amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block); #if defined(CONFIG_DRM_AMD_DC) && defined(CONFIG_DRM_AMD_DC_SI) else if (amdgpu_device_has_dc_support(adev)) amdgpu_device_ip_block_add(adev, &dm_ip_block); @@ -2777,7 +2777,7 @@ int si_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &si_dma_ip_block); amdgpu_device_ip_block_add(adev, &si_smu_ip_block); if (adev->enable_virtual_display) - amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); + amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block); #if defined(CONFIG_DRM_AMD_DC) && defined(CONFIG_DRM_AMD_DC_SI) else if (amdgpu_device_has_dc_support(adev)) amdgpu_device_ip_block_add(adev, &dm_ip_block); @@ -2795,7 +2795,7 @@ int si_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &si_dma_ip_block); amdgpu_device_ip_block_add(adev, &si_smu_ip_block); if (adev->enable_virtual_display) - amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); + amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block); break; default: BUG(); diff --git a/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c b/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c index 5c7d769aee3f..73ffa8fde3df 100644 --- a/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c +++ b/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c @@ -41,9 +41,7 @@ #define I2C_SW_TIMEOUT 8 #define I2C_ABORT 0x10 -/* I2C transaction flags */ -#define I2C_NO_STOP 1 -#define I2C_RESTART 2 +#define I2C_X_RESTART BIT(31) #define to_amdgpu_device(x) (container_of(x, struct amdgpu_device, pm.smu_i2c)) @@ -56,12 +54,48 @@ static void smu_v11_0_i2c_set_clock_gating(struct i2c_adapter *control, bool en) WREG32_SOC15(SMUIO, 0, mmSMUIO_PWRMGT, reg); } +/* The T_I2C_POLL_US is defined as follows: + * + * "Define a timer interval (t_i2c_poll) equal to 10 times the + * signalling period for the highest I2C transfer 
speed used in the + * system and supported by DW_apb_i2c. For instance, if the highest + * I2C data transfer mode is 400 kb/s, then t_i2c_poll is 25 us." -- + * DesignWare DW_apb_i2c Databook, Version 1.21a, section 3.8.3.1, + * page 56, with grammar and syntax corrections. + * + * Vcc for our device is at 1.8V which puts it at 400 kHz, + * see Atmel AT24CM02 datasheet, section 8.3 DC Characteristics table, page 14. + * + * The procedure to disable the IP block is described in section + * 3.8.3 Disabling DW_apb_i2c on page 56. + */ +#define I2C_SPEED_MODE_FAST 2 +#define T_I2C_POLL_US 25 +#define I2C_MAX_T_POLL_COUNT 1000 -static void smu_v11_0_i2c_enable(struct i2c_adapter *control, bool enable) +static int smu_v11_0_i2c_enable(struct i2c_adapter *control, bool enable) { struct amdgpu_device *adev = to_amdgpu_device(control); WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_ENABLE, enable ? 1 : 0); + + if (!enable) { + int ii; + + for (ii = I2C_MAX_T_POLL_COUNT; ii > 0; ii--) { + u32 en_stat = RREG32_SOC15(SMUIO, + 0, + mmCKSVII2C_IC_ENABLE_STATUS); + if (REG_GET_FIELD(en_stat, CKSVII2C_IC_ENABLE_STATUS, IC_EN)) + udelay(T_I2C_POLL_US); + else + return I2C_OK; + } + + return I2C_ABORT; + } + + return I2C_OK; } static void smu_v11_0_i2c_clear_status(struct i2c_adapter *control) @@ -83,8 +117,13 @@ static void smu_v11_0_i2c_configure(struct i2c_adapter *control) reg = REG_SET_FIELD(reg, CKSVII2C_IC_CON, IC_RESTART_EN, 1); reg = REG_SET_FIELD(reg, CKSVII2C_IC_CON, IC_10BITADDR_MASTER, 0); reg = REG_SET_FIELD(reg, CKSVII2C_IC_CON, IC_10BITADDR_SLAVE, 0); - /* Standard mode */ - reg = REG_SET_FIELD(reg, CKSVII2C_IC_CON, IC_MAX_SPEED_MODE, 2); + /* The values of IC_MAX_SPEED_MODE are, + * 1: standard mode, 0 - 100 Kb/s, + * 2: fast mode, <= 400 Kb/s, or fast mode plus, <= 1000 Kb/s, + * 3: high speed mode, <= 3.4 Mb/s. + */ + reg = REG_SET_FIELD(reg, CKSVII2C_IC_CON, IC_MAX_SPEED_MODE, + I2C_SPEED_MODE_FAST); reg = REG_SET_FIELD(reg, CKSVII2C_IC_CON, IC_MASTER_MODE, 1); WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_CON, reg); @@ -113,13 +152,15 @@ static void smu_v11_0_i2c_set_clock(struct i2c_adapter *control) WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_SDA_HOLD, 20); } -static void smu_v11_0_i2c_set_address(struct i2c_adapter *control, uint8_t address) +static void smu_v11_0_i2c_set_address(struct i2c_adapter *control, u16 address) { struct amdgpu_device *adev = to_amdgpu_device(control); - /* Convert fromr 8-bit to 7-bit address */ - address >>= 1; - WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_TAR, (address & 0xFF)); + /* The IC_TAR::IC_TAR field is 10-bits wide. + * It takes a 7-bit or 10-bit addresses as an address, + * i.e. no read/write bit--no wire format, just the address. + */ + WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_TAR, address & 0x3FF); } static uint32_t smu_v11_0_i2c_poll_tx_status(struct i2c_adapter *control) @@ -206,9 +247,6 @@ static uint32_t smu_v11_0_i2c_poll_rx_status(struct i2c_adapter *control) return ret; } - - - /** * smu_v11_0_i2c_transmit - Send a block of data over the I2C bus to a slave device. * @@ -221,17 +259,17 @@ static uint32_t smu_v11_0_i2c_poll_rx_status(struct i2c_adapter *control) * Returns 0 on success or error. 
*/ static uint32_t smu_v11_0_i2c_transmit(struct i2c_adapter *control, - uint8_t address, uint8_t *data, - uint32_t numbytes, uint32_t i2c_flag) + u16 address, u8 *data, + u32 numbytes, u32 i2c_flag) { struct amdgpu_device *adev = to_amdgpu_device(control); - uint32_t bytes_sent, reg, ret = 0; + u32 bytes_sent, reg, ret = I2C_OK; unsigned long timeout_counter; bytes_sent = 0; DRM_DEBUG_DRIVER("I2C_Transmit(), address = %x, bytes = %d , data: ", - (uint16_t)address, numbytes); + address, numbytes); if (drm_debug_enabled(DRM_UT_DRIVER)) { print_hex_dump(KERN_INFO, "data: ", DUMP_PREFIX_NONE, @@ -246,53 +284,49 @@ static uint32_t smu_v11_0_i2c_transmit(struct i2c_adapter *control, /* Clear status bits */ smu_v11_0_i2c_clear_status(control); - timeout_counter = jiffies + msecs_to_jiffies(20); while (numbytes > 0) { reg = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_STATUS); - if (REG_GET_FIELD(reg, CKSVII2C_IC_STATUS, TFNF)) { - do { - reg = 0; - /* - * Prepare transaction, no need to set RESTART. I2C engine will send - * START as soon as it sees data in TXFIFO - */ - if (bytes_sent == 0) - reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD, RESTART, - (i2c_flag & I2C_RESTART) ? 1 : 0); - reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD, DAT, data[bytes_sent]); - - /* determine if we need to send STOP bit or not */ - if (numbytes == 1) - /* Final transaction, so send stop unless I2C_NO_STOP */ - reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD, STOP, - (i2c_flag & I2C_NO_STOP) ? 0 : 1); - /* Write */ - reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD, CMD, 0); - WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_DATA_CMD, reg); - - /* Record that the bytes were transmitted */ - bytes_sent++; - numbytes--; - - reg = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_STATUS); - - } while (numbytes && REG_GET_FIELD(reg, CKSVII2C_IC_STATUS, TFNF)); - } - - /* - * We waited too long for the transmission FIFO to become not-full. - * Exit the loop with error. - */ - if (time_after(jiffies, timeout_counter)) { - ret |= I2C_SW_TIMEOUT; - goto Err; + if (!REG_GET_FIELD(reg, CKSVII2C_IC_STATUS, TFNF)) { + /* + * We waited for too long for the transmission + * FIFO to become not-full. Exit the loop + * with error. + */ + if (time_after(jiffies, timeout_counter)) { + ret |= I2C_SW_TIMEOUT; + goto Err; + } + } else { + reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD, DAT, + data[bytes_sent]); + + /* Final message, final byte, must generate a + * STOP to release the bus, i.e. don't hold + * SCL low. + */ + if (numbytes == 1 && i2c_flag & I2C_M_STOP) + reg = REG_SET_FIELD(reg, + CKSVII2C_IC_DATA_CMD, + STOP, 1); + + if (bytes_sent == 0 && i2c_flag & I2C_X_RESTART) + reg = REG_SET_FIELD(reg, + CKSVII2C_IC_DATA_CMD, + RESTART, 1); + + /* Write */ + reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD, CMD, 0); + WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_DATA_CMD, reg); + + /* Record that the bytes were transmitted */ + bytes_sent++; + numbytes--; } } ret = smu_v11_0_i2c_poll_tx_status(control); - Err: /* Any error, no point in proceeding */ if (ret != I2C_OK) { @@ -323,8 +357,8 @@ Err: * Returns 0 on success or error. 
*/ static uint32_t smu_v11_0_i2c_receive(struct i2c_adapter *control, - uint8_t address, uint8_t *data, - uint32_t numbytes, uint8_t i2c_flag) + u16 address, u8 *data, + u32 numbytes, u32 i2c_flag) { struct amdgpu_device *adev = to_amdgpu_device(control); uint32_t bytes_received, ret = I2C_OK; @@ -342,23 +376,21 @@ static uint32_t smu_v11_0_i2c_receive(struct i2c_adapter *control, smu_v11_0_i2c_clear_status(control); - /* Prepare transaction */ - - /* Each time we disable I2C, so this is not a restart */ - if (bytes_received == 0) - reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD, RESTART, - (i2c_flag & I2C_RESTART) ? 1 : 0); - reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD, DAT, 0); /* Read */ reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD, CMD, 1); - /* Transmitting last byte */ - if (numbytes == 1) - /* Final transaction, so send stop if requested */ - reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD, STOP, - (i2c_flag & I2C_NO_STOP) ? 0 : 1); + /* Final message, final byte, must generate a STOP + * to release the bus, i.e. don't hold SCL low. + */ + if (numbytes == 1 && i2c_flag & I2C_M_STOP) + reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD, + STOP, 1); + + if (bytes_received == 0 && i2c_flag & I2C_X_RESTART) + reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD, + RESTART, 1); WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_DATA_CMD, reg); @@ -413,7 +445,6 @@ static void smu_v11_0_i2c_abort(struct i2c_adapter *control) DRM_DEBUG_DRIVER("I2C_Abort() Done."); } - static bool smu_v11_0_i2c_activity_done(struct i2c_adapter *control) { struct amdgpu_device *adev = to_amdgpu_device(control); @@ -425,7 +456,6 @@ static bool smu_v11_0_i2c_activity_done(struct i2c_adapter *control) reg_ic_enable_status = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_ENABLE_STATUS); reg_ic_enable = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_ENABLE); - if ((REG_GET_FIELD(reg_ic_enable, CKSVII2C_IC_ENABLE, ENABLE) == 0) && (REG_GET_FIELD(reg_ic_enable_status, CKSVII2C_IC_ENABLE_STATUS, IC_EN) == 1)) { /* @@ -455,6 +485,8 @@ static bool smu_v11_0_i2c_activity_done(struct i2c_adapter *control) static void smu_v11_0_i2c_init(struct i2c_adapter *control) { + int res; + /* Disable clock gating */ smu_v11_0_i2c_set_clock_gating(control, false); @@ -462,7 +494,9 @@ static void smu_v11_0_i2c_init(struct i2c_adapter *control) DRM_WARN("I2C busy !"); /* Disable I2C */ - smu_v11_0_i2c_enable(control, false); + res = smu_v11_0_i2c_enable(control, false); + if (res != I2C_OK) + smu_v11_0_i2c_abort(control); /* Configure I2C to operate as master and in standard mode */ smu_v11_0_i2c_configure(control); @@ -475,21 +509,22 @@ static void smu_v11_0_i2c_init(struct i2c_adapter *control) static void smu_v11_0_i2c_fini(struct i2c_adapter *control) { struct amdgpu_device *adev = to_amdgpu_device(control); - uint32_t reg_ic_enable_status, reg_ic_enable; + u32 status, enable, en_stat; + int res; - smu_v11_0_i2c_enable(control, false); + res = smu_v11_0_i2c_enable(control, false); + if (res != I2C_OK) { + status = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_STATUS); + enable = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_ENABLE); + en_stat = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_ENABLE_STATUS); - /* Double check if disabled, else force abort */ - reg_ic_enable_status = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_ENABLE_STATUS); - reg_ic_enable = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_ENABLE); - - if ((REG_GET_FIELD(reg_ic_enable, CKSVII2C_IC_ENABLE, ENABLE) == 0) && - (REG_GET_FIELD(reg_ic_enable_status, - CKSVII2C_IC_ENABLE_STATUS, IC_EN) == 1)) { - /* - * Nobody is using I2C engine, but engine 
remains active because - * someone missed to send STOP + /* Nobody is using the I2C engine, yet it remains + * active, possibly because someone missed to send + * STOP. */ + DRM_DEBUG_DRIVER("Aborting from fini: status:0x%08x " + "enable:0x%08x enable_stat:0x%08x", + status, enable, en_stat); smu_v11_0_i2c_abort(control); } @@ -531,22 +566,12 @@ static bool smu_v11_0_i2c_bus_unlock(struct i2c_adapter *control) /***************************** I2C GLUE ****************************/ static uint32_t smu_v11_0_i2c_read_data(struct i2c_adapter *control, - uint8_t address, - uint8_t *data, - uint32_t numbytes) + struct i2c_msg *msg, uint32_t i2c_flag) { - uint32_t ret = 0; - - /* First 2 bytes are dummy write to set EEPROM address */ - ret = smu_v11_0_i2c_transmit(control, address, data, 2, I2C_NO_STOP); - if (ret != I2C_OK) - goto Fail; + uint32_t ret; - /* Now read data starting with that address */ - ret = smu_v11_0_i2c_receive(control, address, data + 2, numbytes - 2, - I2C_RESTART); + ret = smu_v11_0_i2c_receive(control, msg->addr, msg->buf, msg->len, i2c_flag); -Fail: if (ret != I2C_OK) DRM_ERROR("ReadData() - I2C error occurred :%x", ret); @@ -554,28 +579,15 @@ Fail: } static uint32_t smu_v11_0_i2c_write_data(struct i2c_adapter *control, - uint8_t address, - uint8_t *data, - uint32_t numbytes) + struct i2c_msg *msg, uint32_t i2c_flag) { uint32_t ret; - ret = smu_v11_0_i2c_transmit(control, address, data, numbytes, 0); + ret = smu_v11_0_i2c_transmit(control, msg->addr, msg->buf, msg->len, i2c_flag); if (ret != I2C_OK) DRM_ERROR("WriteI2CData() - I2C error occurred :%x", ret); - else - /* - * According to EEPROM spec there is a MAX of 10 ms required for - * EEPROM to flush internal RX buffer after STOP was issued at the - * end of write transaction. During this time the EEPROM will not be - * responsive to any more commands - so wait a bit more. - * - * TODO Improve to wait for first ACK for slave address after - * internal write cycle done. 
- */ - msleep(10); - + return ret; } @@ -584,12 +596,11 @@ static void lock_bus(struct i2c_adapter *i2c, unsigned int flags) { struct amdgpu_device *adev = to_amdgpu_device(i2c); - if (!smu_v11_0_i2c_bus_lock(i2c)) { + mutex_lock(&adev->pm.smu_i2c_mutex); + if (!smu_v11_0_i2c_bus_lock(i2c)) DRM_ERROR("Failed to lock the bus from SMU"); - return; - } - - adev->pm.bus_locked = true; + else + adev->pm.bus_locked = true; } static int trylock_bus(struct i2c_adapter *i2c, unsigned int flags) @@ -602,12 +613,11 @@ static void unlock_bus(struct i2c_adapter *i2c, unsigned int flags) { struct amdgpu_device *adev = to_amdgpu_device(i2c); - if (!smu_v11_0_i2c_bus_unlock(i2c)) { + if (!smu_v11_0_i2c_bus_unlock(i2c)) DRM_ERROR("Failed to unlock the bus from SMU"); - return; - } - - adev->pm.bus_locked = false; + else + adev->pm.bus_locked = false; + mutex_unlock(&adev->pm.smu_i2c_mutex); } static const struct i2c_lock_operations smu_v11_0_i2c_i2c_lock_ops = { @@ -617,27 +627,60 @@ static const struct i2c_lock_operations smu_v11_0_i2c_i2c_lock_ops = { }; static int smu_v11_0_i2c_xfer(struct i2c_adapter *i2c_adap, - struct i2c_msg *msgs, int num) + struct i2c_msg *msg, int num) { int i, ret; - struct amdgpu_device *adev = to_amdgpu_device(i2c_adap); - - if (!adev->pm.bus_locked) { - DRM_ERROR("I2C bus unlocked, stopping transaction!"); - return -EIO; - } + u16 addr, dir; smu_v11_0_i2c_init(i2c_adap); + /* From the client's point of view, this sequence of + * messages-- the array i2c_msg *msg, is a single transaction + * on the bus, starting with START and ending with STOP. + * + * The client is welcome to send any sequence of messages in + * this array, as processing under this function here is + * striving to be agnostic. + * + * Record the first address and direction we see. If either + * changes for a subsequent message, generate ReSTART. The + * DW_apb_i2c databook, v1.21a, specifies that ReSTART is + * generated when the direction changes, with the default IP + * block parameter settings, but it doesn't specify if ReSTART + * is generated when the address changes (possibly...). We + * don't rely on the default IP block parameter settings as + * the block is shared and they may change. + */ + if (num > 0) { + addr = msg[0].addr; + dir = msg[0].flags & I2C_M_RD; + } + for (i = 0; i < num; i++) { - if (msgs[i].flags & I2C_M_RD) + u32 i2c_flag = 0; + + if (msg[i].addr != addr || (msg[i].flags ^ dir) & I2C_M_RD) { + addr = msg[i].addr; + dir = msg[i].flags & I2C_M_RD; + i2c_flag |= I2C_X_RESTART; + } + + if (i == num - 1) { + /* Set the STOP bit on the last message, so + * that the IP block generates a STOP after + * the last byte of the message. 
+ */ + i2c_flag |= I2C_M_STOP; + } + + if (msg[i].flags & I2C_M_RD) ret = smu_v11_0_i2c_read_data(i2c_adap, - (uint8_t)msgs[i].addr, - msgs[i].buf, msgs[i].len); + msg + i, + i2c_flag); else ret = smu_v11_0_i2c_write_data(i2c_adap, - (uint8_t)msgs[i].addr, - msgs[i].buf, msgs[i].len); + msg + i, + i2c_flag); if (ret != I2C_OK) { num = -EIO; @@ -654,23 +697,28 @@ static u32 smu_v11_0_i2c_func(struct i2c_adapter *adap) return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL; } - static const struct i2c_algorithm smu_v11_0_i2c_algo = { .master_xfer = smu_v11_0_i2c_xfer, .functionality = smu_v11_0_i2c_func, }; +static const struct i2c_adapter_quirks smu_v11_0_i2c_control_quirks = { + .flags = I2C_AQ_NO_ZERO_LEN, +}; + int smu_v11_0_i2c_control_init(struct i2c_adapter *control) { struct amdgpu_device *adev = to_amdgpu_device(control); int res; + mutex_init(&adev->pm.smu_i2c_mutex); control->owner = THIS_MODULE; - control->class = I2C_CLASS_SPD; + control->class = I2C_CLASS_HWMON; control->dev.parent = &adev->pdev->dev; control->algo = &smu_v11_0_i2c_algo; snprintf(control->name, sizeof(control->name), "AMDGPU SMU"); control->lock_ops = &smu_v11_0_i2c_i2c_lock_ops; + control->quirks = &smu_v11_0_i2c_control_quirks; res = i2c_add_adapter(control); if (res) diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index b7d350be8050..0fc97c364fd7 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -74,7 +74,7 @@ #include "smuio_v9_0.h" #include "smuio_v11_0.h" #include "smuio_v13_0.h" -#include "dce_virtual.h" +#include "amdgpu_vkms.h" #include "mxgpu_ai.h" #include "amdgpu_ras.h" #include "amdgpu_xgmi.h" @@ -575,7 +575,7 @@ soc15_asic_reset_method(struct amdgpu_device *adev) baco_reset = amdgpu_dpm_is_baco_supported(adev); break; case CHIP_VEGA20: - if (adev->psp.sos_fw_version >= 0x80067) + if (adev->psp.sos.fw_version >= 0x80067) baco_reset = amdgpu_dpm_is_baco_supported(adev); /* @@ -635,7 +635,7 @@ static bool soc15_supports_baco(struct amdgpu_device *adev) case CHIP_ARCTURUS: return amdgpu_dpm_is_baco_supported(adev); case CHIP_VEGA20: - if (adev->psp.sos_fw_version >= 0x80067) + if (adev->psp.sos.fw_version >= 0x80067) return amdgpu_dpm_is_baco_supported(adev); return false; default: @@ -843,7 +843,7 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &pp_smu_ip_block); } if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) - amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); + amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block); #if defined(CONFIG_DRM_AMD_DC) else if (amdgpu_device_has_dc_support(adev)) amdgpu_device_ip_block_add(adev, &dm_ip_block); @@ -863,7 +863,7 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block); amdgpu_device_ip_block_add(adev, &pp_smu_ip_block); if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) - amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); + amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block); #if defined(CONFIG_DRM_AMD_DC) else if (amdgpu_device_has_dc_support(adev)) amdgpu_device_ip_block_add(adev, &dm_ip_block); @@ -885,7 +885,7 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) } if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) - amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); + amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block); amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block); amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block); 
amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); @@ -909,7 +909,7 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block); amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block); if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) - amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); + amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block); #if defined(CONFIG_DRM_AMD_DC) else if (amdgpu_device_has_dc_support(adev)) amdgpu_device_ip_block_add(adev, &dm_ip_block); @@ -1353,8 +1353,6 @@ static int soc15_common_early_init(void *handle) adev->asic_funcs = &vega20_asic_funcs; adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS | - AMD_CG_SUPPORT_GFX_CGCG | - AMD_CG_SUPPORT_GFX_CGLS | AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_HDP_LS | AMD_CG_SUPPORT_SDMA_MGCG | diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h b/drivers/gpu/drm/amd/amdgpu/soc15_common.h index 0eeb5e073be8..8a9ca87d8663 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h @@ -28,13 +28,13 @@ #define SOC15_REG_OFFSET(ip, inst, reg) (adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) #define __WREG32_SOC15_RLC__(reg, value, flag, hwip) \ - ((amdgpu_sriov_vf(adev) && adev->gfx.rlc.funcs && adev->gfx.rlc.funcs->rlcg_wreg) ? \ - adev->gfx.rlc.funcs->rlcg_wreg(adev, reg, value, flag, hwip) : \ + ((amdgpu_sriov_vf(adev) && adev->gfx.rlc.funcs && adev->gfx.rlc.funcs->sriov_wreg) ? \ + adev->gfx.rlc.funcs->sriov_wreg(adev, reg, value, flag, hwip) : \ WREG32(reg, value)) #define __RREG32_SOC15_RLC__(reg, flag, hwip) \ - ((amdgpu_sriov_vf(adev) && adev->gfx.rlc.funcs && adev->gfx.rlc.funcs->rlcg_rreg) ? \ - adev->gfx.rlc.funcs->rlcg_rreg(adev, reg, flag, hwip) : \ + ((amdgpu_sriov_vf(adev) && adev->gfx.rlc.funcs && adev->gfx.rlc.funcs->sriov_rreg) ? 
\ + adev->gfx.rlc.funcs->sriov_rreg(adev, reg, flag, hwip) : \ RREG32(reg)) #define WREG32_FIELD15(ip, idx, reg, field, val) \ diff --git a/drivers/gpu/drm/amd/amdgpu/ta_xgmi_if.h b/drivers/gpu/drm/amd/amdgpu/ta_xgmi_if.h index ac2c27b7630c..da815a93d46e 100644 --- a/drivers/gpu/drm/amd/amdgpu/ta_xgmi_if.h +++ b/drivers/gpu/drm/amd/amdgpu/ta_xgmi_if.h @@ -33,7 +33,8 @@ enum ta_command_xgmi { TA_COMMAND_XGMI__GET_NODE_ID = 0x01, TA_COMMAND_XGMI__GET_HIVE_ID = 0x02, TA_COMMAND_XGMI__GET_GET_TOPOLOGY_INFO = 0x03, - TA_COMMAND_XGMI__SET_TOPOLOGY_INFO = 0x04 + TA_COMMAND_XGMI__SET_TOPOLOGY_INFO = 0x04, + TA_COMMAND_XGMI__GET_PEER_LINKS = 0x0B }; /* XGMI related enumerations */ @@ -75,6 +76,11 @@ struct ta_xgmi_node_info { enum ta_xgmi_assigned_sdma_engine sdma_engine; }; +struct ta_xgmi_peer_link_info { + uint64_t node_id; + uint8_t num_links; +}; + struct ta_xgmi_cmd_initialize_output { uint32_t status; }; @@ -97,6 +103,11 @@ struct ta_xgmi_cmd_get_topology_info_output { struct ta_xgmi_node_info nodes[TA_XGMI__MAX_CONNECTED_NODES]; }; +struct ta_xgmi_cmd_get_peer_link_info_output { + uint32_t num_nodes; + struct ta_xgmi_peer_link_info nodes[TA_XGMI__MAX_CONNECTED_NODES]; +}; + struct ta_xgmi_cmd_set_topology_info_input { uint32_t num_nodes; struct ta_xgmi_node_info nodes[TA_XGMI__MAX_CONNECTED_NODES]; @@ -115,6 +126,7 @@ union ta_xgmi_cmd_output { struct ta_xgmi_cmd_get_node_id_output get_node_id; struct ta_xgmi_cmd_get_hive_id_output get_hive_id; struct ta_xgmi_cmd_get_topology_info_output get_topology_info; + struct ta_xgmi_cmd_get_peer_link_info_output get_link_info; }; /**********************************************************/ @@ -122,7 +134,8 @@ struct ta_xgmi_shared_memory { uint32_t cmd_id; uint32_t resp_id; enum ta_xgmi_status xgmi_status; - uint32_t reserved; + uint8_t flag_extend_link_record; + uint8_t reserved0[3]; union ta_xgmi_cmd_input xgmi_in_message; union ta_xgmi_cmd_output xgmi_out_message; }; diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c index 3a8f787374c0..bb30336b1e8d 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c @@ -28,6 +28,21 @@ #include "umc/umc_6_7_0_offset.h" #include "umc/umc_6_7_0_sh_mask.h" +const uint32_t + umc_v6_7_channel_idx_tbl_second[UMC_V6_7_UMC_INSTANCE_NUM][UMC_V6_7_CHANNEL_INSTANCE_NUM] = { + {28, 20, 24, 16, 12, 4, 8, 0}, + {6, 30, 2, 26, 22, 14, 18, 10}, + {19, 11, 15, 7, 3, 27, 31, 23}, + {9, 1, 5, 29, 25, 17, 21, 13} +}; +const uint32_t + umc_v6_7_channel_idx_tbl_first[UMC_V6_7_UMC_INSTANCE_NUM][UMC_V6_7_CHANNEL_INSTANCE_NUM] = { + {19, 11, 15, 7, 3, 27, 31, 23}, + {9, 1, 5, 29, 25, 17, 21, 13}, + {28, 20, 24, 16, 12, 4, 8, 0}, + {6, 30, 2, 26, 22, 14, 18, 10}, +}; + static inline uint32_t get_umc_v6_7_reg_offset(struct amdgpu_device *adev, uint32_t umc_inst, uint32_t ch_inst) diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.h b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.h index 4eb85f247e96..57f2557e7aca 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.h +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.h @@ -23,6 +23,9 @@ #ifndef __UMC_V6_7_H__ #define __UMC_V6_7_H__ +#include "soc15_common.h" +#include "amdgpu.h" + /* EccErrCnt max value */ #define UMC_V6_7_CE_CNT_MAX 0xffff /* umc ce interrupt threshold */ @@ -32,6 +35,18 @@ #define UMC_V6_7_INST_DIST 0x40000 +/* number of umc channel instance with memory map register access */ +#define UMC_V6_7_UMC_INSTANCE_NUM 4 +/* number of umc instance with memory map register access */ +#define UMC_V6_7_CHANNEL_INSTANCE_NUM 8 
+/* total channel instances in one umc block */ +#define UMC_V6_7_TOTAL_CHANNEL_NUM (UMC_V6_7_CHANNEL_INSTANCE_NUM * UMC_V6_7_UMC_INSTANCE_NUM) +/* UMC regiser per channel offset */ +#define UMC_V6_7_PER_CHANNEL_OFFSET 0x400 extern const struct amdgpu_umc_ras_funcs umc_v6_7_ras_funcs; +extern const uint32_t + umc_v6_7_channel_idx_tbl_second[UMC_V6_7_UMC_INSTANCE_NUM][UMC_V6_7_CHANNEL_INSTANCE_NUM]; +extern const uint32_t + umc_v6_7_channel_idx_tbl_first[UMC_V6_7_UMC_INSTANCE_NUM][UMC_V6_7_CHANNEL_INSTANCE_NUM]; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c b/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c index 6c0e91495365..7232241e3bfb 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c @@ -698,6 +698,30 @@ static int uvd_v3_1_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + /* + * Proper cleanups before halting the HW engine: + * - cancel the delayed idle work + * - enable powergating + * - enable clockgating + * - disable dpm + * + * TODO: to align with the VCN implementation, move the + * jobs for clockgating/powergating/dpm setting to + * ->set_powergating_state(). + */ + cancel_delayed_work_sync(&adev->uvd.idle_work); + + if (adev->pm.dpm_enabled) { + amdgpu_dpm_enable_uvd(adev, false); + } else { + amdgpu_asic_set_uvd_clocks(adev, 0, 0); + /* shutdown the UVD block */ + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD, + AMD_PG_STATE_GATE); + amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD, + AMD_CG_STATE_GATE); + } + if (RREG32(mmUVD_STATUS) != 0) uvd_v3_1_stop(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c index a301518e4957..52d6de969f46 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c @@ -212,6 +212,30 @@ static int uvd_v4_2_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + /* + * Proper cleanups before halting the HW engine: + * - cancel the delayed idle work + * - enable powergating + * - enable clockgating + * - disable dpm + * + * TODO: to align with the VCN implementation, move the + * jobs for clockgating/powergating/dpm setting to + * ->set_powergating_state(). + */ + cancel_delayed_work_sync(&adev->uvd.idle_work); + + if (adev->pm.dpm_enabled) { + amdgpu_dpm_enable_uvd(adev, false); + } else { + amdgpu_asic_set_uvd_clocks(adev, 0, 0); + /* shutdown the UVD block */ + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD, + AMD_PG_STATE_GATE); + amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD, + AMD_CG_STATE_GATE); + } + if (RREG32(mmUVD_STATUS) != 0) uvd_v4_2_stop(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c index a4d5bd21c83c..db6d06758e4d 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c @@ -210,6 +210,30 @@ static int uvd_v5_0_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + /* + * Proper cleanups before halting the HW engine: + * - cancel the delayed idle work + * - enable powergating + * - enable clockgating + * - disable dpm + * + * TODO: to align with the VCN implementation, move the + * jobs for clockgating/powergating/dpm setting to + * ->set_powergating_state(). 
+ */ + cancel_delayed_work_sync(&adev->uvd.idle_work); + + if (adev->pm.dpm_enabled) { + amdgpu_dpm_enable_uvd(adev, false); + } else { + amdgpu_asic_set_uvd_clocks(adev, 0, 0); + /* shutdown the UVD block */ + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD, + AMD_PG_STATE_GATE); + amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD, + AMD_CG_STATE_GATE); + } + if (RREG32(mmUVD_STATUS) != 0) uvd_v5_0_stop(adev); @@ -224,7 +248,6 @@ static int uvd_v5_0_suspend(void *handle) r = uvd_v5_0_hw_fini(adev); if (r) return r; - uvd_v5_0_set_clockgating_state(adev, AMD_CG_STATE_GATE); return amdgpu_uvd_suspend(adev); } diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c index cf3803f8f075..bc571833632e 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c @@ -543,6 +543,30 @@ static int uvd_v6_0_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + /* + * Proper cleanups before halting the HW engine: + * - cancel the delayed idle work + * - enable powergating + * - enable clockgating + * - disable dpm + * + * TODO: to align with the VCN implementation, move the + * jobs for clockgating/powergating/dpm setting to + * ->set_powergating_state(). + */ + cancel_delayed_work_sync(&adev->uvd.idle_work); + + if (adev->pm.dpm_enabled) { + amdgpu_dpm_enable_uvd(adev, false); + } else { + amdgpu_asic_set_uvd_clocks(adev, 0, 0); + /* shutdown the UVD block */ + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD, + AMD_PG_STATE_GATE); + amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD, + AMD_CG_STATE_GATE); + } + if (RREG32(mmUVD_STATUS) != 0) uvd_v6_0_stop(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c index 939bcfa2a4ec..b6e82d75561f 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c @@ -606,6 +606,30 @@ static int uvd_v7_0_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + /* + * Proper cleanups before halting the HW engine: + * - cancel the delayed idle work + * - enable powergating + * - enable clockgating + * - disable dpm + * + * TODO: to align with the VCN implementation, move the + * jobs for clockgating/powergating/dpm setting to + * ->set_powergating_state(). + */ + cancel_delayed_work_sync(&adev->uvd.idle_work); + + if (adev->pm.dpm_enabled) { + amdgpu_dpm_enable_uvd(adev, false); + } else { + amdgpu_asic_set_uvd_clocks(adev, 0, 0); + /* shutdown the UVD block */ + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD, + AMD_PG_STATE_GATE); + amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD, + AMD_CG_STATE_GATE); + } + if (!amdgpu_sriov_vf(adev)) uvd_v7_0_stop(adev); else { diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c index c7d28c169be5..b70c17f0c52e 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c @@ -477,6 +477,31 @@ static int vce_v2_0_hw_init(void *handle) static int vce_v2_0_hw_fini(void *handle) { + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + /* + * Proper cleanups before halting the HW engine: + * - cancel the delayed idle work + * - enable powergating + * - enable clockgating + * - disable dpm + * + * TODO: to align with the VCN implementation, move the + * jobs for clockgating/powergating/dpm setting to + * ->set_powergating_state(). 
+ */ + cancel_delayed_work_sync(&adev->vce.idle_work); + + if (adev->pm.dpm_enabled) { + amdgpu_dpm_enable_vce(adev, false); + } else { + amdgpu_asic_set_vce_clocks(adev, 0, 0); + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE, + AMD_PG_STATE_GATE); + amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE, + AMD_CG_STATE_GATE); + } + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c index 3b82fb289ef6..9de66893ccd6 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c @@ -490,6 +490,29 @@ static int vce_v3_0_hw_fini(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + /* + * Proper cleanups before halting the HW engine: + * - cancel the delayed idle work + * - enable powergating + * - enable clockgating + * - disable dpm + * + * TODO: to align with the VCN implementation, move the + * jobs for clockgating/powergating/dpm setting to + * ->set_powergating_state(). + */ + cancel_delayed_work_sync(&adev->vce.idle_work); + + if (adev->pm.dpm_enabled) { + amdgpu_dpm_enable_vce(adev, false); + } else { + amdgpu_asic_set_vce_clocks(adev, 0, 0); + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE, + AMD_PG_STATE_GATE); + amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE, + AMD_CG_STATE_GATE); + } + r = vce_v3_0_wait_for_idle(handle); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c index 90910d19db12..fec902b800c2 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c @@ -542,6 +542,29 @@ static int vce_v4_0_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + /* + * Proper cleanups before halting the HW engine: + * - cancel the delayed idle work + * - enable powergating + * - enable clockgating + * - disable dpm + * + * TODO: to align with the VCN implementation, move the + * jobs for clockgating/powergating/dpm setting to + * ->set_powergating_state(). 
+ */ + cancel_delayed_work_sync(&adev->vce.idle_work); + + if (adev->pm.dpm_enabled) { + amdgpu_dpm_enable_vce(adev, false); + } else { + amdgpu_asic_set_vce_clocks(adev, 0, 0); + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE, + AMD_PG_STATE_GATE); + amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE, + AMD_CG_STATE_GATE); + } + if (!amdgpu_sriov_vf(adev)) { /* vce_v4_0_wait_for_idle(handle); */ vce_v4_0_stop(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index 284bb42d6c86..121ee9f2b8d1 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -119,7 +119,7 @@ static int vcn_v1_0_sw_init(void *handle) adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].fw = adev->vcn.fw; adev->firmware.fw_size += ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE); - DRM_INFO("PSP loading VCN firmware\n"); + dev_info(adev->dev, "Will use PSP to load VCN firmware\n"); } r = amdgpu_vcn_resume(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c index 8af567c546db..f4686e918e0d 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c @@ -122,7 +122,7 @@ static int vcn_v2_0_sw_init(void *handle) adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].fw = adev->vcn.fw; adev->firmware.fw_size += ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE); - DRM_INFO("PSP loading VCN firmware\n"); + dev_info(adev->dev, "Will use PSP to load VCN firmware\n"); } r = amdgpu_vcn_resume(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index 888b17d84691..e0c0c3734432 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -152,7 +152,7 @@ static int vcn_v2_5_sw_init(void *handle) adev->firmware.fw_size += ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE); } - DRM_INFO("PSP loading VCN firmware\n"); + dev_info(adev->dev, "Will use PSP to load VCN firmware\n"); } r = amdgpu_vcn_resume(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index 47d4f04cbd69..3d18aab88b4e 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -90,9 +90,7 @@ static int vcn_v3_0_early_init(void *handle) int i; if (amdgpu_sriov_vf(adev)) { - for (i = 0; i < VCN_INSTANCES_SIENNA_CICHLID; i++) - if (amdgpu_vcn_is_disabled_vcn(adev, VCN_DECODE_RING, i)) - adev->vcn.num_vcn_inst++; + adev->vcn.num_vcn_inst = VCN_INSTANCES_SIENNA_CICHLID; adev->vcn.harvest_config = 0; adev->vcn.num_enc_rings = 1; @@ -153,14 +151,13 @@ static int vcn_v3_0_sw_init(void *handle) adev->firmware.fw_size += ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE); - if ((adev->vcn.num_vcn_inst == VCN_INSTANCES_SIENNA_CICHLID) || - (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)) { + if (adev->vcn.num_vcn_inst == VCN_INSTANCES_SIENNA_CICHLID) { adev->firmware.ucode[AMDGPU_UCODE_ID_VCN1].ucode_id = AMDGPU_UCODE_ID_VCN1; adev->firmware.ucode[AMDGPU_UCODE_ID_VCN1].fw = adev->vcn.fw; adev->firmware.fw_size += ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE); } - DRM_INFO("PSP loading VCN firmware\n"); + dev_info(adev->dev, "Will use PSP to load VCN firmware\n"); } r = amdgpu_vcn_resume(adev); @@ -328,18 +325,28 @@ static int vcn_v3_0_hw_init(void *handle) continue; ring = &adev->vcn.inst[i].ring_dec; - ring->wptr = 0; - ring->wptr_old = 0; - vcn_v3_0_dec_ring_set_wptr(ring); - ring->sched.ready = 
true; - - for (j = 0; j < adev->vcn.num_enc_rings; ++j) { - ring = &adev->vcn.inst[i].ring_enc[j]; + if (amdgpu_vcn_is_disabled_vcn(adev, VCN_DECODE_RING, i)) { + ring->sched.ready = false; + dev_info(adev->dev, "ring %s is disabled by hypervisor\n", ring->name); + } else { ring->wptr = 0; ring->wptr_old = 0; - vcn_v3_0_enc_ring_set_wptr(ring); + vcn_v3_0_dec_ring_set_wptr(ring); ring->sched.ready = true; } + + for (j = 0; j < adev->vcn.num_enc_rings; ++j) { + ring = &adev->vcn.inst[i].ring_enc[j]; + if (amdgpu_vcn_is_disabled_vcn(adev, VCN_ENCODE_RING, i)) { + ring->sched.ready = false; + dev_info(adev->dev, "ring %s is disabled by hypervisor\n", ring->name); + } else { + ring->wptr = 0; + ring->wptr_old = 0; + vcn_v3_0_enc_ring_set_wptr(ring); + ring->sched.ready = true; + } + } } } else { for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 3d21c0799037..42a35d9520f9 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -77,7 +77,7 @@ #if defined(CONFIG_DRM_AMD_ACP) #include "amdgpu_acp.h" #endif -#include "dce_virtual.h" +#include "amdgpu_vkms.h" #include "mxgpu_vi.h" #include "amdgpu_dm.h" @@ -904,7 +904,14 @@ static bool vi_asic_supports_baco(struct amdgpu_device *adev) case CHIP_POLARIS11: case CHIP_POLARIS12: case CHIP_TOPAZ: - return amdgpu_dpm_is_baco_supported(adev); + /* Disable BACO support for the specific polaris12 SKU temporarily */ + if ((adev->pdev->device == 0x699F) && + (adev->pdev->revision == 0xC7) && + (adev->pdev->subsystem_vendor == 0x1028) && + (adev->pdev->subsystem_device == 0x0039)) + return false; + else + return amdgpu_dpm_is_baco_supported(adev); default: return false; } @@ -2102,7 +2109,7 @@ int vi_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &sdma_v2_4_ip_block); amdgpu_device_ip_block_add(adev, &pp_smu_ip_block); if (adev->enable_virtual_display) - amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); + amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block); break; case CHIP_FIJI: amdgpu_device_ip_block_add(adev, &vi_common_ip_block); @@ -2112,7 +2119,7 @@ int vi_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &sdma_v3_0_ip_block); amdgpu_device_ip_block_add(adev, &pp_smu_ip_block); if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) - amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); + amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block); #if defined(CONFIG_DRM_AMD_DC) else if (amdgpu_device_has_dc_support(adev)) amdgpu_device_ip_block_add(adev, &dm_ip_block); @@ -2132,7 +2139,7 @@ int vi_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &sdma_v3_0_ip_block); amdgpu_device_ip_block_add(adev, &pp_smu_ip_block); if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) - amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); + amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block); #if defined(CONFIG_DRM_AMD_DC) else if (amdgpu_device_has_dc_support(adev)) amdgpu_device_ip_block_add(adev, &dm_ip_block); @@ -2155,7 +2162,7 @@ int vi_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &sdma_v3_1_ip_block); amdgpu_device_ip_block_add(adev, &pp_smu_ip_block); if (adev->enable_virtual_display) - amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); + amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block); #if defined(CONFIG_DRM_AMD_DC) else if (amdgpu_device_has_dc_support(adev)) amdgpu_device_ip_block_add(adev, 
&dm_ip_block); @@ -2173,7 +2180,7 @@ int vi_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &sdma_v3_0_ip_block); amdgpu_device_ip_block_add(adev, &pp_smu_ip_block); if (adev->enable_virtual_display) - amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); + amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block); #if defined(CONFIG_DRM_AMD_DC) else if (amdgpu_device_has_dc_support(adev)) amdgpu_device_ip_block_add(adev, &dm_ip_block); @@ -2194,7 +2201,7 @@ int vi_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &sdma_v3_0_ip_block); amdgpu_device_ip_block_add(adev, &pp_smu_ip_block); if (adev->enable_virtual_display) - amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); + amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block); #if defined(CONFIG_DRM_AMD_DC) else if (amdgpu_device_has_dc_support(adev)) amdgpu_device_ip_block_add(adev, &dm_ip_block); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index e48acdd03c1a..86afd37b098d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1393,6 +1393,7 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep, long err = 0; int i; uint32_t *devices_arr = NULL; + bool table_freed = false; dev = kfd_device_by_id(GET_GPU_ID(args->handle)); if (!dev) @@ -1450,7 +1451,8 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep, goto get_mem_obj_from_handle_failed; } err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu( - peer->kgd, (struct kgd_mem *)mem, peer_pdd->drm_priv); + peer->kgd, (struct kgd_mem *)mem, + peer_pdd->drm_priv, &table_freed); if (err) { pr_err("Failed to map to gpu %d/%d\n", i, args->n_devices); @@ -1468,16 +1470,17 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep, } /* Flush TLBs after waiting for the page table updates to complete */ - for (i = 0; i < args->n_devices; i++) { - peer = kfd_device_by_id(devices_arr[i]); - if (WARN_ON_ONCE(!peer)) - continue; - peer_pdd = kfd_get_process_device_data(peer, p); - if (WARN_ON_ONCE(!peer_pdd)) - continue; - kfd_flush_tlb(peer_pdd, TLB_FLUSH_LEGACY); + if (table_freed) { + for (i = 0; i < args->n_devices; i++) { + peer = kfd_device_by_id(devices_arr[i]); + if (WARN_ON_ONCE(!peer)) + continue; + peer_pdd = kfd_get_process_device_data(peer, p); + if (WARN_ON_ONCE(!peer_pdd)) + continue; + kfd_flush_tlb(peer_pdd, TLB_FLUSH_LEGACY); + } } - kfree(devices_arr); return err; @@ -1565,10 +1568,29 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep, } args->n_success = i+1; } - kfree(devices_arr); - mutex_unlock(&p->mutex); + if (dev->device_info->asic_family == CHIP_ALDEBARAN) { + err = amdgpu_amdkfd_gpuvm_sync_memory(dev->kgd, + (struct kgd_mem *) mem, true); + if (err) { + pr_debug("Sync memory failed, wait interrupted by user signal\n"); + goto sync_memory_failed; + } + + /* Flush TLBs after waiting for the page table updates to complete */ + for (i = 0; i < args->n_devices; i++) { + peer = kfd_device_by_id(devices_arr[i]); + if (WARN_ON_ONCE(!peer)) + continue; + peer_pdd = kfd_get_process_device_data(peer, p); + if (WARN_ON_ONCE(!peer_pdd)) + continue; + kfd_flush_tlb(peer_pdd, TLB_FLUSH_HEAVYWEIGHT); + } + } + kfree(devices_arr); + return 0; bind_process_to_device_failed: @@ -1576,6 +1598,7 @@ get_mem_obj_from_handle_failed: unmap_memory_from_gpu_failed: mutex_unlock(&p->mutex); copy_from_user_failed: +sync_memory_failed: kfree(devices_arr); return err; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index c6b02aee4993..cfedfb1e8596 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -1404,6 +1404,7 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev, break; case CHIP_NAVI10: case CHIP_NAVI12: + case CHIP_CYAN_SKILLFISH: pcache_info = navi10_cache_info; num_of_cache_types = ARRAY_SIZE(navi10_cache_info); break; @@ -1989,8 +1990,19 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size, sub_type_hdr->flags |= CRAT_IOLINK_FLAGS_BI_DIRECTIONAL; sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI; sub_type_hdr->num_hops_xgmi = 1; + if (adev->asic_type == CHIP_ALDEBARAN) { + sub_type_hdr->minimum_bandwidth_mbs = + amdgpu_amdkfd_get_xgmi_bandwidth_mbytes( + kdev->kgd, NULL, true); + sub_type_hdr->maximum_bandwidth_mbs = + sub_type_hdr->minimum_bandwidth_mbs; + } } else { sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_PCIEXPRESS; + sub_type_hdr->minimum_bandwidth_mbs = + amdgpu_amdkfd_get_pcie_bandwidth_mbytes(kdev->kgd, true); + sub_type_hdr->maximum_bandwidth_mbs = + amdgpu_amdkfd_get_pcie_bandwidth_mbytes(kdev->kgd, false); } sub_type_hdr->proximity_domain_from = proximity_domain; @@ -2033,6 +2045,11 @@ static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size, sub_type_hdr->proximity_domain_to = proximity_domain_to; sub_type_hdr->num_hops_xgmi = amdgpu_amdkfd_get_xgmi_hops_count(kdev->kgd, peer_kdev->kgd); + sub_type_hdr->maximum_bandwidth_mbs = + amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->kgd, peer_kdev->kgd, false); + sub_type_hdr->minimum_bandwidth_mbs = sub_type_hdr->maximum_bandwidth_mbs ? + amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->kgd, NULL, true) : 0; + return 0; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 6b57dfd2cd2a..16a57b70cc1a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -84,12 +84,14 @@ static const struct kfd2kgd_calls *kfd2kgd_funcs[] = { [CHIP_DIMGREY_CAVEFISH] = &gfx_v10_3_kfd2kgd, [CHIP_BEIGE_GOBY] = &gfx_v10_3_kfd2kgd, [CHIP_YELLOW_CARP] = &gfx_v10_3_kfd2kgd, + [CHIP_CYAN_SKILLFISH] = &gfx_v10_kfd2kgd, }; #ifdef KFD_SUPPORT_IOMMU_V2 static const struct kfd_device_info kaveri_device_info = { .asic_family = CHIP_KAVERI, .asic_name = "kaveri", + .gfx_target_version = 70000, .max_pasid_bits = 16, /* max num of queues for KV.TODO should be a dynamic value */ .max_no_of_hqd = 24, @@ -109,6 +111,7 @@ static const struct kfd_device_info kaveri_device_info = { static const struct kfd_device_info carrizo_device_info = { .asic_family = CHIP_CARRIZO, .asic_name = "carrizo", + .gfx_target_version = 80001, .max_pasid_bits = 16, /* max num of queues for CZ.TODO should be a dynamic value */ .max_no_of_hqd = 24, @@ -129,6 +132,7 @@ static const struct kfd_device_info carrizo_device_info = { static const struct kfd_device_info raven_device_info = { .asic_family = CHIP_RAVEN, .asic_name = "raven", + .gfx_target_version = 90002, .max_pasid_bits = 16, .max_no_of_hqd = 24, .doorbell_size = 8, @@ -147,6 +151,7 @@ static const struct kfd_device_info raven_device_info = { static const struct kfd_device_info hawaii_device_info = { .asic_family = CHIP_HAWAII, .asic_name = "hawaii", + .gfx_target_version = 70001, .max_pasid_bits = 16, /* max num of queues for KV.TODO should be a dynamic value */ .max_no_of_hqd = 24, @@ -166,6 +171,7 @@ static const struct kfd_device_info hawaii_device_info = { static const struct kfd_device_info tonga_device_info = { .asic_family = 
CHIP_TONGA, .asic_name = "tonga", + .gfx_target_version = 80002, .max_pasid_bits = 16, .max_no_of_hqd = 24, .doorbell_size = 4, @@ -184,6 +190,7 @@ static const struct kfd_device_info tonga_device_info = { static const struct kfd_device_info fiji_device_info = { .asic_family = CHIP_FIJI, .asic_name = "fiji", + .gfx_target_version = 80003, .max_pasid_bits = 16, .max_no_of_hqd = 24, .doorbell_size = 4, @@ -202,6 +209,7 @@ static const struct kfd_device_info fiji_device_info = { static const struct kfd_device_info fiji_vf_device_info = { .asic_family = CHIP_FIJI, .asic_name = "fiji", + .gfx_target_version = 80003, .max_pasid_bits = 16, .max_no_of_hqd = 24, .doorbell_size = 4, @@ -221,6 +229,7 @@ static const struct kfd_device_info fiji_vf_device_info = { static const struct kfd_device_info polaris10_device_info = { .asic_family = CHIP_POLARIS10, .asic_name = "polaris10", + .gfx_target_version = 80003, .max_pasid_bits = 16, .max_no_of_hqd = 24, .doorbell_size = 4, @@ -239,6 +248,7 @@ static const struct kfd_device_info polaris10_device_info = { static const struct kfd_device_info polaris10_vf_device_info = { .asic_family = CHIP_POLARIS10, .asic_name = "polaris10", + .gfx_target_version = 80003, .max_pasid_bits = 16, .max_no_of_hqd = 24, .doorbell_size = 4, @@ -257,6 +267,7 @@ static const struct kfd_device_info polaris10_vf_device_info = { static const struct kfd_device_info polaris11_device_info = { .asic_family = CHIP_POLARIS11, .asic_name = "polaris11", + .gfx_target_version = 80003, .max_pasid_bits = 16, .max_no_of_hqd = 24, .doorbell_size = 4, @@ -275,6 +286,7 @@ static const struct kfd_device_info polaris11_device_info = { static const struct kfd_device_info polaris12_device_info = { .asic_family = CHIP_POLARIS12, .asic_name = "polaris12", + .gfx_target_version = 80003, .max_pasid_bits = 16, .max_no_of_hqd = 24, .doorbell_size = 4, @@ -293,6 +305,7 @@ static const struct kfd_device_info polaris12_device_info = { static const struct kfd_device_info vegam_device_info = { .asic_family = CHIP_VEGAM, .asic_name = "vegam", + .gfx_target_version = 80003, .max_pasid_bits = 16, .max_no_of_hqd = 24, .doorbell_size = 4, @@ -311,6 +324,7 @@ static const struct kfd_device_info vegam_device_info = { static const struct kfd_device_info vega10_device_info = { .asic_family = CHIP_VEGA10, .asic_name = "vega10", + .gfx_target_version = 90000, .max_pasid_bits = 16, .max_no_of_hqd = 24, .doorbell_size = 8, @@ -329,6 +343,7 @@ static const struct kfd_device_info vega10_device_info = { static const struct kfd_device_info vega10_vf_device_info = { .asic_family = CHIP_VEGA10, .asic_name = "vega10", + .gfx_target_version = 90000, .max_pasid_bits = 16, .max_no_of_hqd = 24, .doorbell_size = 8, @@ -347,6 +362,7 @@ static const struct kfd_device_info vega10_vf_device_info = { static const struct kfd_device_info vega12_device_info = { .asic_family = CHIP_VEGA12, .asic_name = "vega12", + .gfx_target_version = 90004, .max_pasid_bits = 16, .max_no_of_hqd = 24, .doorbell_size = 8, @@ -365,6 +381,7 @@ static const struct kfd_device_info vega12_device_info = { static const struct kfd_device_info vega20_device_info = { .asic_family = CHIP_VEGA20, .asic_name = "vega20", + .gfx_target_version = 90006, .max_pasid_bits = 16, .max_no_of_hqd = 24, .doorbell_size = 8, @@ -383,6 +400,7 @@ static const struct kfd_device_info vega20_device_info = { static const struct kfd_device_info arcturus_device_info = { .asic_family = CHIP_ARCTURUS, .asic_name = "arcturus", + .gfx_target_version = 90008, .max_pasid_bits = 16, .max_no_of_hqd = 24, 
.doorbell_size = 8, @@ -401,6 +419,7 @@ static const struct kfd_device_info arcturus_device_info = { static const struct kfd_device_info aldebaran_device_info = { .asic_family = CHIP_ALDEBARAN, .asic_name = "aldebaran", + .gfx_target_version = 90010, .max_pasid_bits = 16, .max_no_of_hqd = 24, .doorbell_size = 8, @@ -419,6 +438,7 @@ static const struct kfd_device_info aldebaran_device_info = { static const struct kfd_device_info renoir_device_info = { .asic_family = CHIP_RENOIR, .asic_name = "renoir", + .gfx_target_version = 90002, .max_pasid_bits = 16, .max_no_of_hqd = 24, .doorbell_size = 8, @@ -437,6 +457,7 @@ static const struct kfd_device_info renoir_device_info = { static const struct kfd_device_info navi10_device_info = { .asic_family = CHIP_NAVI10, .asic_name = "navi10", + .gfx_target_version = 100100, .max_pasid_bits = 16, .max_no_of_hqd = 24, .doorbell_size = 8, @@ -455,6 +476,7 @@ static const struct kfd_device_info navi10_device_info = { static const struct kfd_device_info navi12_device_info = { .asic_family = CHIP_NAVI12, .asic_name = "navi12", + .gfx_target_version = 100101, .max_pasid_bits = 16, .max_no_of_hqd = 24, .doorbell_size = 8, @@ -473,6 +495,7 @@ static const struct kfd_device_info navi12_device_info = { static const struct kfd_device_info navi14_device_info = { .asic_family = CHIP_NAVI14, .asic_name = "navi14", + .gfx_target_version = 100102, .max_pasid_bits = 16, .max_no_of_hqd = 24, .doorbell_size = 8, @@ -491,6 +514,7 @@ static const struct kfd_device_info navi14_device_info = { static const struct kfd_device_info sienna_cichlid_device_info = { .asic_family = CHIP_SIENNA_CICHLID, .asic_name = "sienna_cichlid", + .gfx_target_version = 100300, .max_pasid_bits = 16, .max_no_of_hqd = 24, .doorbell_size = 8, @@ -509,6 +533,7 @@ static const struct kfd_device_info sienna_cichlid_device_info = { static const struct kfd_device_info navy_flounder_device_info = { .asic_family = CHIP_NAVY_FLOUNDER, .asic_name = "navy_flounder", + .gfx_target_version = 100301, .max_pasid_bits = 16, .max_no_of_hqd = 24, .doorbell_size = 8, @@ -527,6 +552,7 @@ static const struct kfd_device_info navy_flounder_device_info = { static const struct kfd_device_info vangogh_device_info = { .asic_family = CHIP_VANGOGH, .asic_name = "vangogh", + .gfx_target_version = 100303, .max_pasid_bits = 16, .max_no_of_hqd = 24, .doorbell_size = 8, @@ -545,6 +571,7 @@ static const struct kfd_device_info vangogh_device_info = { static const struct kfd_device_info dimgrey_cavefish_device_info = { .asic_family = CHIP_DIMGREY_CAVEFISH, .asic_name = "dimgrey_cavefish", + .gfx_target_version = 100302, .max_pasid_bits = 16, .max_no_of_hqd = 24, .doorbell_size = 8, @@ -563,6 +590,7 @@ static const struct kfd_device_info dimgrey_cavefish_device_info = { static const struct kfd_device_info beige_goby_device_info = { .asic_family = CHIP_BEIGE_GOBY, .asic_name = "beige_goby", + .gfx_target_version = 100304, .max_pasid_bits = 16, .max_no_of_hqd = 24, .doorbell_size = 8, @@ -581,6 +609,7 @@ static const struct kfd_device_info beige_goby_device_info = { static const struct kfd_device_info yellow_carp_device_info = { .asic_family = CHIP_YELLOW_CARP, .asic_name = "yellow_carp", + .gfx_target_version = 100305, .max_pasid_bits = 16, .max_no_of_hqd = 24, .doorbell_size = 8, @@ -596,6 +625,25 @@ static const struct kfd_device_info yellow_carp_device_info = { .num_sdma_queues_per_engine = 2, }; +static const struct kfd_device_info cyan_skillfish_device_info = { + .asic_family = CHIP_CYAN_SKILLFISH, + .asic_name = "cyan_skillfish", + 
.gfx_target_version = 100103, + .max_pasid_bits = 16, + .max_no_of_hqd = 24, + .doorbell_size = 8, + .ih_ring_entry_size = 8 * sizeof(uint32_t), + .event_interrupt_class = &event_interrupt_class_v9, + .num_of_watch_points = 4, + .mqd_size_aligned = MQD_SIZE_ALIGNED, + .needs_iommu_device = false, + .supports_cwsr = true, + .needs_pci_atomics = true, + .num_sdma_engines = 2, + .num_xgmi_sdma_engines = 0, + .num_sdma_queues_per_engine = 8, +}; + /* For each entry, [0] is regular and [1] is virtualisation device. */ static const struct kfd_device_info *kfd_supported_devices[][2] = { #ifdef KFD_SUPPORT_IOMMU_V2 @@ -625,6 +673,7 @@ static const struct kfd_device_info *kfd_supported_devices[][2] = { [CHIP_DIMGREY_CAVEFISH] = {&dimgrey_cavefish_device_info, &dimgrey_cavefish_device_info}, [CHIP_BEIGE_GOBY] = {&beige_goby_device_info, &beige_goby_device_info}, [CHIP_YELLOW_CARP] = {&yellow_carp_device_info, NULL}, + [CHIP_CYAN_SKILLFISH] = {&cyan_skillfish_device_info, NULL}, }; static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size, @@ -1369,7 +1418,7 @@ void kfd_dec_compute_active(struct kfd_dev *kfd) WARN_ONCE(count < 0, "Compute profile ref. count error"); } -void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint32_t throttle_bitmask) +void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask) { if (kfd && kfd->init_complete) kfd_smi_event_update_thermal_throttling(kfd, throttle_bitmask); @@ -1382,18 +1431,12 @@ void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint32_t throttle_bitmask) */ int kfd_debugfs_hang_hws(struct kfd_dev *dev) { - int r = 0; - if (dev->dqm->sched_policy != KFD_SCHED_POLICY_HWS) { pr_err("HWS is not enabled"); return -EINVAL; } - r = pm_debugfs_hang_hws(&dev->dqm->packets); - if (!r) - r = dqm_debugfs_execute_queues(dev->dqm); - - return r; + return dqm_debugfs_hang_hws(dev->dqm); } #endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 16a1713808c2..f8fce9d05f50 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -211,6 +211,15 @@ static void deallocate_doorbell(struct qcm_process_device *qpd, WARN_ON(!old); } +static void program_trap_handler_settings(struct device_queue_manager *dqm, + struct qcm_process_device *qpd) +{ + if (dqm->dev->kfd2kgd->program_trap_handler_settings) + dqm->dev->kfd2kgd->program_trap_handler_settings( + dqm->dev->kgd, qpd->vmid, + qpd->tba_addr, qpd->tma_addr); +} + static int allocate_vmid(struct device_queue_manager *dqm, struct qcm_process_device *qpd, struct queue *q) @@ -241,6 +250,10 @@ static int allocate_vmid(struct device_queue_manager *dqm, program_sh_mem_settings(dqm, qpd); + if (dqm->dev->device_info->asic_family >= CHIP_VEGA10 && + dqm->dev->cwsr_enabled) + program_trap_handler_settings(dqm, qpd); + /* qpd->page_table_base is set earlier when register_process() * is called, i.e. when the first queue is created. 
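The allocate_vmid hunk just above only programs the trap handler when the ASIC is Vega10 or newer and CWSR is enabled, and program_trap_handler_settings() itself checks that the kfd2kgd backend implements the callback. Folded together (sketch only, helper name invented), the guard amounts to:

static void setup_trap_handler_sketch(struct device_queue_manager *dqm,
                                      struct qcm_process_device *qpd)
{
        /* CWSR trap handling is only wired up for Vega10 and later */
        if (dqm->dev->device_info->asic_family < CHIP_VEGA10 ||
            !dqm->dev->cwsr_enabled)
                return;

        /* the backend may not provide the callback; skip quietly if so */
        if (dqm->dev->kfd2kgd->program_trap_handler_settings)
                dqm->dev->kfd2kgd->program_trap_handler_settings(
                        dqm->dev->kgd, qpd->vmid,
                        qpd->tba_addr, qpd->tma_addr);
}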
*/ @@ -260,7 +273,7 @@ static int allocate_vmid(struct device_queue_manager *dqm, static int flush_texture_cache_nocpsch(struct kfd_dev *kdev, struct qcm_process_device *qpd) { - const struct packet_manager_funcs *pmf = qpd->dqm->packets.pmf; + const struct packet_manager_funcs *pmf = qpd->dqm->packet_mgr.pmf; int ret; if (!qpd->ib_kaddr) @@ -582,7 +595,9 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q) } retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd, - KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN, + (dqm->dev->cwsr_enabled? + KFD_PREEMPT_TYPE_WAVEFRONT_SAVE: + KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN), KFD_UNMAP_LATENCY_MS, q->pipe, q->queue); if (retval) { pr_err("destroy mqd failed\n"); @@ -675,7 +690,9 @@ static int evict_process_queues_nocpsch(struct device_queue_manager *dqm, continue; retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd, - KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN, + (dqm->dev->cwsr_enabled? + KFD_PREEMPT_TYPE_WAVEFRONT_SAVE: + KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN), KFD_UNMAP_LATENCY_MS, q->pipe, q->queue); if (retval && !ret) /* Return the first error, but keep going to @@ -1000,7 +1017,7 @@ static int start_nocpsch(struct device_queue_manager *dqm) init_interrupts(dqm); if (dqm->dev->device_info->asic_family == CHIP_HAWAII) - return pm_init(&dqm->packets, dqm); + return pm_init(&dqm->packet_mgr, dqm); dqm->sched_running = true; return 0; @@ -1009,7 +1026,7 @@ static int start_nocpsch(struct device_queue_manager *dqm) static int stop_nocpsch(struct device_queue_manager *dqm) { if (dqm->dev->device_info->asic_family == CHIP_HAWAII) - pm_uninit(&dqm->packets, false); + pm_uninit(&dqm->packet_mgr, false); dqm->sched_running = false; return 0; @@ -1124,7 +1141,7 @@ static int set_sched_resources(struct device_queue_manager *dqm) "queue mask: 0x%8llX\n", res.vmid_mask, res.queue_mask); - return pm_send_set_resources(&dqm->packets, &res); + return pm_send_set_resources(&dqm->packet_mgr, &res); } static int initialize_cpsch(struct device_queue_manager *dqm) @@ -1164,7 +1181,8 @@ static int start_cpsch(struct device_queue_manager *dqm) retval = 0; - retval = pm_init(&dqm->packets, dqm); + dqm_lock(dqm); + retval = pm_init(&dqm->packet_mgr, dqm); if (retval) goto fail_packet_manager_init; @@ -1186,7 +1204,6 @@ static int start_cpsch(struct device_queue_manager *dqm) init_interrupts(dqm); - dqm_lock(dqm); /* clear hang status when driver try to start the hw scheduler */ dqm->is_hws_hang = false; dqm->is_resetting = false; @@ -1197,8 +1214,9 @@ static int start_cpsch(struct device_queue_manager *dqm) return 0; fail_allocate_vidmem: fail_set_sched_resources: - pm_uninit(&dqm->packets, false); + pm_uninit(&dqm->packet_mgr, false); fail_packet_manager_init: + dqm_unlock(dqm); return retval; } @@ -1211,12 +1229,12 @@ static int stop_cpsch(struct device_queue_manager *dqm) unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0); hanging = dqm->is_hws_hang || dqm->is_resetting; dqm->sched_running = false; - dqm_unlock(dqm); - pm_release_ib(&dqm->packets); + pm_release_ib(&dqm->packet_mgr); kfd_gtt_sa_free(dqm->dev, dqm->fence_mem); - pm_uninit(&dqm->packets, hanging); + pm_uninit(&dqm->packet_mgr, hanging); + dqm_unlock(dqm); return 0; } @@ -1390,7 +1408,7 @@ static int map_queues_cpsch(struct device_queue_manager *dqm) if (dqm->active_runlist) return 0; - retval = pm_send_runlist(&dqm->packets, &dqm->queues); + retval = pm_send_runlist(&dqm->packet_mgr, &dqm->queues); pr_debug("%s sent runlist\n", __func__); if (retval) { pr_err("failed to execute runlist\n"); @@ -1416,13 +1434,13 @@ 
static int unmap_queues_cpsch(struct device_queue_manager *dqm, if (!dqm->active_runlist) return retval; - retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE, + retval = pm_send_unmap_queue(&dqm->packet_mgr, KFD_QUEUE_TYPE_COMPUTE, filter, filter_param, false, 0); if (retval) return retval; *dqm->fence_addr = KFD_FENCE_INIT; - pm_send_query_status(&dqm->packets, dqm->fence_gpu_addr, + pm_send_query_status(&dqm->packet_mgr, dqm->fence_gpu_addr, KFD_FENCE_COMPLETED); /* should be timed out */ retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED, @@ -1448,14 +1466,14 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm, * check those fields */ mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]; - if (mqd_mgr->read_doorbell_id(dqm->packets.priv_queue->queue->mqd)) { + if (mqd_mgr->read_doorbell_id(dqm->packet_mgr.priv_queue->queue->mqd)) { pr_err("HIQ MQD's queue_doorbell_id0 is not 0, Queue preemption time out\n"); while (halt_if_hws_hang) schedule(); return -ETIME; } - pm_release_ib(&dqm->packets); + pm_release_ib(&dqm->packet_mgr); dqm->active_runlist = false; return retval; @@ -1946,6 +1964,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) case CHIP_DIMGREY_CAVEFISH: case CHIP_BEIGE_GOBY: case CHIP_YELLOW_CARP: + case CHIP_CYAN_SKILLFISH: device_queue_manager_init_v10_navi10(&dqm->asic_ops); break; default: @@ -2099,11 +2118,16 @@ int dqm_debugfs_hqds(struct seq_file *m, void *data) return r; } -int dqm_debugfs_execute_queues(struct device_queue_manager *dqm) +int dqm_debugfs_hang_hws(struct device_queue_manager *dqm) { int r = 0; dqm_lock(dqm); + r = pm_debugfs_hang_hws(&dqm->packet_mgr); + if (r) { + dqm_unlock(dqm); + return r; + } dqm->active_runlist = true; r = execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0); dqm_unlock(dqm); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h index 71e2fde56b2b..c8719682c4da 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h @@ -169,7 +169,7 @@ struct device_queue_manager { struct device_queue_manager_asic_ops asic_ops; struct mqd_manager *mqd_mgrs[KFD_MQD_TYPE_MAX]; - struct packet_manager packets; + struct packet_manager packet_mgr; struct kfd_dev *dev; struct mutex lock_hidden; /* use dqm_lock/unlock(dqm) */ struct list_head queues; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c index a9b329f0f862..2e86692def19 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c @@ -422,6 +422,7 @@ int kfd_init_apertures(struct kfd_process *process) case CHIP_DIMGREY_CAVEFISH: case CHIP_BEIGE_GOBY: case CHIP_YELLOW_CARP: + case CHIP_CYAN_SKILLFISH: kfd_init_apertures_v9(pdd, id); break; default: diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c index 88813dad731f..c021519af810 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c @@ -98,36 +98,78 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm, uint32_t *se_mask) { struct kfd_cu_info cu_info; - uint32_t cu_per_se[KFD_MAX_NUM_SE] = {0}; - int i, se, sh, cu = 0; - + uint32_t cu_per_sh[KFD_MAX_NUM_SE][KFD_MAX_NUM_SH_PER_SE] = {0}; + int i, se, sh, cu; amdgpu_amdkfd_get_cu_info(mm->dev->kgd, &cu_info); if (cu_mask_count > cu_info.cu_active_number) 
cu_mask_count = cu_info.cu_active_number; + /* Exceeding these bounds corrupts the stack and indicates a coding error. + * Returning with no CU's enabled will hang the queue, which should be + * attention grabbing. + */ + if (cu_info.num_shader_engines > KFD_MAX_NUM_SE) { + pr_err("Exceeded KFD_MAX_NUM_SE, chip reports %d\n", cu_info.num_shader_engines); + return; + } + if (cu_info.num_shader_arrays_per_engine > KFD_MAX_NUM_SH_PER_SE) { + pr_err("Exceeded KFD_MAX_NUM_SH, chip reports %d\n", + cu_info.num_shader_arrays_per_engine * cu_info.num_shader_engines); + return; + } + /* Count active CUs per SH. + * + * Some CUs in an SH may be disabled. HW expects disabled CUs to be + * represented in the high bits of each SH's enable mask (the upper and lower + * 16 bits of se_mask) and will take care of the actual distribution of + * disabled CUs within each SH automatically. + * Each half of se_mask must be filled only on bits 0-cu_per_sh[se][sh]-1. + * + * See note on Arcturus cu_bitmap layout in gfx_v9_0_get_cu_info. + */ for (se = 0; se < cu_info.num_shader_engines; se++) for (sh = 0; sh < cu_info.num_shader_arrays_per_engine; sh++) - cu_per_se[se] += hweight32(cu_info.cu_bitmap[se % 4][sh + (se / 4)]); - - /* Symmetrically map cu_mask to all SEs: - * cu_mask[0] bit0 -> se_mask[0] bit0; - * cu_mask[0] bit1 -> se_mask[1] bit0; - * ... (if # SE is 4) - * cu_mask[0] bit4 -> se_mask[0] bit1; + cu_per_sh[se][sh] = hweight32(cu_info.cu_bitmap[se % 4][sh + (se / 4)]); + + /* Symmetrically map cu_mask to all SEs & SHs: + * se_mask programs up to 2 SH in the upper and lower 16 bits. + * + * Examples + * Assuming 1 SH/SE, 4 SEs: + * cu_mask[0] bit0 -> se_mask[0] bit0 + * cu_mask[0] bit1 -> se_mask[1] bit0 + * ... + * cu_mask[0] bit4 -> se_mask[0] bit1 + * ... + * + * Assuming 2 SH/SE, 4 SEs + * cu_mask[0] bit0 -> se_mask[0] bit0 (SE0,SH0,CU0) + * cu_mask[0] bit1 -> se_mask[1] bit0 (SE1,SH0,CU0) + * ... + * cu_mask[0] bit4 -> se_mask[0] bit16 (SE0,SH1,CU0) + * cu_mask[0] bit5 -> se_mask[1] bit16 (SE1,SH1,CU0) + * ... + * cu_mask[0] bit8 -> se_mask[0] bit1 (SE0,SH0,CU1) * ... + * + * First ensure all CUs are disabled, then enable user specified CUs. 
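The iteration order matters here: CU index is the outermost loop, then shader array, then shader engine, so consecutive cu_mask bits land on different engines and arrays before a second CU is enabled anywhere. A small standalone demo of that order for the first 32 mask bits (illustration only; the topology numbers are made up, not read from hardware):

#include <stdio.h>

int main(void)
{
        int num_se = 4, num_sh = 2, cu_per_sh = 10;     /* invented topology */
        int bit = 0;

        for (int cu = 0; cu < 16 && bit < 32; cu++)
                for (int sh = 0; sh < num_sh && bit < 32; sh++)
                        for (int se = 0; se < num_se && bit < 32; se++) {
                                if (cu >= cu_per_sh)
                                        continue;
                                /* bit "bit" of cu_mask selects CU "cu" in SE "se", SH "sh",
                                 * i.e. bit (cu + sh * 16) of se_mask[se]
                                 */
                                printf("cu_mask bit %2d -> SE%d SH%d CU%d -> se_mask[%d] bit %d\n",
                                       bit, se, sh, cu, se, cu + sh * 16);
                                bit++;
                        }
        return 0;
}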
*/ - se = 0; - for (i = 0; i < cu_mask_count; i++) { - if (cu_mask[i / 32] & (1 << (i % 32))) - se_mask[se] |= 1 << cu; - - do { - se++; - if (se == cu_info.num_shader_engines) { - se = 0; - cu++; + for (i = 0; i < cu_info.num_shader_engines; i++) + se_mask[i] = 0; + + i = 0; + for (cu = 0; cu < 16; cu++) { + for (sh = 0; sh < cu_info.num_shader_arrays_per_engine; sh++) { + for (se = 0; se < cu_info.num_shader_engines; se++) { + if (cu_per_sh[se][sh] > cu) { + if (cu_mask[i / 32] & (1 << (i % 32))) + se_mask[se] |= 1 << (cu + sh * 16); + i++; + if (i == cu_mask_count) + return; + } } - } while (cu >= cu_per_se[se] && cu < 32); + } } } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h index b5e2ea7550d4..6e6918ccedfd 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h @@ -27,6 +27,7 @@ #include "kfd_priv.h" #define KFD_MAX_NUM_SE 8 +#define KFD_MAX_NUM_SH_PER_SE 2 /** * struct mqd_manager diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c index d8e940f03102..e547f1f8c49f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c @@ -251,6 +251,7 @@ int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm) case CHIP_DIMGREY_CAVEFISH: case CHIP_BEIGE_GOBY: case CHIP_YELLOW_CARP: + case CHIP_CYAN_SKILLFISH: pm->pmf = &kfd_v9_pm_funcs; break; case CHIP_ALDEBARAN: @@ -278,6 +279,7 @@ void pm_uninit(struct packet_manager *pm, bool hanging) { mutex_destroy(&pm->lock); kernel_queue_uninit(pm->priv_queue, hanging); + pm->priv_queue = NULL; } int pm_send_set_resources(struct packet_manager *pm, @@ -447,6 +449,9 @@ int pm_debugfs_hang_hws(struct packet_manager *pm) uint32_t *buffer, size; int r = 0; + if (!pm->priv_queue) + return -EAGAIN; + size = pm->pmf->query_status_size; mutex_lock(&pm->lock); kq_acquire_packet_buffer(pm->priv_queue, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 3426743ed228..ab83b0de6b22 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -196,6 +196,7 @@ struct kfd_event_interrupt_class { struct kfd_device_info { enum amd_asic_type asic_family; const char *asic_name; + uint32_t gfx_target_version; const struct kfd_event_interrupt_class *event_interrupt_class; unsigned int max_pasid_bits; unsigned int max_no_of_hqd; @@ -1194,7 +1195,7 @@ int pm_debugfs_runlist(struct seq_file *m, void *data); int kfd_debugfs_hang_hws(struct kfd_dev *dev); int pm_debugfs_hang_hws(struct packet_manager *pm); -int dqm_debugfs_execute_queues(struct device_queue_manager *dqm); +int dqm_debugfs_hang_hws(struct device_queue_manager *dqm); #else diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 8a2c6fc438c0..21ec8a18cad2 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -714,7 +714,8 @@ static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd, if (err) goto err_alloc_mem; - err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->kgd, mem, pdd->drm_priv); + err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->kgd, mem, + pdd->drm_priv, NULL); if (err) goto err_map_mem; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c index 246522423559..ed4bc5f844ce 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c +++ 
b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c @@ -205,23 +205,23 @@ void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev, bool post_reset) } void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev, - uint32_t throttle_bitmask) + uint64_t throttle_bitmask) { struct amdgpu_device *adev = (struct amdgpu_device *)dev->kgd; /* * ThermalThrottle msg = throttle_bitmask(8): * thermal_interrupt_count(16): - * 1 byte event + 1 byte space + 8 byte throttle_bitmask + + * 1 byte event + 1 byte space + 16 byte throttle_bitmask + * 1 byte : + 16 byte thermal_interupt_counter + 1 byte \n + - * 1 byte \0 = 29 + * 1 byte \0 = 37 */ - char fifo_in[29]; + char fifo_in[37]; int len; if (list_empty(&dev->smi_clients)) return; - len = snprintf(fifo_in, sizeof(fifo_in), "%x %x:%llx\n", + len = snprintf(fifo_in, sizeof(fifo_in), "%x %llx:%llx\n", KFD_SMI_EVENT_THERMAL_THROTTLE, throttle_bitmask, atomic64_read(&adev->smu.throttle_int_counter)); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h index b9b0438202e2..bffd0c32b060 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h @@ -26,7 +26,7 @@ int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd); void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid); void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev, - uint32_t throttle_bitmask); + uint64_t throttle_bitmask); void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev, bool post_reset); #endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index e883731c3f8f..491373fcdb38 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -120,6 +120,7 @@ static void svm_range_remove_notifier(struct svm_range *prange) static int svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange, + unsigned long offset, unsigned long npages, unsigned long *hmm_pfns, uint32_t gpuidx) { enum dma_data_direction dir = DMA_BIDIRECTIONAL; @@ -136,7 +137,8 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange, prange->dma_addr[gpuidx] = addr; } - for (i = 0; i < prange->npages; i++) { + addr += offset; + for (i = 0; i < npages; i++) { if (WARN_ONCE(addr[i] && !dma_mapping_error(dev, addr[i]), "leaking dma mapping\n")) dma_unmap_page(dev, addr[i], PAGE_SIZE, dir); @@ -167,6 +169,7 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange, static int svm_range_dma_map(struct svm_range *prange, unsigned long *bitmap, + unsigned long offset, unsigned long npages, unsigned long *hmm_pfns) { struct kfd_process *p; @@ -187,7 +190,8 @@ svm_range_dma_map(struct svm_range *prange, unsigned long *bitmap, } adev = (struct amdgpu_device *)pdd->dev->kgd; - r = svm_range_dma_map_dev(adev, prange, hmm_pfns, gpuidx); + r = svm_range_dma_map_dev(adev, prange, offset, npages, + hmm_pfns, gpuidx); if (r) break; } @@ -1088,11 +1092,6 @@ svm_range_get_pte_flags(struct amdgpu_device *adev, struct svm_range *prange, pte_flags |= snoop ? AMDGPU_PTE_SNOOPED : 0; pte_flags |= amdgpu_gem_va_map_flags(adev, mapping_flags); - - pr_debug("svms 0x%p [0x%lx 0x%lx] vram %d PTE 0x%llx mapping 0x%x\n", - prange->svms, prange->start, prange->last, - (domain == SVM_RANGE_VRAM_DOMAIN) ? 
1:0, pte_flags, mapping_flags); - return pte_flags; } @@ -1156,7 +1155,8 @@ svm_range_unmap_from_gpus(struct svm_range *prange, unsigned long start, static int svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm, - struct svm_range *prange, dma_addr_t *dma_addr, + struct svm_range *prange, unsigned long offset, + unsigned long npages, bool readonly, dma_addr_t *dma_addr, struct amdgpu_device *bo_adev, struct dma_fence **fence) { struct amdgpu_bo_va bo_va; @@ -1167,14 +1167,15 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm, int r = 0; int64_t i; - pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start, - prange->last); + last_start = prange->start + offset; + + pr_debug("svms 0x%p [0x%lx 0x%lx] readonly %d\n", prange->svms, + last_start, last_start + npages - 1, readonly); if (prange->svm_bo && prange->ttm_res) bo_va.is_xgmi = amdgpu_xgmi_same_hive(adev, bo_adev); - last_start = prange->start; - for (i = 0; i < prange->npages; i++) { + for (i = offset; i < offset + npages; i++) { last_domain = dma_addr[i] & SVM_RANGE_VRAM_DOMAIN; dma_addr[i] &= ~SVM_RANGE_VRAM_DOMAIN; if ((prange->start + i) < prange->last && @@ -1183,13 +1184,21 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm, pr_debug("Mapping range [0x%lx 0x%llx] on domain: %s\n", last_start, prange->start + i, last_domain ? "GPU" : "CPU"); + pte_flags = svm_range_get_pte_flags(adev, prange, last_domain); - r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, false, NULL, - last_start, + if (readonly) + pte_flags &= ~AMDGPU_PTE_WRITEABLE; + + pr_debug("svms 0x%p map [0x%lx 0x%llx] vram %d PTE 0x%llx\n", + prange->svms, last_start, prange->start + i, + (last_domain == SVM_RANGE_VRAM_DOMAIN) ? 1 : 0, + pte_flags); + + r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, false, + NULL, last_start, prange->start + i, pte_flags, last_start - prange->start, - NULL, - dma_addr, + NULL, dma_addr, &vm->last_update, &table_freed); if (r) { @@ -1220,8 +1229,10 @@ out: return r; } -static int svm_range_map_to_gpus(struct svm_range *prange, - unsigned long *bitmap, bool wait) +static int +svm_range_map_to_gpus(struct svm_range *prange, unsigned long offset, + unsigned long npages, bool readonly, + unsigned long *bitmap, bool wait) { struct kfd_process_device *pdd; struct amdgpu_device *bo_adev; @@ -1257,7 +1268,8 @@ static int svm_range_map_to_gpus(struct svm_range *prange, } r = svm_range_map_to_gpu(adev, drm_priv_to_vm(pdd->drm_priv), - prange, prange->dma_addr[gpuidx], + prange, offset, npages, readonly, + prange->dma_addr[gpuidx], bo_adev, wait ? 
&fence : NULL); if (r) break; @@ -1390,7 +1402,7 @@ static int svm_range_validate_and_map(struct mm_struct *mm, int32_t gpuidx, bool intr, bool wait) { struct svm_validate_context ctx; - struct hmm_range *hmm_range; + unsigned long start, end, addr; struct kfd_process *p; void *owner; int32_t idx; @@ -1448,40 +1460,66 @@ static int svm_range_validate_and_map(struct mm_struct *mm, break; } } - r = amdgpu_hmm_range_get_pages(&prange->notifier, mm, NULL, - prange->start << PAGE_SHIFT, - prange->npages, &hmm_range, - false, true, owner); - if (r) { - pr_debug("failed %d to get svm range pages\n", r); - goto unreserve_out; - } - r = svm_range_dma_map(prange, ctx.bitmap, - hmm_range->hmm_pfns); - if (r) { - pr_debug("failed %d to dma map range\n", r); - goto unreserve_out; - } + start = prange->start << PAGE_SHIFT; + end = (prange->last + 1) << PAGE_SHIFT; + for (addr = start; addr < end && !r; ) { + struct hmm_range *hmm_range; + struct vm_area_struct *vma; + unsigned long next; + unsigned long offset; + unsigned long npages; + bool readonly; - prange->validated_once = true; + vma = find_vma(mm, addr); + if (!vma || addr < vma->vm_start) { + r = -EFAULT; + goto unreserve_out; + } + readonly = !(vma->vm_flags & VM_WRITE); - svm_range_lock(prange); - if (amdgpu_hmm_range_get_pages_done(hmm_range)) { - pr_debug("hmm update the range, need validate again\n"); - r = -EAGAIN; - goto unlock_out; - } - if (!list_empty(&prange->child_list)) { - pr_debug("range split by unmap in parallel, validate again\n"); - r = -EAGAIN; - goto unlock_out; - } + next = min(vma->vm_end, end); + npages = (next - addr) >> PAGE_SHIFT; + r = amdgpu_hmm_range_get_pages(&prange->notifier, mm, NULL, + addr, npages, &hmm_range, + readonly, true, owner); + if (r) { + pr_debug("failed %d to get svm range pages\n", r); + goto unreserve_out; + } - r = svm_range_map_to_gpus(prange, ctx.bitmap, wait); + offset = (addr - start) >> PAGE_SHIFT; + r = svm_range_dma_map(prange, ctx.bitmap, offset, npages, + hmm_range->hmm_pfns); + if (r) { + pr_debug("failed %d to dma map range\n", r); + goto unreserve_out; + } + + svm_range_lock(prange); + if (amdgpu_hmm_range_get_pages_done(hmm_range)) { + pr_debug("hmm update the range, need validate again\n"); + r = -EAGAIN; + goto unlock_out; + } + if (!list_empty(&prange->child_list)) { + pr_debug("range split by unmap in parallel, validate again\n"); + r = -EAGAIN; + goto unlock_out; + } + + r = svm_range_map_to_gpus(prange, offset, npages, readonly, + ctx.bitmap, wait); unlock_out: - svm_range_unlock(prange); + svm_range_unlock(prange); + + addr = next; + } + + if (addr == end) + prange->validated_once = true; + unreserve_out: svm_range_unreserve_bos(&ctx); @@ -2400,9 +2438,29 @@ svm_range_count_fault(struct amdgpu_device *adev, struct kfd_process *p, WRITE_ONCE(pdd->faults, pdd->faults + 1); } +static bool +svm_fault_allowed(struct mm_struct *mm, uint64_t addr, bool write_fault) +{ + unsigned long requested = VM_READ; + struct vm_area_struct *vma; + + if (write_fault) + requested |= VM_WRITE; + + vma = find_vma(mm, addr << PAGE_SHIFT); + if (!vma || (addr << PAGE_SHIFT) < vma->vm_start) { + pr_debug("address 0x%llx VMA is removed\n", addr); + return true; + } + + pr_debug("requested 0x%lx, vma permission flags 0x%lx\n", requested, + vma->vm_flags); + return (vma->vm_flags & requested) == requested; +} + int svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, - uint64_t addr) + uint64_t addr, bool write_fault) { struct mm_struct *mm = NULL; struct svm_range_list *svms; @@ 
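Stripped of locking, retries and error unwinding, the per-VMA walk that replaces the single whole-range lookup above goes roughly like this (sketch only, reusing the helpers from the hunk): each VMA contributes one chunk, read-only mapping is derived from the VMA's VM_WRITE flag, and the range only counts as validated once every chunk succeeded.

        /* sketch of svm_range_validate_and_map()'s new inner loop;
         * declarations, locking and the -EAGAIN retry paths are omitted
         */
        start = prange->start << PAGE_SHIFT;
        end = (prange->last + 1) << PAGE_SHIFT;
        for (addr = start; addr < end && !r; addr = next) {
                vma = find_vma(mm, addr);
                if (!vma || addr < vma->vm_start)
                        return -EFAULT;         /* range no longer backed by a VMA */

                readonly = !(vma->vm_flags & VM_WRITE);
                next = min(vma->vm_end, end);
                npages = (next - addr) >> PAGE_SHIFT;
                offset = (addr - start) >> PAGE_SHIFT;

                r = amdgpu_hmm_range_get_pages(&prange->notifier, mm, NULL, addr,
                                               npages, &hmm_range, readonly,
                                               true, owner);
                if (!r)
                        r = svm_range_dma_map(prange, ctx.bitmap, offset, npages,
                                              hmm_range->hmm_pfns);
                if (!r)
                        r = svm_range_map_to_gpus(prange, offset, npages, readonly,
                                                  ctx.bitmap, wait);
        }
        if (!r)
                prange->validated_once = true;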
-2484,6 +2542,13 @@ retry_write_locked: goto out_unlock_range; } + if (!svm_fault_allowed(mm, addr, write_fault)) { + pr_debug("fault addr 0x%llx no %s permission\n", addr, + write_fault ? "write" : "read"); + r = -EPERM; + goto out_unlock_range; + } + best_loc = svm_range_best_restore_location(prange, adev, &gpuidx); if (best_loc == -1) { pr_debug("svms %p failed get best restore loc [0x%lx 0x%lx]\n", @@ -2675,22 +2740,26 @@ svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size, return 0; } -/* svm_range_best_prefetch_location - decide the best prefetch location +/** + * svm_range_best_prefetch_location - decide the best prefetch location * @prange: svm range structure * * For xnack off: - * If range map to single GPU, the best acutal location is prefetch loc, which + * If range map to single GPU, the best prefetch location is prefetch_loc, which * can be CPU or GPU. * - * If range map to multiple GPUs, only if mGPU connection on xgmi same hive, - * the best actual location could be prefetch_loc GPU. If mGPU connection on - * PCIe, the best actual location is always CPU, because GPU cannot access vram - * of other GPUs, assuming PCIe small bar (large bar support is not upstream). + * If range is ACCESS or ACCESS_IN_PLACE by mGPUs, only if mGPU connection on + * XGMI same hive, the best prefetch location is prefetch_loc GPU, othervise + * the best prefetch location is always CPU, because GPU can not have coherent + * mapping VRAM of other GPUs even with large-BAR PCIe connection. * * For xnack on: - * The best actual location is prefetch location. If mGPU connection on xgmi - * same hive, range map to multiple GPUs. Otherwise, the range only map to - * actual location GPU. Other GPU access vm fault will trigger migration. + * If range is not ACCESS_IN_PLACE by mGPUs, the best prefetch location is + * prefetch_loc, other GPU access will generate vm fault and trigger migration. + * + * If range is ACCESS_IN_PLACE by mGPUs, only if mGPU connection on XGMI same + * hive, the best prefetch location is prefetch_loc GPU, otherwise the best + * prefetch location is always CPU. 
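In code form, the policy described here reduces to choosing which GPUs must share an XGMI hive with the preferred one (sketch only; same_hive() stands in for the real XGMI check, and 0 is assumed to mean CPU/system memory as in the "best_loc = 0" fallback shown below):

        /* condensed decision from svm_range_best_prefetch_location() */
        if (p->xnack_enabled)
                /* only in-place accessors must reach the VRAM coherently */
                bitmap_copy(bitmap, prange->bitmap_aip, MAX_GPU_INSTANCE);
        else
                /* without XNACK every GPU that maps the range must reach it */
                bitmap_or(bitmap, prange->bitmap_access, prange->bitmap_aip,
                          MAX_GPU_INSTANCE);

        for_each_set_bit(gpuidx, bitmap, MAX_GPU_INSTANCE) {
                pdd = kfd_process_device_from_gpuidx(p, gpuidx);
                if (!pdd || !same_hive(pdd, best_loc))  /* hypothetical helper */
                        return 0;       /* fall back to CPU (system memory) */
        }
        return best_loc;                /* prefetch location can be honoured */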
* * Context: Process context * @@ -2710,11 +2779,6 @@ svm_range_best_prefetch_location(struct svm_range *prange) p = container_of(prange->svms, struct kfd_process, svms); - /* xnack on */ - if (p->xnack_enabled) - goto out; - - /* xnack off */ if (!best_loc || best_loc == KFD_IOCTL_SVM_LOCATION_UNDEFINED) goto out; @@ -2724,8 +2788,12 @@ svm_range_best_prefetch_location(struct svm_range *prange) best_loc = 0; goto out; } - bitmap_or(bitmap, prange->bitmap_access, prange->bitmap_aip, - MAX_GPU_INSTANCE); + + if (p->xnack_enabled) + bitmap_copy(bitmap, prange->bitmap_aip, MAX_GPU_INSTANCE); + else + bitmap_or(bitmap, prange->bitmap_access, prange->bitmap_aip, + MAX_GPU_INSTANCE); for_each_set_bit(gpuidx, bitmap, MAX_GPU_INSTANCE) { pdd = kfd_process_device_from_gpuidx(p, gpuidx); @@ -3019,7 +3087,8 @@ svm_range_get_attr(struct kfd_process *p, uint64_t start, uint64_t size, struct svm_range *prange; uint32_t prefetch_loc = KFD_IOCTL_SVM_LOCATION_UNDEFINED; uint32_t location = KFD_IOCTL_SVM_LOCATION_UNDEFINED; - uint32_t flags = 0xffffffff; + uint32_t flags_and = 0xffffffff; + uint32_t flags_or = 0; int gpuidx; uint32_t i; @@ -3054,12 +3123,12 @@ svm_range_get_attr(struct kfd_process *p, uint64_t start, uint64_t size, get_accessible = true; break; case KFD_IOCTL_SVM_ATTR_SET_FLAGS: + case KFD_IOCTL_SVM_ATTR_CLR_FLAGS: get_flags = true; break; case KFD_IOCTL_SVM_ATTR_GRANULARITY: get_granularity = true; break; - case KFD_IOCTL_SVM_ATTR_CLR_FLAGS: case KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE: case KFD_IOCTL_SVM_ATTR_NO_ACCESS: fallthrough; @@ -3077,7 +3146,8 @@ svm_range_get_attr(struct kfd_process *p, uint64_t start, uint64_t size, if (!node) { pr_debug("range attrs not found return default values\n"); svm_range_set_default_attributes(&location, &prefetch_loc, - &granularity, &flags); + &granularity, &flags_and); + flags_or = flags_and; if (p->xnack_enabled) bitmap_copy(bitmap_access, svms->bitmap_supported, MAX_GPU_INSTANCE); @@ -3123,8 +3193,10 @@ svm_range_get_attr(struct kfd_process *p, uint64_t start, uint64_t size, bitmap_and(bitmap_aip, bitmap_aip, prange->bitmap_aip, MAX_GPU_INSTANCE); } - if (get_flags) - flags &= prange->flags; + if (get_flags) { + flags_and &= prange->flags; + flags_or |= prange->flags; + } if (get_granularity && prange->granularity < granularity) granularity = prange->granularity; @@ -3158,7 +3230,10 @@ fill_values: attrs[i].type = KFD_IOCTL_SVM_ATTR_NO_ACCESS; break; case KFD_IOCTL_SVM_ATTR_SET_FLAGS: - attrs[i].value = flags; + attrs[i].value = flags_and; + break; + case KFD_IOCTL_SVM_ATTR_CLR_FLAGS: + attrs[i].value = ~flags_or; break; case KFD_IOCTL_SVM_ATTR_GRANULARITY: attrs[i].value = (uint32_t)granularity; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h index 3fc1fd8b4fbc..c6ec55354c7b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h @@ -175,7 +175,7 @@ int svm_range_split_by_granularity(struct kfd_process *p, struct mm_struct *mm, unsigned long addr, struct svm_range *parent, struct svm_range *prange); int svm_range_restore_pages(struct amdgpu_device *adev, - unsigned int pasid, uint64_t addr); + unsigned int pasid, uint64_t addr, bool write_fault); int svm_range_schedule_evict_svm_bo(struct amdgpu_amdkfd_fence *fence); void svm_range_add_list_work(struct svm_range_list *svms, struct svm_range *prange, struct mm_struct *mm, @@ -209,7 +209,8 @@ static inline void svm_range_list_fini(struct kfd_process *p) } static inline int svm_range_restore_pages(struct amdgpu_device *adev, - 
unsigned int pasid, uint64_t addr) + unsigned int pasid, uint64_t addr, + bool write_fault) { return -EFAULT; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index b1ce072aa20b..98cca5f2b27f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -478,6 +478,8 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr, dev->node_props.simd_per_cu); sysfs_show_32bit_prop(buffer, offs, "max_slots_scratch_cu", dev->node_props.max_slots_scratch_cu); + sysfs_show_32bit_prop(buffer, offs, "gfx_target_version", + dev->node_props.gfx_target_version); sysfs_show_32bit_prop(buffer, offs, "vendor_id", dev->node_props.vendor_id); sysfs_show_32bit_prop(buffer, offs, "device_id", @@ -1360,6 +1362,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu) dev->node_props.simd_arrays_per_engine = cu_info.num_shader_arrays_per_engine; + dev->node_props.gfx_target_version = gpu->device_info->gfx_target_version; dev->node_props.vendor_id = gpu->pdev->vendor; dev->node_props.device_id = gpu->pdev->device; dev->node_props.capability |= @@ -1424,6 +1427,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu) case CHIP_DIMGREY_CAVEFISH: case CHIP_BEIGE_GOBY: case CHIP_YELLOW_CARP: + case CHIP_CYAN_SKILLFISH: dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_2_0 << HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) & HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK); @@ -1630,7 +1634,7 @@ int kfd_debugfs_rls_by_device(struct seq_file *m, void *data) } seq_printf(m, "Node %u, gpu_id %x:\n", i++, dev->gpu->id); - r = pm_debugfs_runlist(m, &dev->gpu->dqm->packets); + r = pm_debugfs_runlist(m, &dev->gpu->dqm->packet_mgr); if (r) break; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h index 8b48c6692007..a8db017c9b8e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h @@ -78,6 +78,7 @@ struct kfd_node_properties { uint32_t simd_per_cu; uint32_t max_slots_scratch_cu; uint32_t engine_id; + uint32_t gfx_target_version; uint32_t vendor_id; uint32_t device_id; uint32_t location_id; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile b/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile index 91fb72c96545..718e123a3230 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile @@ -27,6 +27,10 @@ AMDGPUDM = amdgpu_dm.o amdgpu_dm_irq.o amdgpu_dm_mst_types.o amdgpu_dm_color.o +ifdef CONFIG_DRM_AMD_DC_DCN +AMDGPUDM += dc_fpu.o +endif + ifneq ($(CONFIG_DRM_AMD_DC),) AMDGPUDM += amdgpu_dm_services.o amdgpu_dm_helpers.o amdgpu_dm_pp_smu.o amdgpu_dm_psr.o endif diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index afa96c8f721b..816723691d51 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -618,6 +618,7 @@ static void dm_dcn_vertical_interrupt0_high_irq(void *interrupt_params) } #endif +#define DMUB_TRACE_MAX_READ 64 /** * dm_dmub_outbox1_low_irq() - Handles Outbox interrupt * @interrupt_params: used for determining the Outbox instance @@ -625,7 +626,6 @@ static void dm_dcn_vertical_interrupt0_high_irq(void *interrupt_params) * Handles the Outbox Interrupt * event handler. 
*/ -#define DMUB_TRACE_MAX_READ 64 static void dm_dmub_outbox1_low_irq(void *interrupt_params) { struct dmub_notification notify; @@ -1044,10 +1044,10 @@ static void mmhub_read_system_context(struct amdgpu_device *adev, struct dc_phy_ } #endif #if defined(CONFIG_DRM_AMD_DC_DCN) -static void event_mall_stutter(struct work_struct *work) +static void vblank_control_worker(struct work_struct *work) { - - struct vblank_workqueue *vblank_work = container_of(work, struct vblank_workqueue, mall_work); + struct vblank_control_work *vblank_work = + container_of(work, struct vblank_control_work, work); struct amdgpu_display_manager *dm = vblank_work->dm; mutex_lock(&dm->dc_lock); @@ -1061,27 +1061,25 @@ static void event_mall_stutter(struct work_struct *work) DRM_DEBUG_KMS("Allow idle optimizations (MALL): %d\n", dm->active_vblank_irq_count == 0); - mutex_unlock(&dm->dc_lock); -} - -static struct vblank_workqueue *vblank_create_workqueue(struct amdgpu_device *adev, struct dc *dc) -{ - - int max_caps = dc->caps.max_links; - struct vblank_workqueue *vblank_work; - int i = 0; - - vblank_work = kcalloc(max_caps, sizeof(*vblank_work), GFP_KERNEL); - if (ZERO_OR_NULL_PTR(vblank_work)) { - kfree(vblank_work); - return NULL; + /* Control PSR based on vblank requirements from OS */ + if (vblank_work->stream && vblank_work->stream->link) { + if (vblank_work->enable) { + if (vblank_work->stream->link->psr_settings.psr_allow_active) + amdgpu_dm_psr_disable(vblank_work->stream); + } else if (vblank_work->stream->link->psr_settings.psr_feature_enabled && + !vblank_work->stream->link->psr_settings.psr_allow_active && + vblank_work->acrtc->dm_irq_params.allow_psr_entry) { + amdgpu_dm_psr_enable(vblank_work->stream); + } } - for (i = 0; i < max_caps; i++) - INIT_WORK(&vblank_work[i].mall_work, event_mall_stutter); + mutex_unlock(&dm->dc_lock); + + dc_stream_release(vblank_work->stream); - return vblank_work; + kfree(vblank_work); } + #endif static int amdgpu_dm_init(struct amdgpu_device *adev) { @@ -1224,12 +1222,10 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) #if defined(CONFIG_DRM_AMD_DC_DCN) if (adev->dm.dc->caps.max_links > 0) { - adev->dm.vblank_workqueue = vblank_create_workqueue(adev, adev->dm.dc); - - if (!adev->dm.vblank_workqueue) + adev->dm.vblank_control_workqueue = + create_singlethread_workqueue("dm_vblank_control_workqueue"); + if (!adev->dm.vblank_control_workqueue) DRM_ERROR("amdgpu: failed to initialize vblank_workqueue.\n"); - else - DRM_DEBUG_DRIVER("amdgpu: vblank_workqueue init done %p.\n", adev->dm.vblank_workqueue); } #endif @@ -1302,6 +1298,13 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev) { int i; +#if defined(CONFIG_DRM_AMD_DC_DCN) + if (adev->dm.vblank_control_workqueue) { + destroy_workqueue(adev->dm.vblank_control_workqueue); + adev->dm.vblank_control_workqueue = NULL; + } +#endif + for (i = 0; i < adev->dm.display_indexes_num; i++) { drm_encoder_cleanup(&adev->dm.mst_encoders[i].base); } @@ -1325,14 +1328,6 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev) dc_deinit_callbacks(adev->dm.dc); #endif -#if defined(CONFIG_DRM_AMD_DC_DCN) - if (adev->dm.vblank_workqueue) { - adev->dm.vblank_workqueue->dm = NULL; - kfree(adev->dm.vblank_workqueue); - adev->dm.vblank_workqueue = NULL; - } -#endif - dc_dmub_srv_destroy(&adev->dm.dc->ctx->dmub_srv); if (dc_enable_dmub_notifications(adev->dm.dc)) { @@ -2412,6 +2407,7 @@ static void update_connector_ext_caps(struct amdgpu_dm_connector *aconnector) static const u8 pre_computed_values[] = { 50, 51, 52, 53, 55, 56, 57, 
58, 59, 61, 62, 63, 65, 66, 68, 69, 71, 72, 74, 75, 77, 79, 81, 82, 84, 86, 88, 90, 92, 94, 96, 98}; + int i; if (!aconnector || !aconnector->dc_link) return; @@ -2423,7 +2419,13 @@ static void update_connector_ext_caps(struct amdgpu_dm_connector *aconnector) conn_base = &aconnector->base; adev = drm_to_adev(conn_base->dev); dm = &adev->dm; - caps = &dm->backlight_caps; + for (i = 0; i < dm->num_of_edps; i++) { + if (link == dm->backlight_link[i]) + break; + } + if (i >= dm->num_of_edps) + return; + caps = &dm->backlight_caps[i]; caps->ext_caps = &aconnector->dc_link->dpcd_sink_ext_caps; caps->aux_support = false; max_cll = conn_base->hdr_sink_metadata.hdmi_type1.max_cll; @@ -3423,35 +3425,36 @@ static int amdgpu_dm_mode_config_init(struct amdgpu_device *adev) #if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) ||\ defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE) -static void amdgpu_dm_update_backlight_caps(struct amdgpu_display_manager *dm) +static void amdgpu_dm_update_backlight_caps(struct amdgpu_display_manager *dm, + int bl_idx) { #if defined(CONFIG_ACPI) struct amdgpu_dm_backlight_caps caps; memset(&caps, 0, sizeof(caps)); - if (dm->backlight_caps.caps_valid) + if (dm->backlight_caps[bl_idx].caps_valid) return; amdgpu_acpi_get_backlight_caps(&caps); if (caps.caps_valid) { - dm->backlight_caps.caps_valid = true; + dm->backlight_caps[bl_idx].caps_valid = true; if (caps.aux_support) return; - dm->backlight_caps.min_input_signal = caps.min_input_signal; - dm->backlight_caps.max_input_signal = caps.max_input_signal; + dm->backlight_caps[bl_idx].min_input_signal = caps.min_input_signal; + dm->backlight_caps[bl_idx].max_input_signal = caps.max_input_signal; } else { - dm->backlight_caps.min_input_signal = + dm->backlight_caps[bl_idx].min_input_signal = AMDGPU_DM_DEFAULT_MIN_BACKLIGHT; - dm->backlight_caps.max_input_signal = + dm->backlight_caps[bl_idx].max_input_signal = AMDGPU_DM_DEFAULT_MAX_BACKLIGHT; } #else - if (dm->backlight_caps.aux_support) + if (dm->backlight_caps[bl_idx].aux_support) return; - dm->backlight_caps.min_input_signal = AMDGPU_DM_DEFAULT_MIN_BACKLIGHT; - dm->backlight_caps.max_input_signal = AMDGPU_DM_DEFAULT_MAX_BACKLIGHT; + dm->backlight_caps[bl_idx].min_input_signal = AMDGPU_DM_DEFAULT_MIN_BACKLIGHT; + dm->backlight_caps[bl_idx].max_input_signal = AMDGPU_DM_DEFAULT_MAX_BACKLIGHT; #endif } @@ -3502,41 +3505,31 @@ static u32 convert_brightness_to_user(const struct amdgpu_dm_backlight_caps *cap } static int amdgpu_dm_backlight_set_level(struct amdgpu_display_manager *dm, + int bl_idx, u32 user_brightness) { struct amdgpu_dm_backlight_caps caps; - struct dc_link *link[AMDGPU_DM_MAX_NUM_EDP]; - u32 brightness[AMDGPU_DM_MAX_NUM_EDP]; + struct dc_link *link; + u32 brightness; bool rc; - int i; - amdgpu_dm_update_backlight_caps(dm); - caps = dm->backlight_caps; + amdgpu_dm_update_backlight_caps(dm, bl_idx); + caps = dm->backlight_caps[bl_idx]; - for (i = 0; i < dm->num_of_edps; i++) { - dm->brightness[i] = user_brightness; - brightness[i] = convert_brightness_from_user(&caps, dm->brightness[i]); - link[i] = (struct dc_link *)dm->backlight_link[i]; - } + dm->brightness[bl_idx] = user_brightness; + brightness = convert_brightness_from_user(&caps, dm->brightness[bl_idx]); + link = (struct dc_link *)dm->backlight_link[bl_idx]; /* Change brightness based on AUX property */ if (caps.aux_support) { - for (i = 0; i < dm->num_of_edps; i++) { - rc = dc_link_set_backlight_level_nits(link[i], true, brightness[i], - AUX_BL_DEFAULT_TRANSITION_TIME_MS); - if (!rc) { - DRM_DEBUG("DM: Failed to update 
backlight via AUX on eDP[%d]\n", i); - break; - } - } + rc = dc_link_set_backlight_level_nits(link, true, brightness, + AUX_BL_DEFAULT_TRANSITION_TIME_MS); + if (!rc) + DRM_DEBUG("DM: Failed to update backlight via AUX on eDP[%d]\n", bl_idx); } else { - for (i = 0; i < dm->num_of_edps; i++) { - rc = dc_link_set_backlight_level(dm->backlight_link[i], brightness[i], 0); - if (!rc) { - DRM_DEBUG("DM: Failed to update backlight on eDP[%d]\n", i); - break; - } - } + rc = dc_link_set_backlight_level(link, brightness, 0); + if (!rc) + DRM_DEBUG("DM: Failed to update backlight on eDP[%d]\n", bl_idx); } return rc ? 0 : 1; @@ -3545,33 +3538,41 @@ static int amdgpu_dm_backlight_set_level(struct amdgpu_display_manager *dm, static int amdgpu_dm_backlight_update_status(struct backlight_device *bd) { struct amdgpu_display_manager *dm = bl_get_data(bd); + int i; - amdgpu_dm_backlight_set_level(dm, bd->props.brightness); + for (i = 0; i < dm->num_of_edps; i++) { + if (bd == dm->backlight_dev[i]) + break; + } + if (i >= AMDGPU_DM_MAX_NUM_EDP) + i = 0; + amdgpu_dm_backlight_set_level(dm, i, bd->props.brightness); return 0; } -static u32 amdgpu_dm_backlight_get_level(struct amdgpu_display_manager *dm) +static u32 amdgpu_dm_backlight_get_level(struct amdgpu_display_manager *dm, + int bl_idx) { struct amdgpu_dm_backlight_caps caps; + struct dc_link *link = (struct dc_link *)dm->backlight_link[bl_idx]; - amdgpu_dm_update_backlight_caps(dm); - caps = dm->backlight_caps; + amdgpu_dm_update_backlight_caps(dm, bl_idx); + caps = dm->backlight_caps[bl_idx]; if (caps.aux_support) { - struct dc_link *link = (struct dc_link *)dm->backlight_link[0]; u32 avg, peak; bool rc; rc = dc_link_get_backlight_level_nits(link, &avg, &peak); if (!rc) - return dm->brightness[0]; + return dm->brightness[bl_idx]; return convert_brightness_to_user(&caps, avg); } else { - int ret = dc_link_get_backlight_level(dm->backlight_link[0]); + int ret = dc_link_get_backlight_level(link); if (ret == DC_ERROR_UNEXPECTED) - return dm->brightness[0]; + return dm->brightness[bl_idx]; return convert_brightness_to_user(&caps, ret); } } @@ -3579,8 +3580,15 @@ static u32 amdgpu_dm_backlight_get_level(struct amdgpu_display_manager *dm) static int amdgpu_dm_backlight_get_brightness(struct backlight_device *bd) { struct amdgpu_display_manager *dm = bl_get_data(bd); + int i; - return amdgpu_dm_backlight_get_level(dm); + for (i = 0; i < dm->num_of_edps; i++) { + if (bd == dm->backlight_dev[i]) + break; + } + if (i >= AMDGPU_DM_MAX_NUM_EDP) + i = 0; + return amdgpu_dm_backlight_get_level(dm, i); } static const struct backlight_ops amdgpu_dm_backlight_ops = { @@ -3594,31 +3602,28 @@ amdgpu_dm_register_backlight_device(struct amdgpu_display_manager *dm) { char bl_name[16]; struct backlight_properties props = { 0 }; - int i; - amdgpu_dm_update_backlight_caps(dm); - for (i = 0; i < dm->num_of_edps; i++) - dm->brightness[i] = AMDGPU_MAX_BL_LEVEL; + amdgpu_dm_update_backlight_caps(dm, dm->num_of_edps); + dm->brightness[dm->num_of_edps] = AMDGPU_MAX_BL_LEVEL; props.max_brightness = AMDGPU_MAX_BL_LEVEL; props.brightness = AMDGPU_MAX_BL_LEVEL; props.type = BACKLIGHT_RAW; snprintf(bl_name, sizeof(bl_name), "amdgpu_bl%d", - adev_to_drm(dm->adev)->primary->index); + adev_to_drm(dm->adev)->primary->index + dm->num_of_edps); - dm->backlight_dev = backlight_device_register(bl_name, - adev_to_drm(dm->adev)->dev, - dm, - &amdgpu_dm_backlight_ops, - &props); + dm->backlight_dev[dm->num_of_edps] = backlight_device_register(bl_name, + adev_to_drm(dm->adev)->dev, + dm, + 
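With backlight state converted to per-eDP arrays, the backlight ops above recover the panel index by scanning backlight_dev[] and fall back to the first panel when the device is not found. The driver open-codes that loop in each op; a compact sketch of the same lookup, assuming the fields added to amdgpu_display_manager in this patch (bl_dev_to_idx() itself is hypothetical):

static int bl_dev_to_idx(struct amdgpu_display_manager *dm,
                         struct backlight_device *bd)
{
        int i;

        for (i = 0; i < dm->num_of_edps; i++)
                if (bd == dm->backlight_dev[i])
                        return i;

        return 0;       /* unknown device: default to the first eDP, as above */
}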
&amdgpu_dm_backlight_ops, + &props); - if (IS_ERR(dm->backlight_dev)) + if (IS_ERR(dm->backlight_dev[dm->num_of_edps])) DRM_ERROR("DM: Backlight registration failed!\n"); else DRM_DEBUG_DRIVER("DM: Registered Backlight device: %s\n", bl_name); } - #endif static int initialize_plane(struct amdgpu_display_manager *dm, @@ -3675,10 +3680,10 @@ static void register_backlight_device(struct amdgpu_display_manager *dm, * DM initialization because not having a backlight control * is better then a black screen. */ - if (!dm->backlight_dev) + if (!dm->backlight_dev[dm->num_of_edps]) amdgpu_dm_register_backlight_device(dm); - if (dm->backlight_dev) { + if (dm->backlight_dev[dm->num_of_edps]) { dm->backlight_link[dm->num_of_edps] = link; dm->num_of_edps++; } @@ -4747,7 +4752,7 @@ fill_gfx9_plane_attributes_from_modifiers(struct amdgpu_device *adev, const bool force_disable_dcc) { const uint64_t modifier = afb->base.modifier; - int ret; + int ret = 0; fill_gfx9_tiling_info_from_modifier(adev, tiling_info, modifier); tiling_info->gfx9.swizzle = modifier_gfx9_swizzle_mode(modifier); @@ -4765,9 +4770,9 @@ fill_gfx9_plane_attributes_from_modifiers(struct amdgpu_device *adev, ret = validate_dcc(adev, format, rotation, tiling_info, dcc, address, plane_size); if (ret) - return ret; + drm_dbg_kms(adev_to_drm(adev), "validate_dcc: returned error: %d\n", ret); - return 0; + return ret; } static int @@ -5994,7 +5999,7 @@ static inline int dm_set_vblank(struct drm_crtc *crtc, bool enable) struct dm_crtc_state *acrtc_state = to_dm_crtc_state(crtc->state); #if defined(CONFIG_DRM_AMD_DC_DCN) struct amdgpu_display_manager *dm = &adev->dm; - unsigned long flags; + struct vblank_control_work *work; #endif int rc = 0; @@ -6019,12 +6024,21 @@ static inline int dm_set_vblank(struct drm_crtc *crtc, bool enable) return 0; #if defined(CONFIG_DRM_AMD_DC_DCN) - spin_lock_irqsave(&dm->vblank_lock, flags); - dm->vblank_workqueue->dm = dm; - dm->vblank_workqueue->otg_inst = acrtc->otg_inst; - dm->vblank_workqueue->enable = enable; - spin_unlock_irqrestore(&dm->vblank_lock, flags); - schedule_work(&dm->vblank_workqueue->mall_work); + work = kzalloc(sizeof(*work), GFP_ATOMIC); + if (!work) + return -ENOMEM; + + INIT_WORK(&work->work, vblank_control_worker); + work->dm = dm; + work->acrtc = acrtc; + work->enable = enable; + + if (acrtc_state->stream) { + dc_stream_retain(acrtc_state->stream); + work->stream = acrtc_state->stream; + } + + queue_work(dm->vblank_control_workqueue, &work->work); #endif return 0; @@ -6198,6 +6212,7 @@ static void amdgpu_dm_connector_destroy(struct drm_connector *connector) const struct dc_link *link = aconnector->dc_link; struct amdgpu_device *adev = drm_to_adev(connector->dev); struct amdgpu_display_manager *dm = &adev->dm; + int i; /* * Call only if mst_mgr was iniitalized before since it's not done @@ -6208,12 +6223,11 @@ static void amdgpu_dm_connector_destroy(struct drm_connector *connector) #if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) ||\ defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE) - - if ((link->connector_signal & (SIGNAL_TYPE_EDP | SIGNAL_TYPE_LVDS)) && - link->type != dc_connection_none && - dm->backlight_dev) { - backlight_device_unregister(dm->backlight_dev); - dm->backlight_dev = NULL; + for (i = 0; i < dm->num_of_edps; i++) { + if ((link == dm->backlight_link[i]) && dm->backlight_dev[i]) { + backlight_device_unregister(dm->backlight_dev[i]); + dm->backlight_dev[i] = NULL; + } } #endif @@ -7570,8 +7584,10 @@ static uint add_fs_modes(struct amdgpu_dm_connector *aconnector) * 60 - Commonly used * 
48,72,96 - Multiples of 24 */ - const uint32_t common_rates[] = { 23976, 24000, 25000, 29970, 30000, - 48000, 50000, 60000, 72000, 96000 }; + static const uint32_t common_rates[] = { + 23976, 24000, 25000, 29970, 30000, + 48000, 50000, 60000, 72000, 96000 + }; /* * Find mode with highest refresh rate with the same resolution @@ -8627,6 +8643,14 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, /* Update the planes if changed or disable if we don't have any. */ if ((planes_count || acrtc_state->active_planes == 0) && acrtc_state->stream) { +#if defined(CONFIG_DRM_AMD_DC_DCN) + /* + * If PSR or idle optimizations are enabled then flush out + * any pending work before hardware programming. + */ + flush_workqueue(dm->vblank_control_workqueue); +#endif + bundle->stream_update.stream = acrtc_state->stream; if (new_pcrtc_state->mode_changed) { bundle->stream_update.src = acrtc_state->stream->src; @@ -8695,16 +8719,20 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, acrtc_state->stream->link->psr_settings.psr_version != DC_PSR_VERSION_UNSUPPORTED && !acrtc_state->stream->link->psr_settings.psr_feature_enabled) amdgpu_dm_link_setup_psr(acrtc_state->stream); - else if ((acrtc_state->update_type == UPDATE_TYPE_FAST) && - acrtc_state->stream->link->psr_settings.psr_feature_enabled && - !acrtc_state->stream->link->psr_settings.psr_allow_active) { - struct amdgpu_dm_connector *aconn = (struct amdgpu_dm_connector *) - acrtc_state->stream->dm_stream_context; + + /* Decrement skip count when PSR is enabled and we're doing fast updates. */ + if (acrtc_state->update_type == UPDATE_TYPE_FAST && + acrtc_state->stream->link->psr_settings.psr_feature_enabled) { + struct amdgpu_dm_connector *aconn = + (struct amdgpu_dm_connector *)acrtc_state->stream->dm_stream_context; if (aconn->psr_skip_count > 0) aconn->psr_skip_count--; - else - amdgpu_dm_psr_enable(acrtc_state->stream); + + /* Allow PSR when skip count is 0. 
*/ + acrtc_attach->dm_irq_params.allow_psr_entry = !aconn->psr_skip_count; + } else { + acrtc_attach->dm_irq_params.allow_psr_entry = false; } mutex_unlock(&dm->dc_lock); @@ -8953,8 +8981,12 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) if (dc_state) { /* if there mode set or reset, disable eDP PSR */ - if (mode_set_reset_required) + if (mode_set_reset_required) { +#if defined(CONFIG_DRM_AMD_DC_DCN) + flush_workqueue(dm->vblank_control_workqueue); +#endif amdgpu_dm_psr_disable_all(dm); + } dm_enable_per_frame_crtc_master_sync(dc_state); mutex_lock(&dm->dc_lock); @@ -9191,8 +9223,11 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) #if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) || \ defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE) /* restore the backlight level */ - if (dm->backlight_dev && (amdgpu_dm_backlight_get_level(dm) != dm->brightness[0])) - amdgpu_dm_backlight_set_level(dm, dm->brightness[0]); + for (i = 0; i < dm->num_of_edps; i++) { + if (dm->backlight_dev[i] && + (amdgpu_dm_backlight_get_level(dm, i) != dm->brightness[i])) + amdgpu_dm_backlight_set_level(dm, i, dm->brightness[i]); + } #endif /* * send vblank event on all events not handled in flip and @@ -10554,13 +10589,68 @@ static bool is_dp_capable_without_timing_msa(struct dc *dc, return capable; } -static bool parse_edid_cea(struct amdgpu_dm_connector *aconnector, +static bool dm_edid_parser_send_cea(struct amdgpu_display_manager *dm, + unsigned int offset, + unsigned int total_length, + uint8_t *data, + unsigned int length, + struct amdgpu_hdmi_vsdb_info *vsdb) +{ + bool res; + union dmub_rb_cmd cmd; + struct dmub_cmd_send_edid_cea *input; + struct dmub_cmd_edid_cea_output *output; + + if (length > DMUB_EDID_CEA_DATA_CHUNK_BYTES) + return false; + + memset(&cmd, 0, sizeof(cmd)); + + input = &cmd.edid_cea.data.input; + + cmd.edid_cea.header.type = DMUB_CMD__EDID_CEA; + cmd.edid_cea.header.sub_type = 0; + cmd.edid_cea.header.payload_bytes = + sizeof(cmd.edid_cea) - sizeof(cmd.edid_cea.header); + input->offset = offset; + input->length = length; + input->total_length = total_length; + memcpy(input->payload, data, length); + + res = dc_dmub_srv_cmd_with_reply_data(dm->dc->ctx->dmub_srv, &cmd); + if (!res) { + DRM_ERROR("EDID CEA parser failed\n"); + return false; + } + + output = &cmd.edid_cea.data.output; + + if (output->type == DMUB_CMD__EDID_CEA_ACK) { + if (!output->ack.success) { + DRM_ERROR("EDID CEA ack failed at offset %d\n", + output->ack.offset); + } + } else if (output->type == DMUB_CMD__EDID_CEA_AMD_VSDB) { + if (!output->amd_vsdb.vsdb_found) + return false; + + vsdb->freesync_supported = output->amd_vsdb.freesync_supported; + vsdb->amd_vsdb_version = output->amd_vsdb.amd_vsdb_version; + vsdb->min_refresh_rate_hz = output->amd_vsdb.min_frame_rate; + vsdb->max_refresh_rate_hz = output->amd_vsdb.max_frame_rate; + } else { + DRM_WARN("Unknown EDID CEA parser results\n"); + return false; + } + + return true; +} + +static bool parse_edid_cea_dmcu(struct amdgpu_display_manager *dm, uint8_t *edid_ext, int len, struct amdgpu_hdmi_vsdb_info *vsdb_info) { int i; - struct amdgpu_device *adev = drm_to_adev(aconnector->base.dev); - struct dc *dc = adev->dm.dc; /* send extension block to DMCU for parsing */ for (i = 0; i < len; i += 8) { @@ -10568,14 +10658,14 @@ static bool parse_edid_cea(struct amdgpu_dm_connector *aconnector, int offset; /* send 8 bytes a time */ - if (!dc_edid_parser_send_cea(dc, i, len, &edid_ext[i], 8)) + if (!dc_edid_parser_send_cea(dm->dc, i, len, 
&edid_ext[i], 8)) return false; if (i+8 == len) { /* EDID block sent completed, expect result */ int version, min_rate, max_rate; - res = dc_edid_parser_recv_amd_vsdb(dc, &version, &min_rate, &max_rate); + res = dc_edid_parser_recv_amd_vsdb(dm->dc, &version, &min_rate, &max_rate); if (res) { /* amd vsdb found */ vsdb_info->freesync_supported = 1; @@ -10589,7 +10679,7 @@ static bool parse_edid_cea(struct amdgpu_dm_connector *aconnector, } /* check for ack*/ - res = dc_edid_parser_recv_cea_ack(dc, &offset); + res = dc_edid_parser_recv_cea_ack(dm->dc, &offset); if (!res) return false; } @@ -10597,6 +10687,34 @@ static bool parse_edid_cea(struct amdgpu_dm_connector *aconnector, return false; } +static bool parse_edid_cea_dmub(struct amdgpu_display_manager *dm, + uint8_t *edid_ext, int len, + struct amdgpu_hdmi_vsdb_info *vsdb_info) +{ + int i; + + /* send extension block to DMCU for parsing */ + for (i = 0; i < len; i += 8) { + /* send 8 bytes a time */ + if (!dm_edid_parser_send_cea(dm, i, len, &edid_ext[i], 8, vsdb_info)) + return false; + } + + return vsdb_info->freesync_supported; +} + +static bool parse_edid_cea(struct amdgpu_dm_connector *aconnector, + uint8_t *edid_ext, int len, + struct amdgpu_hdmi_vsdb_info *vsdb_info) +{ + struct amdgpu_device *adev = drm_to_adev(aconnector->base.dev); + + if (adev->dm.dmub_srv) + return parse_edid_cea_dmub(&adev->dm, edid_ext, len, vsdb_info); + else + return parse_edid_cea_dmcu(&adev->dm, edid_ext, len, vsdb_info); +} + static int parse_hdmi_amd_vsdb(struct amdgpu_dm_connector *aconnector, struct edid *edid, struct amdgpu_hdmi_vsdb_info *vsdb_info) { diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index 9522d4ca299e..d1d353a7c77d 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -60,6 +60,7 @@ enum aux_return_code_type; /* Forward declarations */ struct amdgpu_device; +struct amdgpu_crtc; struct drm_device; struct dc; struct amdgpu_bo; @@ -86,16 +87,18 @@ struct dm_compressor_info { }; /** - * struct vblank_workqueue - Works to be executed in a separate thread during vblank - * @mall_work: work for mall stutter + * struct vblank_control_work - Work data for vblank control + * @work: Kernel work data for the work event * @dm: amdgpu display manager device - * @otg_inst: otg instance of which vblank is being set - * @enable: true if enable vblank + * @acrtc: amdgpu CRTC instance for which the event has occurred + * @stream: DC stream for which the event has occurred + * @enable: true if enabling vblank */ -struct vblank_workqueue { - struct work_struct mall_work; +struct vblank_control_work { + struct work_struct work; struct amdgpu_display_manager *dm; - int otg_inst; + struct amdgpu_crtc *acrtc; + struct dc_stream_state *stream; bool enable; }; @@ -365,13 +368,13 @@ struct amdgpu_display_manager { spinlock_t irq_handler_list_table_lock; - struct backlight_device *backlight_dev; + struct backlight_device *backlight_dev[AMDGPU_DM_MAX_NUM_EDP]; const struct dc_link *backlight_link[AMDGPU_DM_MAX_NUM_EDP]; uint8_t num_of_edps; - struct amdgpu_dm_backlight_caps backlight_caps; + struct amdgpu_dm_backlight_caps backlight_caps[AMDGPU_DM_MAX_NUM_EDP]; struct mod_freesync *freesync_module; #ifdef CONFIG_DRM_AMD_DC_HDCP @@ -380,11 +383,11 @@ struct amdgpu_display_manager { #if defined(CONFIG_DRM_AMD_DC_DCN) /** - * @vblank_workqueue: + * @vblank_control_workqueue: * - * amdgpu workqueue during vblank + * Deferred work for 
vblank control events. */ - struct vblank_workqueue *vblank_workqueue; + struct workqueue_struct *vblank_control_workqueue; #endif struct drm_atomic_state *cached_state; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c index f1145086a468..87daa78a32b8 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c @@ -197,29 +197,29 @@ static ssize_t dp_link_settings_read(struct file *f, char __user *buf, rd_buf_ptr = rd_buf; - str_len = strlen("Current: %d %d %d "); - snprintf(rd_buf_ptr, str_len, "Current: %d %d %d ", + str_len = strlen("Current: %d 0x%x %d "); + snprintf(rd_buf_ptr, str_len, "Current: %d 0x%x %d ", link->cur_link_settings.lane_count, link->cur_link_settings.link_rate, link->cur_link_settings.link_spread); rd_buf_ptr += str_len; - str_len = strlen("Verified: %d %d %d "); - snprintf(rd_buf_ptr, str_len, "Verified: %d %d %d ", + str_len = strlen("Verified: %d 0x%x %d "); + snprintf(rd_buf_ptr, str_len, "Verified: %d 0x%x %d ", link->verified_link_cap.lane_count, link->verified_link_cap.link_rate, link->verified_link_cap.link_spread); rd_buf_ptr += str_len; - str_len = strlen("Reported: %d %d %d "); - snprintf(rd_buf_ptr, str_len, "Reported: %d %d %d ", + str_len = strlen("Reported: %d 0x%x %d "); + snprintf(rd_buf_ptr, str_len, "Reported: %d 0x%x %d ", link->reported_link_cap.lane_count, link->reported_link_cap.link_rate, link->reported_link_cap.link_spread); rd_buf_ptr += str_len; - str_len = strlen("Preferred: %d %d %d "); - snprintf(rd_buf_ptr, str_len, "Preferred: %d %d %d\n", + str_len = strlen("Preferred: %d 0x%x %d "); + snprintf(rd_buf_ptr, str_len, "Preferred: %d 0x%x %d\n", link->preferred_link_setting.lane_count, link->preferred_link_setting.link_rate, link->preferred_link_setting.link_spread); @@ -377,7 +377,7 @@ static ssize_t dp_phy_settings_read(struct file *f, char __user *buf, if (!rd_buf) return -EINVAL; - snprintf(rd_buf, rd_buf_size, " %d %d %d ", + snprintf(rd_buf, rd_buf_size, " %d %d %d\n", link->cur_lane_setting.VOLTAGE_SWING, link->cur_lane_setting.PRE_EMPHASIS, link->cur_lane_setting.POST_CURSOR2); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c index e63c6885c757..c5f1dc3b5961 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c @@ -79,12 +79,12 @@ static uint8_t *psp_get_srm(struct psp_context *psp, uint32_t *srm_version, uint struct ta_hdcp_shared_memory *hdcp_cmd; - if (!psp->hdcp_context.hdcp_initialized) { + if (!psp->hdcp_context.context.initialized) { DRM_WARN("Failed to get hdcp srm. HDCP TA is not initialized."); return NULL; } - hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.hdcp_shared_buf; + hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.context.mem_context.shared_buf; memset(hdcp_cmd, 0, sizeof(struct ta_hdcp_shared_memory)); hdcp_cmd->cmd_id = TA_HDCP_COMMAND__HDCP_GET_SRM; @@ -105,12 +105,12 @@ static int psp_set_srm(struct psp_context *psp, uint8_t *srm, uint32_t srm_size, struct ta_hdcp_shared_memory *hdcp_cmd; - if (!psp->hdcp_context.hdcp_initialized) { + if (!psp->hdcp_context.context.initialized) { DRM_WARN("Failed to get hdcp srm. 
HDCP TA is not initialized."); return -EINVAL; } - hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.hdcp_shared_buf; + hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.context.mem_context.shared_buf; memset(hdcp_cmd, 0, sizeof(struct ta_hdcp_shared_memory)); memcpy(hdcp_cmd->in_msg.hdcp_set_srm.srm_buf, srm, srm_size); @@ -414,12 +414,12 @@ static bool enable_assr(void *handle, struct dc_link *link) struct ta_dtm_shared_memory *dtm_cmd; bool res = true; - if (!psp->dtm_context.dtm_initialized) { + if (!psp->dtm_context.context.initialized) { DRM_INFO("Failed to enable ASSR, DTM TA is not initialized."); return false; } - dtm_cmd = (struct ta_dtm_shared_memory *)psp->dtm_context.dtm_shared_buf; + dtm_cmd = (struct ta_dtm_shared_memory *)psp->dtm_context.context.mem_context.shared_buf; mutex_lock(&psp->dtm_context.mutex); memset(dtm_cmd, 0, sizeof(struct ta_dtm_shared_memory)); @@ -655,10 +655,8 @@ struct hdcp_workqueue *hdcp_create_workqueue(struct amdgpu_device *adev, struct INIT_DELAYED_WORK(&hdcp_work[i].property_validate_dwork, event_property_validate); hdcp_work[i].hdcp.config.psp.handle = &adev->psp; - if (dc->ctx->dce_version == DCN_VERSION_3_1) { + if (dc->ctx->dce_version == DCN_VERSION_3_1) hdcp_work[i].hdcp.config.psp.caps.dtm_v3_supported = 1; - hdcp_work[i].hdcp.config.psp.caps.opm_state_query_supported = false; - } hdcp_work[i].hdcp.config.ddc.handle = dc_get_link_at_index(dc, i); hdcp_work[i].hdcp.config.ddc.funcs.write_i2c = lp_write_i2c; hdcp_work[i].hdcp.config.ddc.funcs.read_i2c = lp_read_i2c; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq_params.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq_params.h index f3b93ba69a27..79b5f9999fec 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq_params.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq_params.h @@ -33,6 +33,7 @@ struct dm_irq_params { struct mod_vrr_params vrr_params; struct dc_stream_state *stream; int active_planes; + bool allow_psr_entry; struct mod_freesync_config freesync_config; #ifdef CONFIG_DEBUG_FS diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 5568d4e518e6..1bcba6943fd7 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -213,6 +213,29 @@ static int dm_dp_mst_get_modes(struct drm_connector *connector) drm_connector_update_edid_property( &aconnector->base, NULL); + + DRM_DEBUG_KMS("Can't get EDID of %s. 
Add default remote sink.", connector->name); + if (!aconnector->dc_sink) { + struct dc_sink *dc_sink; + struct dc_sink_init_data init_params = { + .link = aconnector->dc_link, + .sink_signal = SIGNAL_TYPE_DISPLAY_PORT_MST }; + + dc_sink = dc_link_add_remote_sink( + aconnector->dc_link, + NULL, + 0, + &init_params); + + if (!dc_sink) { + DRM_ERROR("Unable to add a remote sink\n"); + return 0; + } + + dc_sink->priv = aconnector; + aconnector->dc_sink = dc_sink; + } + return ret; } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_trace.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_trace.h index 46a33f64cf8e..fdcaea22b456 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_trace.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_trace.h @@ -637,6 +637,30 @@ TRACE_EVENT(amdgpu_refresh_rate_track, __entry->refresh_rate_ns) ); +TRACE_EVENT(dcn_fpu, + TP_PROTO(bool begin, const char *function, const int line, const int recursion_depth), + TP_ARGS(begin, function, line, recursion_depth), + + TP_STRUCT__entry( + __field(bool, begin) + __field(const char *, function) + __field(int, line) + __field(int, recursion_depth) + ), + TP_fast_assign( + __entry->begin = begin; + __entry->function = function; + __entry->line = line; + __entry->recursion_depth = recursion_depth; + ), + TP_printk("%s: recursion_depth: %d: %s()+%d:", + __entry->begin ? "begin" : "end", + __entry->recursion_depth, + __entry->function, + __entry->line + ) +); + #endif /* _AMDGPU_DM_TRACE_H_ */ #undef TRACE_INCLUDE_PATH diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c new file mode 100644 index 000000000000..c9f47d167472 --- /dev/null +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c @@ -0,0 +1,144 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#include "dc_trace.h" + +#if defined(CONFIG_X86) +#include <asm/fpu/api.h> +#elif defined(CONFIG_PPC64) +#include <asm/switch_to.h> +#include <asm/cputable.h> +#endif + +/** + * DOC: DC FPU manipulation overview + * + * DC core uses FPU operations in multiple parts of the code, which requires a + * more specialized way to manage these areas' entrance. To fulfill this + * requirement, we created some wrapper functions that encapsulate + * kernel_fpu_begin/end to better fit our need in the display component. 
In + * summary, in this file, you can find functions related to FPU operation + * management. + */ + +static DEFINE_PER_CPU(int, fpu_recursion_depth); + +/** + * dc_assert_fp_enabled - Check if FPU protection is enabled + * + * This function tells if the code is already under FPU protection or not. A + * function that works as an API for a set of FPU operations can use this + * function for checking if the caller invoked it after DC_FP_START(). For + * example, take a look at dcn2x.c file. + */ +inline void dc_assert_fp_enabled(void) +{ + int *pcpu, depth = 0; + + pcpu = get_cpu_ptr(&fpu_recursion_depth); + depth = *pcpu; + put_cpu_ptr(&fpu_recursion_depth); + + ASSERT(depth > 1); +} + +/** + * dc_fpu_begin - Enables FPU protection + * @function_name: A string containing the function name for debug purposes + * (usually __func__) + * + * @line: A line number where DC_FP_START was invoked for debug purpose + * (usually __LINE__) + * + * This function is responsible for managing the use of kernel_fpu_begin() with + * the advantage of providing an event trace for debugging. + * + * Note: Do not call this function directly; always use DC_FP_START(). + */ +void dc_fpu_begin(const char *function_name, const int line) +{ + int *pcpu; + + pcpu = get_cpu_ptr(&fpu_recursion_depth); + *pcpu += 1; + + if (*pcpu == 1) { +#if defined(CONFIG_X86) + kernel_fpu_begin(); +#elif defined(CONFIG_PPC64) + if (cpu_has_feature(CPU_FTR_VSX_COMP)) { + preempt_disable(); + enable_kernel_vsx(); + } else if (cpu_has_feature(CPU_FTR_ALTIVEC_COMP)) { + preempt_disable(); + enable_kernel_altivec(); + } else if (!cpu_has_feature(CPU_FTR_FPU_UNAVAILABLE)) { + preempt_disable(); + enable_kernel_fp(); + } +#endif + } + + TRACE_DCN_FPU(true, function_name, line, *pcpu); + put_cpu_ptr(&fpu_recursion_depth); +} + +/** + * dc_fpu_end - Disable FPU protection + * @function_name: A string containing the function name for debug purposes + * @line: A-line number where DC_FP_END was invoked for debug purpose + * + * This function is responsible for managing the use of kernel_fpu_end() with + * the advantage of providing an event trace for debugging. + * + * Note: Do not call this function directly; always use DC_FP_END(). + */ +void dc_fpu_end(const char *function_name, const int line) +{ + int *pcpu; + + pcpu = get_cpu_ptr(&fpu_recursion_depth); + *pcpu -= 1; + if (*pcpu <= 0) { +#if defined(CONFIG_X86) + kernel_fpu_end(); +#elif defined(CONFIG_PPC64) + if (cpu_has_feature(CPU_FTR_VSX_COMP)) { + disable_kernel_vsx(); + preempt_enable(); + } else if (cpu_has_feature(CPU_FTR_ALTIVEC_COMP)) { + disable_kernel_altivec(); + preempt_enable(); + } else if (!cpu_has_feature(CPU_FTR_FPU_UNAVAILABLE)) { + disable_kernel_fp(); + preempt_enable(); + } +#endif + } + + TRACE_DCN_FPU(false, function_name, line, *pcpu); + put_cpu_ptr(&fpu_recursion_depth); +} diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.h b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.h new file mode 100644 index 000000000000..b8275b397920 --- /dev/null +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2021 Advanced Micro Devices, Inc. 
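Per the kernel-doc in dc_fpu.c above, dc_fpu_begin()/dc_fpu_end() are never called directly: FPU-using DC code brackets itself with DC_FP_START()/DC_FP_END(), and the per-CPU fpu_recursion_depth counter makes nesting safe because only the outermost level actually invokes kernel_fpu_begin()/kernel_fpu_end() (or the PPC64 VSX/AltiVec/FP equivalents). A minimal, illustrative caller, not code from this patch:

static void example_fpu_user(void)
{
        DC_FP_START();
        /*
         * float/double math is only valid inside this region; helpers that
         * assume FPU access can call dc_assert_fp_enabled() to check it.
         */
        DC_FP_END();
}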
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef __DC_FPU_H__ +#define __DC_FPU_H__ + +void dc_assert_fp_enabled(void); +void dc_fpu_begin(const char *function_name, const int line); +void dc_fpu_end(const char *function_name, const int line); + +#endif /* __DC_FPU_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr.c index e133edc587d3..76ec8ec92efd 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr.c @@ -264,9 +264,9 @@ static void rv1_update_clocks(struct clk_mgr *clk_mgr_base, if (pp_smu->set_hard_min_fclk_by_freq && pp_smu->set_hard_min_dcfclk_by_freq && pp_smu->set_min_deep_sleep_dcfclk) { - pp_smu->set_hard_min_fclk_by_freq(&pp_smu->pp_smu, new_clocks->fclk_khz / 1000); - pp_smu->set_hard_min_dcfclk_by_freq(&pp_smu->pp_smu, new_clocks->dcfclk_khz / 1000); - pp_smu->set_min_deep_sleep_dcfclk(&pp_smu->pp_smu, (new_clocks->dcfclk_deep_sleep_khz + 999) / 1000); + pp_smu->set_hard_min_fclk_by_freq(&pp_smu->pp_smu, khz_to_mhz_ceil(new_clocks->fclk_khz)); + pp_smu->set_hard_min_dcfclk_by_freq(&pp_smu->pp_smu, khz_to_mhz_ceil(new_clocks->dcfclk_khz)); + pp_smu->set_min_deep_sleep_dcfclk(&pp_smu->pp_smu, khz_to_mhz_ceil(new_clocks->dcfclk_deep_sleep_khz)); } } @@ -284,9 +284,9 @@ static void rv1_update_clocks(struct clk_mgr *clk_mgr_base, if (pp_smu->set_hard_min_fclk_by_freq && pp_smu->set_hard_min_dcfclk_by_freq && pp_smu->set_min_deep_sleep_dcfclk) { - pp_smu->set_hard_min_fclk_by_freq(&pp_smu->pp_smu, new_clocks->fclk_khz / 1000); - pp_smu->set_hard_min_dcfclk_by_freq(&pp_smu->pp_smu, new_clocks->dcfclk_khz / 1000); - pp_smu->set_min_deep_sleep_dcfclk(&pp_smu->pp_smu, (new_clocks->dcfclk_deep_sleep_khz + 999) / 1000); + pp_smu->set_hard_min_fclk_by_freq(&pp_smu->pp_smu, khz_to_mhz_ceil(new_clocks->fclk_khz)); + pp_smu->set_hard_min_dcfclk_by_freq(&pp_smu->pp_smu, khz_to_mhz_ceil(new_clocks->dcfclk_khz)); + pp_smu->set_min_deep_sleep_dcfclk(&pp_smu->pp_smu, khz_to_mhz_ceil(new_clocks->dcfclk_deep_sleep_khz)); } } } diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_vbios_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_vbios_smu.c index dbc7cde00433..fe18bb9e19aa 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_vbios_smu.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_vbios_smu.c @@ 
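The clock-manager hunks here and below replace open-coded khz / 1000 and (khz + 999) / 1000 conversions with khz_to_mhz_ceil(), so a requested frequency is always rounded up to the next MHz instead of being silently truncated before it reaches the SMU. The helper itself is defined in a clk_mgr header outside this excerpt; a plausible equivalent, shown only to make the rounding behaviour explicit:

static inline int khz_to_mhz_ceil(int khz)
{
        return DIV_ROUND_UP(khz, 1000); /* e.g. 148500 kHz -> 149 MHz */
}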
-130,7 +130,7 @@ int rv1_vbios_smu_set_dispclk(struct clk_mgr_internal *clk_mgr, int requested_di actual_dispclk_set_mhz = rv1_vbios_smu_send_msg_with_param( clk_mgr, VBIOSSMC_MSG_SetDispclkFreq, - requested_dispclk_khz / 1000); + khz_to_mhz_ceil(requested_dispclk_khz)); if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) { if (dmcu && dmcu->funcs->is_dmcu_initialized(dmcu)) { @@ -150,7 +150,7 @@ int rv1_vbios_smu_set_dprefclk(struct clk_mgr_internal *clk_mgr) actual_dprefclk_set_mhz = rv1_vbios_smu_send_msg_with_param( clk_mgr, VBIOSSMC_MSG_SetDprefclkFreq, - clk_mgr->base.dprefclk_khz / 1000); + khz_to_mhz_ceil(clk_mgr->base.dprefclk_khz)); /* TODO: add code for programing DP DTO, currently this is down by command table */ diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c index a5331b96f551..0d01aa9f15a6 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c @@ -253,20 +253,20 @@ void dcn2_update_clocks(struct clk_mgr *clk_mgr_base, if (should_set_clock(safe_to_lower, new_clocks->dcfclk_khz, clk_mgr_base->clks.dcfclk_khz)) { clk_mgr_base->clks.dcfclk_khz = new_clocks->dcfclk_khz; if (pp_smu && pp_smu->set_hard_min_dcfclk_by_freq) - pp_smu->set_hard_min_dcfclk_by_freq(&pp_smu->pp_smu, clk_mgr_base->clks.dcfclk_khz / 1000); + pp_smu->set_hard_min_dcfclk_by_freq(&pp_smu->pp_smu, khz_to_mhz_ceil(clk_mgr_base->clks.dcfclk_khz)); } if (should_set_clock(safe_to_lower, new_clocks->dcfclk_deep_sleep_khz, clk_mgr_base->clks.dcfclk_deep_sleep_khz)) { clk_mgr_base->clks.dcfclk_deep_sleep_khz = new_clocks->dcfclk_deep_sleep_khz; if (pp_smu && pp_smu->set_min_deep_sleep_dcfclk) - pp_smu->set_min_deep_sleep_dcfclk(&pp_smu->pp_smu, clk_mgr_base->clks.dcfclk_deep_sleep_khz / 1000); + pp_smu->set_min_deep_sleep_dcfclk(&pp_smu->pp_smu, khz_to_mhz_ceil(clk_mgr_base->clks.dcfclk_deep_sleep_khz)); } if (should_set_clock(safe_to_lower, new_clocks->socclk_khz, clk_mgr_base->clks.socclk_khz)) { clk_mgr_base->clks.socclk_khz = new_clocks->socclk_khz; if (pp_smu && pp_smu->set_hard_min_socclk_by_freq) - pp_smu->set_hard_min_socclk_by_freq(&pp_smu->pp_smu, clk_mgr_base->clks.socclk_khz / 1000); + pp_smu->set_hard_min_socclk_by_freq(&pp_smu->pp_smu, khz_to_mhz_ceil(clk_mgr_base->clks.socclk_khz)); } total_plane_count = clk_mgr_helper_get_active_plane_cnt(dc, context); @@ -281,7 +281,7 @@ void dcn2_update_clocks(struct clk_mgr *clk_mgr_base, if (should_set_clock(safe_to_lower, new_clocks->dramclk_khz, clk_mgr_base->clks.dramclk_khz)) { clk_mgr_base->clks.dramclk_khz = new_clocks->dramclk_khz; if (pp_smu && pp_smu->set_hard_min_uclk_by_freq) - pp_smu->set_hard_min_uclk_by_freq(&pp_smu->pp_smu, clk_mgr_base->clks.dramclk_khz / 1000); + pp_smu->set_hard_min_uclk_by_freq(&pp_smu->pp_smu, khz_to_mhz_ceil(clk_mgr_base->clks.dramclk_khz)); } if (should_set_clock(safe_to_lower, new_clocks->dppclk_khz, clk_mgr->base.clks.dppclk_khz)) { @@ -306,7 +306,7 @@ void dcn2_update_clocks(struct clk_mgr *clk_mgr_base, clk_mgr_base->clks.disp_dpp_voltage_level_khz = new_clocks->disp_dpp_voltage_level_khz; if (pp_smu && pp_smu->set_voltage_by_freq) - pp_smu->set_voltage_by_freq(&pp_smu->pp_smu, PP_SMU_NV_DISPCLK, clk_mgr_base->clks.disp_dpp_voltage_level_khz / 1000); + pp_smu->set_voltage_by_freq(&pp_smu->pp_smu, PP_SMU_NV_DISPCLK, khz_to_mhz_ceil(clk_mgr_base->clks.disp_dpp_voltage_level_khz)); } if (dc->config.forced_clocks == false || (force_reset && safe_to_lower)) 
{ @@ -502,7 +502,7 @@ static void dcn2_notify_link_rate_change(struct clk_mgr *clk_mgr_base, struct dc if (max_phyclk_req != clk_mgr_base->clks.phyclk_khz) { clk_mgr_base->clks.phyclk_khz = max_phyclk_req; - pp_smu->set_voltage_by_freq(&pp_smu->pp_smu, PP_SMU_NV_PHYCLK, clk_mgr_base->clks.phyclk_khz / 1000); + pp_smu->set_voltage_by_freq(&pp_smu->pp_smu, PP_SMU_NV_PHYCLK, khz_to_mhz_ceil(clk_mgr_base->clks.phyclk_khz)); } } diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.c index 7deeec9d1c7c..9f7eed6688c4 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.c @@ -126,7 +126,7 @@ int rn_vbios_smu_set_dispclk(struct clk_mgr_internal *clk_mgr, int requested_dis actual_dispclk_set_mhz = rn_vbios_smu_send_msg_with_param( clk_mgr, VBIOSSMC_MSG_SetDispclkFreq, - requested_dispclk_khz / 1000); + khz_to_mhz_ceil(requested_dispclk_khz)); if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) { if (dmcu && dmcu->funcs->is_dmcu_initialized(dmcu)) { @@ -138,7 +138,7 @@ int rn_vbios_smu_set_dispclk(struct clk_mgr_internal *clk_mgr, int requested_dis // pmfw always set clock more than or equal requested clock if (!IS_DIAG_DC(dc->ctx->dce_environment)) - ASSERT(actual_dispclk_set_mhz >= requested_dispclk_khz / 1000); + ASSERT(actual_dispclk_set_mhz >= khz_to_mhz_ceil(requested_dispclk_khz)); return actual_dispclk_set_mhz * 1000; } @@ -150,7 +150,7 @@ int rn_vbios_smu_set_dprefclk(struct clk_mgr_internal *clk_mgr) actual_dprefclk_set_mhz = rn_vbios_smu_send_msg_with_param( clk_mgr, VBIOSSMC_MSG_SetDprefclkFreq, - clk_mgr->base.dprefclk_khz / 1000); + khz_to_mhz_ceil(clk_mgr->base.dprefclk_khz)); /* TODO: add code for programing DP DTO, currently this is down by command table */ @@ -167,7 +167,7 @@ int rn_vbios_smu_set_hard_min_dcfclk(struct clk_mgr_internal *clk_mgr, int reque actual_dcfclk_set_mhz = rn_vbios_smu_send_msg_with_param( clk_mgr, VBIOSSMC_MSG_SetHardMinDcfclkByFreq, - requested_dcfclk_khz / 1000); + khz_to_mhz_ceil(requested_dcfclk_khz)); return actual_dcfclk_set_mhz * 1000; } @@ -182,7 +182,7 @@ int rn_vbios_smu_set_min_deep_sleep_dcfclk(struct clk_mgr_internal *clk_mgr, int actual_min_ds_dcfclk_mhz = rn_vbios_smu_send_msg_with_param( clk_mgr, VBIOSSMC_MSG_SetMinDeepSleepDcfclk, - requested_min_ds_dcfclk_khz / 1000); + khz_to_mhz_ceil(requested_min_ds_dcfclk_khz)); return actual_min_ds_dcfclk_mhz * 1000; } @@ -192,7 +192,7 @@ void rn_vbios_smu_set_phyclk(struct clk_mgr_internal *clk_mgr, int requested_phy rn_vbios_smu_send_msg_with_param( clk_mgr, VBIOSSMC_MSG_SetPhyclkVoltageByFreq, - requested_phyclk_khz / 1000); + khz_to_mhz_ceil(requested_phyclk_khz)); } int rn_vbios_smu_set_dppclk(struct clk_mgr_internal *clk_mgr, int requested_dpp_khz) @@ -203,10 +203,10 @@ int rn_vbios_smu_set_dppclk(struct clk_mgr_internal *clk_mgr, int requested_dpp_ actual_dppclk_set_mhz = rn_vbios_smu_send_msg_with_param( clk_mgr, VBIOSSMC_MSG_SetDppclkFreq, - requested_dpp_khz / 1000); + khz_to_mhz_ceil(requested_dpp_khz)); if (!IS_DIAG_DC(dc->ctx->dce_environment)) - ASSERT(actual_dppclk_set_mhz >= requested_dpp_khz / 1000); + ASSERT(actual_dppclk_set_mhz >= khz_to_mhz_ceil(requested_dpp_khz)); return actual_dppclk_set_mhz * 1000; } diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c index af7004b770ae..1861a147a7fa 100644 --- 
a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c @@ -284,12 +284,12 @@ static void dcn3_update_clocks(struct clk_mgr *clk_mgr_base, if (should_set_clock(safe_to_lower, new_clocks->dcfclk_khz, clk_mgr_base->clks.dcfclk_khz)) { clk_mgr_base->clks.dcfclk_khz = new_clocks->dcfclk_khz; - dcn30_smu_set_hard_min_by_freq(clk_mgr, PPCLK_DCEFCLK, clk_mgr_base->clks.dcfclk_khz / 1000); + dcn30_smu_set_hard_min_by_freq(clk_mgr, PPCLK_DCEFCLK, khz_to_mhz_ceil(clk_mgr_base->clks.dcfclk_khz)); } if (should_set_clock(safe_to_lower, new_clocks->dcfclk_deep_sleep_khz, clk_mgr_base->clks.dcfclk_deep_sleep_khz)) { clk_mgr_base->clks.dcfclk_deep_sleep_khz = new_clocks->dcfclk_deep_sleep_khz; - dcn30_smu_set_min_deep_sleep_dcef_clk(clk_mgr, clk_mgr_base->clks.dcfclk_deep_sleep_khz / 1000); + dcn30_smu_set_min_deep_sleep_dcef_clk(clk_mgr, khz_to_mhz_ceil(clk_mgr_base->clks.dcfclk_deep_sleep_khz)); } if (should_set_clock(safe_to_lower, new_clocks->socclk_khz, clk_mgr_base->clks.socclk_khz)) @@ -317,20 +317,20 @@ static void dcn3_update_clocks(struct clk_mgr *clk_mgr_base, /* set UCLK to requested value if P-State switching is supported, or to re-enable P-State switching */ if (clk_mgr_base->clks.p_state_change_support && (update_uclk || !clk_mgr_base->clks.prev_p_state_change_support)) - dcn30_smu_set_hard_min_by_freq(clk_mgr, PPCLK_UCLK, clk_mgr_base->clks.dramclk_khz / 1000); + dcn30_smu_set_hard_min_by_freq(clk_mgr, PPCLK_UCLK, khz_to_mhz_ceil(clk_mgr_base->clks.dramclk_khz)); if (should_set_clock(safe_to_lower, new_clocks->dppclk_khz, clk_mgr_base->clks.dppclk_khz)) { if (clk_mgr_base->clks.dppclk_khz > new_clocks->dppclk_khz) dpp_clock_lowered = true; clk_mgr_base->clks.dppclk_khz = new_clocks->dppclk_khz; - dcn30_smu_set_hard_min_by_freq(clk_mgr, PPCLK_PIXCLK, clk_mgr_base->clks.dppclk_khz / 1000); + dcn30_smu_set_hard_min_by_freq(clk_mgr, PPCLK_PIXCLK, khz_to_mhz_ceil(clk_mgr_base->clks.dppclk_khz)); update_dppclk = true; } if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz)) { clk_mgr_base->clks.dispclk_khz = new_clocks->dispclk_khz; - dcn30_smu_set_hard_min_by_freq(clk_mgr, PPCLK_DISPCLK, clk_mgr_base->clks.dispclk_khz / 1000); + dcn30_smu_set_hard_min_by_freq(clk_mgr, PPCLK_DISPCLK, khz_to_mhz_ceil(clk_mgr_base->clks.dispclk_khz)); update_dispclk = true; } @@ -396,12 +396,17 @@ static void dcn3_set_hard_min_memclk(struct clk_mgr *clk_mgr_base, bool current_ if (!clk_mgr->smu_present) return; - if (current_mode) - dcn30_smu_set_hard_min_by_freq(clk_mgr, PPCLK_UCLK, - clk_mgr_base->clks.dramclk_khz / 1000); - else + if (current_mode) { + if (clk_mgr_base->clks.p_state_change_support) + dcn30_smu_set_hard_min_by_freq(clk_mgr, PPCLK_UCLK, + khz_to_mhz_ceil(clk_mgr_base->clks.dramclk_khz)); + else + dcn30_smu_set_hard_min_by_freq(clk_mgr, PPCLK_UCLK, + clk_mgr_base->bw_params->clk_table.entries[clk_mgr_base->bw_params->clk_table.num_entries - 1].memclk_mhz); + } else { dcn30_smu_set_hard_min_by_freq(clk_mgr, PPCLK_UCLK, clk_mgr_base->bw_params->clk_table.entries[0].memclk_mhz); + } } /* Set max memclk to highest DPM value */ @@ -489,7 +494,7 @@ static void dcn30_notify_link_rate_change(struct clk_mgr *clk_mgr_base, struct d if (max_phyclk_req != clk_mgr_base->clks.phyclk_khz) { clk_mgr_base->clks.phyclk_khz = max_phyclk_req; - dcn30_smu_set_hard_min_by_freq(clk_mgr, PPCLK_PHYCLK, clk_mgr_base->clks.phyclk_khz / 1000); + dcn30_smu_set_hard_min_by_freq(clk_mgr, PPCLK_PHYCLK, 
khz_to_mhz_ceil(clk_mgr_base->clks.phyclk_khz)); } } diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/dcn301_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/dcn301_smu.c index 07774fa2c2cf..6ea642615854 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/dcn301_smu.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/dcn301_smu.c @@ -133,7 +133,7 @@ int dcn301_smu_set_dispclk(struct clk_mgr_internal *clk_mgr, int requested_dispc actual_dispclk_set_mhz = dcn301_smu_send_msg_with_param( clk_mgr, VBIOSSMC_MSG_SetDispclkFreq, - requested_dispclk_khz / 1000); + khz_to_mhz_ceil(requested_dispclk_khz)); return actual_dispclk_set_mhz * 1000; } @@ -147,7 +147,7 @@ int dcn301_smu_set_dprefclk(struct clk_mgr_internal *clk_mgr) actual_dprefclk_set_mhz = dcn301_smu_send_msg_with_param( clk_mgr, VBIOSSMC_MSG_SetDprefclkFreq, - clk_mgr->base.dprefclk_khz / 1000); + khz_to_mhz_ceil(clk_mgr->base.dprefclk_khz)); /* TODO: add code for programing DP DTO, currently this is down by command table */ @@ -163,7 +163,7 @@ int dcn301_smu_set_hard_min_dcfclk(struct clk_mgr_internal *clk_mgr, int request actual_dcfclk_set_mhz = dcn301_smu_send_msg_with_param( clk_mgr, VBIOSSMC_MSG_SetHardMinDcfclkByFreq, - requested_dcfclk_khz / 1000); + khz_to_mhz_ceil(requested_dcfclk_khz)); return actual_dcfclk_set_mhz * 1000; } @@ -177,7 +177,7 @@ int dcn301_smu_set_min_deep_sleep_dcfclk(struct clk_mgr_internal *clk_mgr, int r actual_min_ds_dcfclk_mhz = dcn301_smu_send_msg_with_param( clk_mgr, VBIOSSMC_MSG_SetMinDeepSleepDcfclk, - requested_min_ds_dcfclk_khz / 1000); + khz_to_mhz_ceil(requested_min_ds_dcfclk_khz)); return actual_min_ds_dcfclk_mhz * 1000; } @@ -191,7 +191,7 @@ int dcn301_smu_set_dppclk(struct clk_mgr_internal *clk_mgr, int requested_dpp_kh actual_dppclk_set_mhz = dcn301_smu_send_msg_with_param( clk_mgr, VBIOSSMC_MSG_SetDppclkFreq, - requested_dpp_khz / 1000); + khz_to_mhz_ceil(requested_dpp_khz)); return actual_dppclk_set_mhz * 1000; } diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c index c636b589d69d..7046da14bb2a 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c @@ -114,7 +114,7 @@ void vg_update_clocks(struct clk_mgr *clk_mgr_base, display_count = vg_get_active_display_cnt_wa(dc, context); /* if we can go lower, go lower */ - if (display_count == 0) { + if (display_count == 0 && !IS_DIAG_DC(dc->ctx->dce_environment)) { union display_idle_optimization_u idle_info = { 0 }; idle_info.idle_info.df_request_disabled = 1; diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_smu.c index dad4a4c18bcf..8c2b77eb9459 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_smu.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_smu.c @@ -147,7 +147,7 @@ int dcn31_smu_set_dispclk(struct clk_mgr_internal *clk_mgr, int requested_dispcl actual_dispclk_set_mhz = dcn31_smu_send_msg_with_param( clk_mgr, VBIOSSMC_MSG_SetDispclkFreq, - (requested_dispclk_khz + 999) / 1000); + khz_to_mhz_ceil(requested_dispclk_khz)); return actual_dispclk_set_mhz * 1000; } @@ -162,7 +162,7 @@ int dcn31_smu_set_dprefclk(struct clk_mgr_internal *clk_mgr) actual_dprefclk_set_mhz = dcn31_smu_send_msg_with_param( clk_mgr, VBIOSSMC_MSG_SetDprefclkFreq, - (clk_mgr->base.dprefclk_khz + 999) / 1000); + khz_to_mhz_ceil(clk_mgr->base.dprefclk_khz)); /* TODO: add code for programing DP 
DTO, currently this is down by command table */ @@ -182,7 +182,7 @@ int dcn31_smu_set_hard_min_dcfclk(struct clk_mgr_internal *clk_mgr, int requeste actual_dcfclk_set_mhz = dcn31_smu_send_msg_with_param( clk_mgr, VBIOSSMC_MSG_SetHardMinDcfclkByFreq, - (requested_dcfclk_khz + 999) / 1000); + khz_to_mhz_ceil(requested_dcfclk_khz)); return actual_dcfclk_set_mhz * 1000; } @@ -200,7 +200,7 @@ int dcn31_smu_set_min_deep_sleep_dcfclk(struct clk_mgr_internal *clk_mgr, int re actual_min_ds_dcfclk_mhz = dcn31_smu_send_msg_with_param( clk_mgr, VBIOSSMC_MSG_SetMinDeepSleepDcfclk, - (requested_min_ds_dcfclk_khz + 999) / 1000); + khz_to_mhz_ceil(requested_min_ds_dcfclk_khz)); return actual_min_ds_dcfclk_mhz * 1000; } @@ -215,7 +215,7 @@ int dcn31_smu_set_dppclk(struct clk_mgr_internal *clk_mgr, int requested_dpp_khz actual_dppclk_set_mhz = dcn31_smu_send_msg_with_param( clk_mgr, VBIOSSMC_MSG_SetDppclkFreq, - (requested_dpp_khz + 999) / 1000); + khz_to_mhz_ceil(requested_dpp_khz)); return actual_dppclk_set_mhz * 1000; } diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index a30283fa5173..c798c65d4276 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -1481,6 +1481,22 @@ bool dc_validate_seamless_boot_timing(const struct dc *dc, return true; } +static inline bool should_update_pipe_for_stream( + struct dc_state *context, + struct pipe_ctx *pipe_ctx, + struct dc_stream_state *stream) +{ + return (pipe_ctx->stream && pipe_ctx->stream == stream); +} + +static inline bool should_update_pipe_for_plane( + struct dc_state *context, + struct pipe_ctx *pipe_ctx, + struct dc_plane_state *plane_state) +{ + return (pipe_ctx->plane_state == plane_state); +} + void dc_enable_stereo( struct dc *dc, struct dc_state *context, @@ -1491,12 +1507,15 @@ void dc_enable_stereo( struct pipe_ctx *pipe; for (i = 0; i < MAX_PIPES; i++) { - if (context != NULL) + if (context != NULL) { pipe = &context->res_ctx.pipe_ctx[i]; - else + } else { + context = dc->current_state; pipe = &dc->current_state->res_ctx.pipe_ctx[i]; - for (j = 0 ; pipe && j < stream_count; j++) { - if (streams[j] && streams[j] == pipe->stream && + } + + for (j = 0; pipe && j < stream_count; j++) { + if (should_update_pipe_for_stream(context, pipe, streams[j]) && dc->hwss.setup_stereo) dc->hwss.setup_stereo(pipe, dc); } @@ -2629,6 +2648,7 @@ static void commit_planes_for_stream(struct dc *dc, { int i, j; struct pipe_ctx *top_pipe_to_program = NULL; + bool should_lock_all_pipes = (update_type != UPDATE_TYPE_FAST); #if defined(CONFIG_DRM_AMD_DC_DCN) dc_z10_restore(dc); @@ -2700,7 +2720,7 @@ static void commit_planes_for_stream(struct dc *dc, top_pipe_to_program->stream_res.tg); } - if ((update_type != UPDATE_TYPE_FAST) && dc->hwss.interdependent_update_lock) + if (should_lock_all_pipes && dc->hwss.interdependent_update_lock) dc->hwss.interdependent_update_lock(dc, context, true); else /* Lock the top pipe while updating plane addrs, since freesync requires @@ -2723,7 +2743,7 @@ static void commit_planes_for_stream(struct dc *dc, if (dc->hwss.program_front_end_for_ctx) dc->hwss.program_front_end_for_ctx(dc, context); - if ((update_type != UPDATE_TYPE_FAST) && dc->hwss.interdependent_update_lock) + if (should_lock_all_pipes && dc->hwss.interdependent_update_lock) dc->hwss.interdependent_update_lock(dc, context, false); else dc->hwss.pipe_control_lock(dc, top_pipe_to_program, false); @@ -2739,14 +2759,14 @@ static void commit_planes_for_stream(struct dc *dc, struct 
pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[j]; if (!pipe_ctx->plane_state) continue; - if (pipe_ctx->plane_state != plane_state) + if (should_update_pipe_for_plane(context, pipe_ctx, plane_state)) continue; - plane_state->triplebuffer_flips = false; + pipe_ctx->plane_state->triplebuffer_flips = false; if (update_type == UPDATE_TYPE_FAST && dc->hwss.program_triplebuffer != NULL && - !plane_state->flip_immediate && dc->debug.enable_tri_buf) { + !pipe_ctx->plane_state->flip_immediate && dc->debug.enable_tri_buf) { /*triple buffer for VUpdate only*/ - plane_state->triplebuffer_flips = true; + pipe_ctx->plane_state->triplebuffer_flips = true; } } if (update_type == UPDATE_TYPE_FULL) { @@ -2762,8 +2782,7 @@ static void commit_planes_for_stream(struct dc *dc, if (!pipe_ctx->top_pipe && !pipe_ctx->prev_odm_pipe && - pipe_ctx->stream && - pipe_ctx->stream == stream) { + should_update_pipe_for_stream(context, pipe_ctx, stream)) { struct dc_stream_status *stream_status = NULL; if (!pipe_ctx->plane_state) @@ -2816,15 +2835,15 @@ static void commit_planes_for_stream(struct dc *dc, for (j = 0; j < dc->res_pool->pipe_count; j++) { struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[j]; - if (pipe_ctx->stream != stream) + if (!should_update_pipe_for_stream(context, pipe_ctx, stream)) continue; - if (pipe_ctx->plane_state != plane_state) + if (!should_update_pipe_for_plane(context, pipe_ctx, plane_state)) continue; // GSL has to be used for flip immediate dc->hwss.set_flip_control_gsl(pipe_ctx, - plane_state->flip_immediate); + pipe_ctx->plane_state->flip_immediate); } } @@ -2835,25 +2854,26 @@ static void commit_planes_for_stream(struct dc *dc, for (j = 0; j < dc->res_pool->pipe_count; j++) { struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[j]; - if (pipe_ctx->stream != stream) + if (!should_update_pipe_for_stream(context, pipe_ctx, stream)) continue; - if (pipe_ctx->plane_state != plane_state) + if (!should_update_pipe_for_plane(context, pipe_ctx, plane_state)) continue; + /*program triple buffer after lock based on flip type*/ if (dc->hwss.program_triplebuffer != NULL && dc->debug.enable_tri_buf) { /*only enable triplebuffer for fast_update*/ dc->hwss.program_triplebuffer( - dc, pipe_ctx, plane_state->triplebuffer_flips); + dc, pipe_ctx, pipe_ctx->plane_state->triplebuffer_flips); } - if (srf_updates[i].flip_addr) + if (pipe_ctx->plane_state->update_flags.bits.addr_update) dc->hwss.update_plane_addr(dc, pipe_ctx); } } } - if ((update_type != UPDATE_TYPE_FAST) && dc->hwss.interdependent_update_lock) + if (should_lock_all_pipes && dc->hwss.interdependent_update_lock) dc->hwss.interdependent_update_lock(dc, context, false); else dc->hwss.pipe_control_lock(dc, top_pipe_to_program, false); @@ -2897,7 +2917,7 @@ static void commit_planes_for_stream(struct dc *dc, continue; if (pipe_ctx->bottom_pipe || pipe_ctx->next_odm_pipe || - !pipe_ctx->stream || pipe_ctx->stream != stream || + !pipe_ctx->stream || !should_update_pipe_for_stream(context, pipe_ctx, stream) || !pipe_ctx->plane_state->update_flags.bits.addr_update || pipe_ctx->plane_state->skip_manual_trigger) continue; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index 6132b645bfd1..8bd7f42a8053 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -541,6 +541,7 @@ static void link_disconnect_sink(struct dc_link *link) } link->dpcd_sink_count = 0; + //link->dpcd_caps.dpcd_rev.raw = 0; } static void 
link_disconnect_remap(struct dc_sink *prev_sink, struct dc_link *link) @@ -742,6 +743,7 @@ static bool detect_dp(struct dc_link *link, sink_caps, audio_support); link->dpcd_caps.dongle_type = sink_caps->dongle_type; + link->dpcd_caps.dpcd_rev.raw = 0; } return true; @@ -1663,6 +1665,12 @@ struct dc_link *link_create(const struct link_init_data *init_params) if (false == dc_link_construct(link, init_params)) goto construct_fail; + /* + * Must use preferred_link_setting, not reported_link_cap or verified_link_cap, + * since struct preferred_link_setting won't be reset after S3. + */ + link->preferred_link_setting.dpcd_source_device_specific_field_support = true; + return link; construct_fail: @@ -3509,61 +3517,6 @@ void dc_link_enable_hpd_filter(struct dc_link *link, bool enable) } } -uint32_t dc_bandwidth_in_kbps_from_timing( - const struct dc_crtc_timing *timing) -{ - uint32_t bits_per_channel = 0; - uint32_t kbps; - -#if defined(CONFIG_DRM_AMD_DC_DCN) - if (timing->flags.DSC) - return dc_dsc_stream_bandwidth_in_kbps(timing, - timing->dsc_cfg.bits_per_pixel, - timing->dsc_cfg.num_slices_h, - timing->dsc_cfg.is_dp); -#endif - - switch (timing->display_color_depth) { - case COLOR_DEPTH_666: - bits_per_channel = 6; - break; - case COLOR_DEPTH_888: - bits_per_channel = 8; - break; - case COLOR_DEPTH_101010: - bits_per_channel = 10; - break; - case COLOR_DEPTH_121212: - bits_per_channel = 12; - break; - case COLOR_DEPTH_141414: - bits_per_channel = 14; - break; - case COLOR_DEPTH_161616: - bits_per_channel = 16; - break; - default: - ASSERT(bits_per_channel != 0); - bits_per_channel = 8; - break; - } - - kbps = timing->pix_clk_100hz / 10; - kbps *= bits_per_channel; - - if (timing->flags.Y_ONLY != 1) { - /*Only YOnly make reduce bandwidth by 1/3 compares to RGB*/ - kbps *= 3; - if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR420) - kbps /= 2; - else if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR422) - kbps = kbps * 2 / 3; - } - - return kbps; - -} - void dc_link_set_drive_settings(struct dc *dc, struct link_training_settings *lt_settings, const struct dc_link *link) @@ -3769,3 +3722,58 @@ bool dc_link_should_enable_fec(const struct dc_link *link) return ret; } + +uint32_t dc_bandwidth_in_kbps_from_timing( + const struct dc_crtc_timing *timing) +{ + uint32_t bits_per_channel = 0; + uint32_t kbps; + +#if defined(CONFIG_DRM_AMD_DC_DCN) + if (timing->flags.DSC) + return dc_dsc_stream_bandwidth_in_kbps(timing, + timing->dsc_cfg.bits_per_pixel, + timing->dsc_cfg.num_slices_h, + timing->dsc_cfg.is_dp); +#endif + + switch (timing->display_color_depth) { + case COLOR_DEPTH_666: + bits_per_channel = 6; + break; + case COLOR_DEPTH_888: + bits_per_channel = 8; + break; + case COLOR_DEPTH_101010: + bits_per_channel = 10; + break; + case COLOR_DEPTH_121212: + bits_per_channel = 12; + break; + case COLOR_DEPTH_141414: + bits_per_channel = 14; + break; + case COLOR_DEPTH_161616: + bits_per_channel = 16; + break; + default: + ASSERT(bits_per_channel != 0); + bits_per_channel = 8; + break; + } + + kbps = timing->pix_clk_100hz / 10; + kbps *= bits_per_channel; + + if (timing->flags.Y_ONLY != 1) { + /*Only YOnly make reduce bandwidth by 1/3 compares to RGB*/ + kbps *= 3; + if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR420) + kbps /= 2; + else if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR422) + kbps = kbps * 2 / 3; + } + + return kbps; + +} diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index a6d0fd24fd02..cd025c12f17b 100644 --- 
a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -1241,29 +1241,15 @@ enum link_training_result dp_check_link_loss_status( static inline void decide_8b_10b_training_settings( struct dc_link *link, const struct dc_link_settings *link_setting, - const struct dc_link_training_overrides *overrides, struct link_training_settings *lt_settings) { - uint32_t lane; - memset(lt_settings, '\0', sizeof(struct link_training_settings)); /* Initialize link settings */ lt_settings->link_settings.use_link_rate_set = link_setting->use_link_rate_set; lt_settings->link_settings.link_rate_set = link_setting->link_rate_set; - - if (link->preferred_link_setting.link_rate != LINK_RATE_UNKNOWN) - lt_settings->link_settings.link_rate = link->preferred_link_setting.link_rate; - else - lt_settings->link_settings.link_rate = link_setting->link_rate; - - if (link->preferred_link_setting.lane_count != LANE_COUNT_UNKNOWN) - lt_settings->link_settings.lane_count = link->preferred_link_setting.lane_count; - else - lt_settings->link_settings.lane_count = link_setting->lane_count; - - /*@todo[vdevulap] move SS to LS, should not be handled by displaypath*/ - + lt_settings->link_settings.link_rate = link_setting->link_rate; + lt_settings->link_settings.lane_count = link_setting->lane_count; /* TODO hard coded to SS for now * lt_settings.link_settings.link_spread = * dal_display_path_is_ss_supported( @@ -1271,30 +1257,52 @@ static inline void decide_8b_10b_training_settings( * LINK_SPREAD_05_DOWNSPREAD_30KHZ : * LINK_SPREAD_DISABLED; */ - /* Initialize link spread */ - if (link->dp_ss_off) - lt_settings->link_settings.link_spread = LINK_SPREAD_DISABLED; - else if (overrides->downspread != NULL) - lt_settings->link_settings.link_spread - = *overrides->downspread - ? LINK_SPREAD_05_DOWNSPREAD_30KHZ - : LINK_SPREAD_DISABLED; - else - lt_settings->link_settings.link_spread = LINK_SPREAD_05_DOWNSPREAD_30KHZ; - + lt_settings->link_settings.link_spread = link->dp_ss_off ? + LINK_SPREAD_DISABLED : LINK_SPREAD_05_DOWNSPREAD_30KHZ; lt_settings->lttpr_mode = link->lttpr_mode; + lt_settings->cr_pattern_time = get_cr_training_aux_rd_interval(link, link_setting); + lt_settings->eq_pattern_time = get_eq_training_aux_rd_interval(link, link_setting); + lt_settings->pattern_for_cr = decide_cr_training_pattern(link_setting); + lt_settings->pattern_for_eq = decide_eq_training_pattern(link, link_setting); + lt_settings->enhanced_framing = 1; + lt_settings->should_set_fec_ready = true; +} + +void dp_decide_training_settings( + struct dc_link *link, + const struct dc_link_settings *link_settings, + struct link_training_settings *lt_settings) +{ + if (dp_get_link_encoding_format(link_settings) == DP_8b_10b_ENCODING) + decide_8b_10b_training_settings(link, link_settings, lt_settings); +} + +static void override_training_settings( + struct dc_link *link, + const struct dc_link_training_overrides *overrides, + struct link_training_settings *lt_settings) +{ + uint32_t lane; + + /* Override link settings */ + if (link->preferred_link_setting.link_rate != LINK_RATE_UNKNOWN) + lt_settings->link_settings.link_rate = link->preferred_link_setting.link_rate; + if (link->preferred_link_setting.lane_count != LANE_COUNT_UNKNOWN) + lt_settings->link_settings.lane_count = link->preferred_link_setting.lane_count; + + /* Override link spread */ + if (!link->dp_ss_off && overrides->downspread != NULL) + lt_settings->link_settings.link_spread = *overrides->downspread ? 
+ LINK_SPREAD_05_DOWNSPREAD_30KHZ + : LINK_SPREAD_DISABLED; - /* Initialize lane settings overrides */ + /* Override lane settings */ if (overrides->voltage_swing != NULL) lt_settings->voltage_swing = overrides->voltage_swing; - if (overrides->pre_emphasis != NULL) lt_settings->pre_emphasis = overrides->pre_emphasis; - if (overrides->post_cursor2 != NULL) lt_settings->post_cursor2 = overrides->post_cursor2; - - /* Initialize lane settings (VS/PE/PC2) */ for (lane = 0; lane < LANE_COUNT_DP_MAX; lane++) { lt_settings->lane_settings[lane].VOLTAGE_SWING = lt_settings->voltage_swing != NULL ? @@ -1313,45 +1321,22 @@ static inline void decide_8b_10b_training_settings( /* Initialize training timings */ if (overrides->cr_pattern_time != NULL) lt_settings->cr_pattern_time = *overrides->cr_pattern_time; - else - lt_settings->cr_pattern_time = get_cr_training_aux_rd_interval(link, link_setting); if (overrides->eq_pattern_time != NULL) lt_settings->eq_pattern_time = *overrides->eq_pattern_time; - else - lt_settings->eq_pattern_time = get_eq_training_aux_rd_interval(link, link_setting); if (overrides->pattern_for_cr != NULL) lt_settings->pattern_for_cr = *overrides->pattern_for_cr; - else - lt_settings->pattern_for_cr = decide_cr_training_pattern(link_setting); if (overrides->pattern_for_eq != NULL) lt_settings->pattern_for_eq = *overrides->pattern_for_eq; - else - lt_settings->pattern_for_eq = decide_eq_training_pattern(link, link_setting); if (overrides->enhanced_framing != NULL) lt_settings->enhanced_framing = *overrides->enhanced_framing; - else - lt_settings->enhanced_framing = 1; if (link->preferred_training_settings.fec_enable != NULL) lt_settings->should_set_fec_ready = *link->preferred_training_settings.fec_enable; - else - lt_settings->should_set_fec_ready = true; -} - -void dp_decide_training_settings( - struct dc_link *link, - const struct dc_link_settings *link_settings, - const struct dc_link_training_overrides *overrides, - struct link_training_settings *lt_settings) -{ - if (dp_get_link_encoding_format(link_settings) == DP_8b_10b_ENCODING) - decide_8b_10b_training_settings(link, link_settings, overrides, lt_settings); } - uint8_t dp_convert_to_count(uint8_t lttpr_repeater_count) { switch (lttpr_repeater_count) { @@ -1581,6 +1566,9 @@ bool dc_link_dp_perform_link_training_skip_aux( dp_decide_training_settings( link, link_setting, + <_settings); + override_training_settings( + link, &link->preferred_training_settings, <_settings); @@ -1727,6 +1715,9 @@ enum link_training_result dc_link_dp_perform_link_training( dp_decide_training_settings( link, link_settings, + <_settings); + override_training_settings( + link, &link->preferred_training_settings, <_settings); @@ -1939,11 +1930,13 @@ enum link_training_result dc_link_dp_sync_lt_attempt( bool fec_enable = false; dp_decide_training_settings( - link, - link_settings, - lt_overrides, - <_settings); - + link, + link_settings, + <_settings); + override_training_settings( + link, + lt_overrides, + <_settings); /* Setup MST Mode */ if (lt_overrides->mst_enable) set_dp_mst_mode(link, *lt_overrides->mst_enable); @@ -4776,10 +4769,18 @@ void dpcd_set_source_specific_data(struct dc_link *link) uint8_t hblank_size = (uint8_t)link->dc->caps.min_horizontal_blanking_period; - result_write_min_hblank = core_link_write_dpcd(link, - DP_SOURCE_MINIMUM_HBLANK_SUPPORTED, (uint8_t *)(&hblank_size), - sizeof(hblank_size)); + if (link->preferred_link_setting.dpcd_source_device_specific_field_support) { + result_write_min_hblank = core_link_write_dpcd(link, + 
DP_SOURCE_MINIMUM_HBLANK_SUPPORTED, (uint8_t *)(&hblank_size), + sizeof(hblank_size)); + + if (result_write_min_hblank == DC_ERROR_UNEXPECTED) + link->preferred_link_setting.dpcd_source_device_specific_field_support = false; + } else { + DC_LOG_DC("Sink device does not support 00340h DPCD write. Skipping on purpose.\n"); + } } + DC_TRACE_LEVEL_MESSAGE(DAL_TRACE_LEVEL_INFORMATION, WPP_BIT_FLAG_DC_DETECTION_DP_CAPS, "result=%u link_index=%u enum dce_version=%d DPCD=0x%04X min_hblank=%u branch_dev_id=0x%x branch_dev_name='%c%c%c%c%c%c'", diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dpcd.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dpcd.c index fe234760a0f5..72970e49800a 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dpcd.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dpcd.c @@ -1,3 +1,28 @@ +/* + * Copyright 2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + #include <inc/core_status.h> #include <dc_link.h> #include <inc/link_hwss.h> diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c index 1a89d565c92e..de80a9ea4cfa 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c @@ -305,7 +305,7 @@ struct link_encoder *link_enc_cfg_get_next_avail_link_enc( const struct dc_state *state) { struct link_encoder *link_enc = NULL; - enum engine_id eng_id = ENGINE_ID_UNKNOWN; + enum engine_id eng_id; eng_id = find_first_avail_link_enc(dc->ctx, state); if (eng_id != ENGINE_ID_UNKNOWN) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 1596f6b7fed7..a60396d5be44 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -1030,7 +1030,7 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx) /* Timing borders are part of vactive that we are also supposed to skip in addition * to any stream dst offset. Since dm logic assumes dst is in addressable - * space we need to add the the left and top borders to dst offsets temporarily. + * space we need to add the left and top borders to dst offsets temporarily. 
* TODO: fix in DM, stream dst is supposed to be in vactive */ pipe_ctx->stream->dst.x += timing->h_border_left; @@ -1051,6 +1051,11 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx) /* depends on scaling ratios and recout, does not calculate offset yet */ calculate_viewport_size(pipe_ctx); + /* Stopgap for validation of ODM + MPO on one side of screen case */ + if (pipe_ctx->plane_res.scl_data.viewport.height < 1 || + pipe_ctx->plane_res.scl_data.viewport.width < 1) + return false; + /* * LB calculations depend on vp size, h/v_active and scaling ratios * Setting line buffer pixel depth to 24bpp yields banding diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stat.c b/drivers/gpu/drm/amd/display/dc/core/dc_stat.c index 31761f3595a6..28ef9760fa34 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_stat.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_stat.c @@ -62,3 +62,27 @@ void dc_stat_get_dmub_notification(const struct dc *dc, struct dmub_notification status = dmub_srv_stat_get_notification(dmub, notify); ASSERT(status == DMUB_STATUS_OK); } + +/** + ***************************************************************************** + * Function: dc_stat_get_dmub_dataout + * + * @brief + * Calls dmub layer to retrieve dmub gpint dataout + * + * @param + * [in] dc: dc structure + * [in] dataout: dmub gpint dataout + * + * @return + * None + ***************************************************************************** + */ +void dc_stat_get_dmub_dataout(const struct dc *dc, uint32_t *dataout) +{ + struct dmub_srv *dmub = dc->ctx->dmub_srv->dmub; + enum dmub_status status; + + status = dmub_srv_get_gpint_dataout(dmub, dataout); + ASSERT(status == DMUB_STATUS_OK); +} diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c index 45931ee14a6e..f0f54f4d3d9b 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c @@ -221,6 +221,9 @@ struct dc_stream_status *dc_stream_get_status_from_state( { uint8_t i; + if (state == NULL) + return NULL; + for (i = 0; i < state->stream_count; i++) { if (stream == state->streams[i]) return &state->stream_status[i]; @@ -243,6 +246,40 @@ struct dc_stream_status *dc_stream_get_status( return dc_stream_get_status_from_state(dc->current_state, stream); } +static void program_cursor_attributes( + struct dc *dc, + struct dc_stream_state *stream, + const struct dc_cursor_attributes *attributes) +{ + int i; + struct resource_context *res_ctx; + struct pipe_ctx *pipe_to_program = NULL; + + if (!stream) + return; + + res_ctx = &dc->current_state->res_ctx; + + for (i = 0; i < MAX_PIPES; i++) { + struct pipe_ctx *pipe_ctx = &res_ctx->pipe_ctx[i]; + + if (pipe_ctx->stream != stream) + continue; + + if (!pipe_to_program) { + pipe_to_program = pipe_ctx; + dc->hwss.cursor_lock(dc, pipe_to_program, true); + } + + dc->hwss.set_cursor_attribute(pipe_ctx); + if (dc->hwss.set_cursor_sdr_white_level) + dc->hwss.set_cursor_sdr_white_level(pipe_ctx); + } + + if (pipe_to_program) + dc->hwss.cursor_lock(dc, pipe_to_program, false); +} + #ifndef TRIM_FSFT /* * dc_optimize_timing_for_fsft() - dc to optimize timing @@ -267,10 +304,7 @@ bool dc_stream_set_cursor_attributes( struct dc_stream_state *stream, const struct dc_cursor_attributes *attributes) { - int i; struct dc *dc; - struct resource_context *res_ctx; - struct pipe_ctx *pipe_to_program = NULL; #if defined(CONFIG_DRM_AMD_DC_DCN) bool reset_idle_optimizations = false; #endif @@ -290,7 +324,6 @@ bool 
dc_stream_set_cursor_attributes( } dc = stream->ctx->dc; - res_ctx = &dc->current_state->res_ctx; stream->cursor_attributes = *attributes; #if defined(CONFIG_DRM_AMD_DC_DCN) @@ -302,11 +335,39 @@ bool dc_stream_set_cursor_attributes( } #endif + program_cursor_attributes(dc, stream, attributes); + +#if defined(CONFIG_DRM_AMD_DC_DCN) + /* re-enable idle optimizations if necessary */ + if (reset_idle_optimizations) + dc_allow_idle_optimizations(dc, true); + +#endif + return true; +} + +static void program_cursor_position( + struct dc *dc, + struct dc_stream_state *stream, + const struct dc_cursor_position *position) +{ + int i; + struct resource_context *res_ctx; + struct pipe_ctx *pipe_to_program = NULL; + + if (!stream) + return; + + res_ctx = &dc->current_state->res_ctx; for (i = 0; i < MAX_PIPES; i++) { struct pipe_ctx *pipe_ctx = &res_ctx->pipe_ctx[i]; - if (pipe_ctx->stream != stream) + if (pipe_ctx->stream != stream || + (!pipe_ctx->plane_res.mi && !pipe_ctx->plane_res.hubp) || + !pipe_ctx->plane_state || + (!pipe_ctx->plane_res.xfm && !pipe_ctx->plane_res.dpp) || + (!pipe_ctx->plane_res.ipp && !pipe_ctx->plane_res.dpp)) continue; if (!pipe_to_program) { @@ -314,31 +375,18 @@ bool dc_stream_set_cursor_attributes( dc->hwss.cursor_lock(dc, pipe_to_program, true); } - dc->hwss.set_cursor_attribute(pipe_ctx); - if (dc->hwss.set_cursor_sdr_white_level) - dc->hwss.set_cursor_sdr_white_level(pipe_ctx); + dc->hwss.set_cursor_position(pipe_ctx); } if (pipe_to_program) dc->hwss.cursor_lock(dc, pipe_to_program, false); - -#if defined(CONFIG_DRM_AMD_DC_DCN) - /* re-enable idle optimizations if necessary */ - if (reset_idle_optimizations) - dc_allow_idle_optimizations(dc, true); - -#endif - return true; } bool dc_stream_set_cursor_position( struct dc_stream_state *stream, const struct dc_cursor_position *position) { - int i; struct dc *dc; - struct resource_context *res_ctx; - struct pipe_ctx *pipe_to_program = NULL; #if defined(CONFIG_DRM_AMD_DC_DCN) bool reset_idle_optimizations = false; #endif @@ -354,7 +402,6 @@ bool dc_stream_set_cursor_position( } dc = stream->ctx->dc; - res_ctx = &dc->current_state->res_ctx; #if defined(CONFIG_DRM_AMD_DC_DCN) dc_z10_restore(dc); @@ -367,27 +414,7 @@ bool dc_stream_set_cursor_position( #endif stream->cursor_position = *position; - for (i = 0; i < MAX_PIPES; i++) { - struct pipe_ctx *pipe_ctx = &res_ctx->pipe_ctx[i]; - - if (pipe_ctx->stream != stream || - (!pipe_ctx->plane_res.mi && !pipe_ctx->plane_res.hubp) || - !pipe_ctx->plane_state || - (!pipe_ctx->plane_res.xfm && !pipe_ctx->plane_res.dpp) || - (!pipe_ctx->plane_res.ipp && !pipe_ctx->plane_res.dpp)) - continue; - - if (!pipe_to_program) { - pipe_to_program = pipe_ctx; - dc->hwss.cursor_lock(dc, pipe_to_program, true); - } - - dc->hwss.set_cursor_position(pipe_ctx); - } - - if (pipe_to_program) - dc->hwss.cursor_lock(dc, pipe_to_program, false); - + program_cursor_position(dc, stream, position); #if defined(CONFIG_DRM_AMD_DC_DCN) /* re-enable idle optimizations if necessary */ if (reset_idle_optimizations) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 21d78289b048..3ab52d9a82cf 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -45,7 +45,7 @@ /* forward declaration */ struct aux_payload; -#define DC_VER "3.2.141" +#define DC_VER "3.2.149" #define MAX_SURFACES 3 #define MAX_PLANES 6 @@ -460,7 +460,65 @@ union mem_low_power_enable_options { uint32_t u32All; }; +struct dc_debug_data { + uint32_t ltFailCount; + uint32_t 
i2cErrorCount; + uint32_t auxErrorCount; +}; + +struct dc_phy_addr_space_config { + struct { + uint64_t start_addr; + uint64_t end_addr; + uint64_t fb_top; + uint64_t fb_offset; + uint64_t fb_base; + uint64_t agp_top; + uint64_t agp_bot; + uint64_t agp_base; + } system_aperture; + + struct { + uint64_t page_table_start_addr; + uint64_t page_table_end_addr; + uint64_t page_table_base_addr; + bool base_addr_is_mc_addr; + } gart_config; + + bool valid; + bool is_hvm_enabled; + uint64_t page_table_default_page_addr; +}; + +struct dc_virtual_addr_space_config { + uint64_t page_table_base_addr; + uint64_t page_table_start_addr; + uint64_t page_table_end_addr; + uint32_t page_table_block_size_in_bytes; + uint8_t page_table_depth; // 1 = 1 level, 2 = 2 level, etc. 0 = invalid +}; + +struct dc_bounding_box_overrides { + int sr_exit_time_ns; + int sr_enter_plus_exit_time_ns; + int urgent_latency_ns; + int percent_of_ideal_drambw; + int dram_clock_change_latency_ns; + int dummy_clock_change_latency_ns; + /* This forces a hard min on the DCFCLK we use + * for DML. Unlike the debug option for forcing + * DCFCLK, this override affects watermark calculations + */ + int min_dcfclk_mhz; +}; + +struct dc_state; +struct resource_pool; +struct dce_hwseq; + struct dc_debug_options { + bool native422_support; + bool disable_dsc; enum visual_confirm visual_confirm; bool sanity_checks; bool max_disp_clk; @@ -486,7 +544,6 @@ struct dc_debug_options { bool disable_dsc_power_gate; int dsc_min_slice_height_override; int dsc_bpp_increment_div; - bool native422_support; bool disable_pplib_wm_range; enum wm_report_mode pplib_wm_report_mode; unsigned int min_disp_clk_khz; @@ -556,7 +613,6 @@ struct dc_debug_options { bool validate_dml_output; bool enable_dmcub_surface_flip; bool usbc_combo_phy_reset_wa; - bool disable_dsc; bool enable_dram_clock_change_one_display_vactive; union mem_low_power_enable_options enable_mem_low_power; bool force_vblank_alignment; @@ -574,69 +630,13 @@ struct dc_debug_options { #endif }; -struct dc_debug_data { - uint32_t ltFailCount; - uint32_t i2cErrorCount; - uint32_t auxErrorCount; -}; - -struct dc_phy_addr_space_config { - struct { - uint64_t start_addr; - uint64_t end_addr; - uint64_t fb_top; - uint64_t fb_offset; - uint64_t fb_base; - uint64_t agp_top; - uint64_t agp_bot; - uint64_t agp_base; - } system_aperture; - - struct { - uint64_t page_table_start_addr; - uint64_t page_table_end_addr; - uint64_t page_table_base_addr; -#if defined(CONFIG_DRM_AMD_DC_DCN) - bool base_addr_is_mc_addr; -#endif - } gart_config; - - bool valid; - bool is_hvm_enabled; - uint64_t page_table_default_page_addr; -}; - -struct dc_virtual_addr_space_config { - uint64_t page_table_base_addr; - uint64_t page_table_start_addr; - uint64_t page_table_end_addr; - uint32_t page_table_block_size_in_bytes; - uint8_t page_table_depth; // 1 = 1 level, 2 = 2 level, etc. 0 = invalid -}; - -struct dc_bounding_box_overrides { - int sr_exit_time_ns; - int sr_enter_plus_exit_time_ns; - int urgent_latency_ns; - int percent_of_ideal_drambw; - int dram_clock_change_latency_ns; - int dummy_clock_change_latency_ns; - /* This forces a hard min on the DCFCLK we use - * for DML. 
Unlike the debug option for forcing - * DCFCLK, this override affects watermark calculations - */ - int min_dcfclk_mhz; -}; - -struct resource_pool; -struct dce_hwseq; struct gpu_info_soc_bounding_box_v1_0; struct dc { + struct dc_debug_options debug; struct dc_versions versions; struct dc_caps caps; struct dc_cap_funcs cap_funcs; struct dc_config config; - struct dc_debug_options debug; struct dc_bounding_box_overrides bb_overrides; struct dc_bug_wa work_arounds; struct dc_context *ctx; diff --git a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h index 1948cd9427d7..4f54bde1bb1c 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h @@ -109,6 +109,7 @@ struct dc_link_settings { enum dc_link_spread link_spread; bool use_link_rate_set; uint8_t link_rate_set; + bool dpcd_source_device_specific_field_support; }; struct dc_lane_settings { diff --git a/drivers/gpu/drm/amd/display/dc/dc_stat.h b/drivers/gpu/drm/amd/display/dc/dc_stat.h index 2a000ba54ddb..aacbfd786c6c 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_stat.h +++ b/drivers/gpu/drm/amd/display/dc/dc_stat.h @@ -38,5 +38,6 @@ #include "dmub/dmub_srv.h" void dc_stat_get_dmub_notification(const struct dc *dc, struct dmub_notification *notify); +void dc_stat_get_dmub_dataout(const struct dc *dc, uint32_t *dataout); #endif /* _DC_STAT_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/dc_trace.h b/drivers/gpu/drm/amd/display/dc/dc_trace.h index d2615357269b..c711797e5c9e 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_trace.h +++ b/drivers/gpu/drm/amd/display/dc/dc_trace.h @@ -37,3 +37,6 @@ #define TRACE_DCN_CLOCK_STATE(dcn_clocks) \ trace_amdgpu_dm_dc_clocks_state(dcn_clocks) + +#define TRACE_DCN_FPU(begin, function, line, ref_count) \ + trace_dcn_fpu(begin, function, line, ref_count) diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h index 8016e22114ce..c1532930169b 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_types.h @@ -75,18 +75,6 @@ enum dce_environment { #define IS_DIAG_DC(dce_environment) \ (IS_FPGA_MAXIMUS_DC(dce_environment) || (dce_environment == DCE_ENV_DIAG)) -struct hw_asic_id { - uint32_t chip_id; - uint32_t chip_family; - uint32_t pci_revision_id; - uint32_t hw_internal_rev; - uint32_t vram_type; - uint32_t vram_width; - uint32_t feature_flags; - uint32_t fake_paths_num; - void *atombios_base_address; -}; - struct dc_perf_trace { unsigned long read_count; unsigned long write_count; @@ -94,36 +82,7 @@ struct dc_perf_trace { unsigned long last_entry_write; }; -struct dc_context { - struct dc *dc; - - void *driver_context; /* e.g. amdgpu_device */ - struct dc_perf_trace *perf_trace; - void *cgs_device; - - enum dce_environment dce_environment; - struct hw_asic_id asic_id; - - /* todo: below should probably move to dc. 
to facilitate removal - * of AS we will store these here - */ - enum dce_version dce_version; - struct dc_bios *dc_bios; - bool created_bios; - struct gpio_service *gpio_service; - uint32_t dc_sink_id_count; - uint32_t dc_stream_id_count; - uint32_t dc_edp_id_count; - uint64_t fbc_gpu_addr; - struct dc_dmub_srv *dmub_srv; - -#ifdef CONFIG_DRM_AMD_DC_HDCP - struct cp_psp cp_psp; -#endif -}; - - -#define DC_MAX_EDID_BUFFER_SIZE 1280 +#define DC_MAX_EDID_BUFFER_SIZE 2048 #define DC_EDID_BLOCK_SIZE 128 #define MAX_SURFACE_NUM 4 #define NUM_PIXEL_FORMATS 10 @@ -836,6 +795,46 @@ struct dc_clock_config { uint32_t current_clock_khz;/*current clock in use*/ }; +struct hw_asic_id { + uint32_t chip_id; + uint32_t chip_family; + uint32_t pci_revision_id; + uint32_t hw_internal_rev; + uint32_t vram_type; + uint32_t vram_width; + uint32_t feature_flags; + uint32_t fake_paths_num; + void *atombios_base_address; +}; + +struct dc_context { + struct dc *dc; + + void *driver_context; /* e.g. amdgpu_device */ + struct dc_perf_trace *perf_trace; + void *cgs_device; + + enum dce_environment dce_environment; + struct hw_asic_id asic_id; + + /* todo: below should probably move to dc. to facilitate removal + * of AS we will store these here + */ + enum dce_version dce_version; + struct dc_bios *dc_bios; + bool created_bios; + struct gpio_service *gpio_service; + uint32_t dc_sink_id_count; + uint32_t dc_stream_id_count; + uint32_t dc_edp_id_count; + uint64_t fbc_gpu_addr; + struct dc_dmub_srv *dmub_srv; +#ifdef CONFIG_DRM_AMD_DC_HDCP + struct cp_psp cp_psp; +#endif + +}; + /* DSC DPCD capabilities */ union dsc_slice_caps1 { struct { diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c index 2fb88e54a4bf..e14f99b4b0c3 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c @@ -42,6 +42,11 @@ #define DC_LOGGER \ engine->ctx->logger +#define DC_TRACE_LEVEL_MESSAGE(...) /* do nothing */ +#define IS_DC_I2CAUX_LOGGING_ENABLED() (false) +#define LOG_FLAG_Error_I2cAux LOG_ERROR +#define LOG_FLAG_I2cAux_DceAux LOG_I2C_AUX + #include "reg_helper.h" #undef FN @@ -71,6 +76,8 @@ enum { #define DEFAULT_AUX_ENGINE_MULT 0 #define DEFAULT_AUX_ENGINE_LENGTH 69 +#define DC_TRACE_LEVEL_MESSAGE(...) /* do nothing */ + static void release_engine( struct dce_aux *engine) { @@ -621,6 +628,58 @@ int dce_aux_transfer_dmub_raw(struct ddc_service *ddc, #define AUX_MAX_INVALID_REPLY_RETRIES 2 #define AUX_MAX_TIMEOUT_RETRIES 3 +static void dce_aux_log_payload(const char *payload_name, + unsigned char *payload, uint32_t length, uint32_t max_length_to_log) +{ + if (!IS_DC_I2CAUX_LOGGING_ENABLED()) + return; + + if (payload && length) { + char hex_str[128] = {0}; + char *hex_str_ptr = &hex_str[0]; + uint32_t hex_str_remaining = sizeof(hex_str); + unsigned char *payload_ptr = payload; + unsigned char *payload_max_to_log_ptr = payload_ptr + min(max_length_to_log, length); + unsigned int count; + char *padding = ""; + + while (payload_ptr < payload_max_to_log_ptr) { + count = snprintf_count(hex_str_ptr, hex_str_remaining, "%s%02X", padding, *payload_ptr); + padding = " "; + hex_str_remaining -= count; + hex_str_ptr += count; + payload_ptr++; + } + + count = snprintf_count(hex_str_ptr, hex_str_remaining, " "); + hex_str_remaining -= count; + hex_str_ptr += count; + + payload_ptr = payload; + while (payload_ptr < payload_max_to_log_ptr) { + count = snprintf_count(hex_str_ptr, hex_str_remaining, "%c", + *payload_ptr >= ' ' ? 
*payload_ptr : '.'); + hex_str_remaining -= count; + hex_str_ptr += count; + payload_ptr++; + } + + DC_TRACE_LEVEL_MESSAGE(DAL_TRACE_LEVEL_VERBOSE, + LOG_FLAG_I2cAux_DceAux, + "dce_aux_log_payload: %s: length=%u: data: %s%s", + payload_name, + length, + hex_str, + (length > max_length_to_log ? " (...)" : " ")); + } else { + DC_TRACE_LEVEL_MESSAGE(DAL_TRACE_LEVEL_VERBOSE, + LOG_FLAG_I2cAux_DceAux, + "dce_aux_log_payload: %s: length=%u: data: <empty payload>", + payload_name, + length); + } +} + bool dce_aux_transfer_with_retries(struct ddc_service *ddc, struct aux_payload *payload) { @@ -646,7 +705,34 @@ bool dce_aux_transfer_with_retries(struct ddc_service *ddc, } for (i = 0; i < AUX_MAX_RETRIES; i++) { + DC_TRACE_LEVEL_MESSAGE(DAL_TRACE_LEVEL_INFORMATION, + LOG_FLAG_I2cAux_DceAux, + "dce_aux_transfer_with_retries: link_index=%u: START: retry %d of %d: address=0x%04x length=%u write=%d mot=%d", + ddc && ddc->link ? ddc->link->link_index : UINT_MAX, + i + 1, + (int)AUX_MAX_RETRIES, + payload->address, + payload->length, + (unsigned int) payload->write, + (unsigned int) payload->mot); + if (payload->write) + dce_aux_log_payload(" write", payload->data, payload->length, 16); ret = dce_aux_transfer_raw(ddc, payload, &operation_result); + DC_TRACE_LEVEL_MESSAGE(DAL_TRACE_LEVEL_INFORMATION, + LOG_FLAG_I2cAux_DceAux, + "dce_aux_transfer_with_retries: link_index=%u: END: retry %d of %d: address=0x%04x length=%u write=%d mot=%d: ret=%d operation_result=%d payload->reply=%u", + ddc && ddc->link ? ddc->link->link_index : UINT_MAX, + i + 1, + (int)AUX_MAX_RETRIES, + payload->address, + payload->length, + (unsigned int) payload->write, + (unsigned int) payload->mot, + ret, + (int)operation_result, + (unsigned int) *payload->reply); + if (!payload->write) + dce_aux_log_payload(" read", payload->data, ret > 0 ? 
ret : 0, 16); switch (operation_result) { case AUX_RET_SUCCESS: @@ -655,30 +741,64 @@ bool dce_aux_transfer_with_retries(struct ddc_service *ddc, switch (*payload->reply) { case AUX_TRANSACTION_REPLY_AUX_ACK: + DC_TRACE_LEVEL_MESSAGE(DAL_TRACE_LEVEL_INFORMATION, + LOG_FLAG_I2cAux_DceAux, + "dce_aux_transfer_with_retries: AUX_RET_SUCCESS: AUX_TRANSACTION_REPLY_AUX_ACK"); if (!payload->write && payload->length != ret) { - if (++aux_ack_retries >= AUX_MAX_RETRIES) + if (++aux_ack_retries >= AUX_MAX_RETRIES) { + DC_TRACE_LEVEL_MESSAGE(DAL_TRACE_LEVEL_ERROR, + LOG_FLAG_Error_I2cAux, + "dce_aux_transfer_with_retries: FAILURE: aux_ack_retries=%d >= AUX_MAX_RETRIES=%d", + aux_defer_retries, + AUX_MAX_RETRIES); goto fail; - else + } else { udelay(300); + } } else return true; break; case AUX_TRANSACTION_REPLY_AUX_DEFER: + DC_TRACE_LEVEL_MESSAGE(DAL_TRACE_LEVEL_INFORMATION, + LOG_FLAG_I2cAux_DceAux, + "dce_aux_transfer_with_retries: AUX_RET_SUCCESS: AUX_TRANSACTION_REPLY_AUX_DEFER"); + /* polling_timeout_period is in us */ defer_time_in_ms += aux110->polling_timeout_period / 1000; ++aux_defer_retries; fallthrough; case AUX_TRANSACTION_REPLY_I2C_OVER_AUX_DEFER: + if (*payload->reply == AUX_TRANSACTION_REPLY_I2C_OVER_AUX_DEFER) + DC_TRACE_LEVEL_MESSAGE(DAL_TRACE_LEVEL_INFORMATION, + LOG_FLAG_I2cAux_DceAux, + "dce_aux_transfer_with_retries: AUX_RET_SUCCESS: AUX_TRANSACTION_REPLY_I2C_OVER_AUX_DEFER"); + retry_on_defer = true; fallthrough; case AUX_TRANSACTION_REPLY_I2C_OVER_AUX_NACK: + if (*payload->reply == AUX_TRANSACTION_REPLY_I2C_OVER_AUX_NACK) + DC_TRACE_LEVEL_MESSAGE(DAL_TRACE_LEVEL_INFORMATION, + LOG_FLAG_I2cAux_DceAux, + "dce_aux_transfer_with_retries: AUX_RET_SUCCESS: AUX_TRANSACTION_REPLY_I2C_OVER_AUX_NACK"); + if (aux_defer_retries >= AUX_MIN_DEFER_RETRIES && defer_time_in_ms >= AUX_MAX_DEFER_TIMEOUT_MS) { + DC_TRACE_LEVEL_MESSAGE(DAL_TRACE_LEVEL_ERROR, + LOG_FLAG_Error_I2cAux, + "dce_aux_transfer_with_retries: FAILURE: aux_defer_retries=%d >= AUX_MIN_DEFER_RETRIES=%d && defer_time_in_ms=%d >= AUX_MAX_DEFER_TIMEOUT_MS=%d", + aux_defer_retries, + AUX_MIN_DEFER_RETRIES, + defer_time_in_ms, + AUX_MAX_DEFER_TIMEOUT_MS); goto fail; } else { if ((*payload->reply == AUX_TRANSACTION_REPLY_AUX_DEFER) || (*payload->reply == AUX_TRANSACTION_REPLY_I2C_OVER_AUX_DEFER)) { + DC_TRACE_LEVEL_MESSAGE(DAL_TRACE_LEVEL_INFORMATION, + LOG_FLAG_I2cAux_DceAux, + "dce_aux_transfer_with_retries: payload->defer_delay=%u", + payload->defer_delay); if (payload->defer_delay > 1) { msleep(payload->defer_delay); defer_time_in_ms += payload->defer_delay; @@ -691,37 +811,86 @@ bool dce_aux_transfer_with_retries(struct ddc_service *ddc, break; case AUX_TRANSACTION_REPLY_I2C_DEFER: + DC_TRACE_LEVEL_MESSAGE(DAL_TRACE_LEVEL_INFORMATION, + LOG_FLAG_I2cAux_DceAux, + "dce_aux_transfer_with_retries: AUX_RET_SUCCESS: AUX_TRANSACTION_REPLY_I2C_DEFER"); + aux_defer_retries = 0; - if (++aux_i2c_defer_retries >= AUX_MAX_I2C_DEFER_RETRIES) + if (++aux_i2c_defer_retries >= AUX_MAX_I2C_DEFER_RETRIES) { + DC_TRACE_LEVEL_MESSAGE(DAL_TRACE_LEVEL_ERROR, + LOG_FLAG_Error_I2cAux, + "dce_aux_transfer_with_retries: FAILURE: aux_i2c_defer_retries=%d >= AUX_MAX_I2C_DEFER_RETRIES=%d", + aux_i2c_defer_retries, + AUX_MAX_I2C_DEFER_RETRIES); goto fail; + } break; case AUX_TRANSACTION_REPLY_AUX_NACK: + DC_TRACE_LEVEL_MESSAGE(DAL_TRACE_LEVEL_INFORMATION, + LOG_FLAG_I2cAux_DceAux, + "dce_aux_transfer_with_retries: AUX_RET_SUCCESS: AUX_TRANSACTION_REPLY_AUX_NACK"); + goto fail; + case AUX_TRANSACTION_REPLY_HPD_DISCON: + 
DC_TRACE_LEVEL_MESSAGE(DAL_TRACE_LEVEL_INFORMATION, + LOG_FLAG_I2cAux_DceAux, + "dce_aux_transfer_with_retries: AUX_RET_SUCCESS: AUX_TRANSACTION_REPLY_HPD_DISCON"); + goto fail; + default: + DC_TRACE_LEVEL_MESSAGE(DAL_TRACE_LEVEL_ERROR, + LOG_FLAG_Error_I2cAux, + "dce_aux_transfer_with_retries: AUX_RET_SUCCESS: FAILURE: AUX_TRANSACTION_REPLY_* unknown, default case."); goto fail; } break; case AUX_RET_ERROR_INVALID_REPLY: - if (++aux_invalid_reply_retries >= AUX_MAX_INVALID_REPLY_RETRIES) + DC_TRACE_LEVEL_MESSAGE(DAL_TRACE_LEVEL_INFORMATION, + LOG_FLAG_I2cAux_DceAux, + "dce_aux_transfer_with_retries: AUX_RET_ERROR_INVALID_REPLY"); + if (++aux_invalid_reply_retries >= AUX_MAX_INVALID_REPLY_RETRIES) { + DC_TRACE_LEVEL_MESSAGE(DAL_TRACE_LEVEL_ERROR, + LOG_FLAG_Error_I2cAux, + "dce_aux_transfer_with_retries: FAILURE: aux_invalid_reply_retries=%d >= AUX_MAX_INVALID_REPLY_RETRIES=%d", + aux_invalid_reply_retries, + AUX_MAX_INVALID_REPLY_RETRIES); goto fail; - else + } else udelay(400); break; case AUX_RET_ERROR_TIMEOUT: + DC_TRACE_LEVEL_MESSAGE(DAL_TRACE_LEVEL_INFORMATION, + LOG_FLAG_I2cAux_DceAux, + "dce_aux_transfer_with_retries: AUX_RET_ERROR_TIMEOUT"); // Check whether a DEFER had occurred before the timeout. // If so, treat timeout as a DEFER. if (retry_on_defer) { - if (++aux_defer_retries >= AUX_MIN_DEFER_RETRIES) + if (++aux_defer_retries >= AUX_MIN_DEFER_RETRIES) { + DC_TRACE_LEVEL_MESSAGE(DAL_TRACE_LEVEL_ERROR, + LOG_FLAG_Error_I2cAux, + "dce_aux_transfer_with_retries: FAILURE: aux_defer_retries=%d >= AUX_MIN_DEFER_RETRIES=%d", + aux_defer_retries, + AUX_MIN_DEFER_RETRIES); goto fail; - else if (payload->defer_delay > 0) + } else if (payload->defer_delay > 0) { + DC_TRACE_LEVEL_MESSAGE(DAL_TRACE_LEVEL_INFORMATION, + LOG_FLAG_I2cAux_DceAux, + "dce_aux_transfer_with_retries: payload->defer_delay=%u", + payload->defer_delay); msleep(payload->defer_delay); + } } else { - if (++aux_timeout_retries >= AUX_MAX_TIMEOUT_RETRIES) + if (++aux_timeout_retries >= AUX_MAX_TIMEOUT_RETRIES) { + DC_TRACE_LEVEL_MESSAGE(DAL_TRACE_LEVEL_ERROR, + LOG_FLAG_Error_I2cAux, + "dce_aux_transfer_with_retries: FAILURE: aux_timeout_retries=%d >= AUX_MAX_TIMEOUT_RETRIES=%d", + aux_timeout_retries, + AUX_MAX_TIMEOUT_RETRIES); goto fail; - else { + } else { /* * DP 1.4, 2.8.2: AUX Transaction Response/Reply Timeouts * According to the DP spec there should be 3 retries total @@ -736,12 +905,25 @@ bool dce_aux_transfer_with_retries(struct ddc_service *ddc, case AUX_RET_ERROR_ENGINE_ACQUIRE: case AUX_RET_ERROR_UNKNOWN: default: + DC_TRACE_LEVEL_MESSAGE(DAL_TRACE_LEVEL_INFORMATION, + LOG_FLAG_I2cAux_DceAux, + "dce_aux_transfer_with_retries: Failure: operation_result=%d", + (int)operation_result); goto fail; } } fail: + DC_TRACE_LEVEL_MESSAGE(DAL_TRACE_LEVEL_ERROR, + LOG_FLAG_Error_I2cAux, + "dce_aux_transfer_with_retries: FAILURE"); if (!payload_reply) payload->reply = NULL; + + DC_TRACE_LEVEL_MESSAGE(DAL_TRACE_LEVEL_ERROR, + WPP_BIT_FLAG_DC_ERROR, + "AUX transaction failed. Result: %d", + operation_result); + return false; } diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c index 10d42ae0cffe..aa8403bc4c83 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c @@ -29,6 +29,8 @@ #include "dmub/dmub_srv.h" #include "core_types.h" +#define DC_TRACE_LEVEL_MESSAGE(...) 
do {} while (0) /* do nothing */ + #define MAX_PIPES 6 /* @@ -96,10 +98,19 @@ static void dmub_psr_get_state(struct dmub_psr *dmub, enum dc_psr_state *state, // Return invalid state when GPINT times out *state = PSR_STATE_INVALID; - // Assert if max retry hit - if (retry_count >= 1000) - ASSERT(0); } while (++retry_count <= 1000 && *state == PSR_STATE_INVALID); + + // Assert if max retry hit + if (retry_count >= 1000 && *state == PSR_STATE_INVALID) { + ASSERT(0); + DC_TRACE_LEVEL_MESSAGE(DAL_TRACE_LEVEL_ERROR, + WPP_BIT_FLAG_Firmware_PsrState, + "Unable to get PSR state from FW."); + } else + DC_TRACE_LEVEL_MESSAGE(DAL_TRACE_LEVEL_VERBOSE, + WPP_BIT_FLAG_Firmware_PsrState, + "Got PSR state from FW. PSR state: %d, Retry count: %d", + *state, retry_count); } /* @@ -207,7 +218,7 @@ static void dmub_psr_set_level(struct dmub_psr *dmub, uint16_t psr_level, uint8_ cmd.psr_set_level.header.sub_type = DMUB_CMD__PSR_SET_LEVEL; cmd.psr_set_level.header.payload_bytes = sizeof(struct dmub_cmd_psr_set_level_data); cmd.psr_set_level.psr_set_level_data.psr_level = psr_level; - cmd.psr_set_level.psr_set_level_data.cmd_version = PSR_VERSION_1; + cmd.psr_set_level.psr_set_level_data.cmd_version = DMUB_CMD_PSR_CONTROL_VERSION_1; cmd.psr_set_level.psr_set_level_data.panel_inst = panel_inst; dc_dmub_srv_cmd_queue(dc->dmub_srv, &cmd); dc_dmub_srv_cmd_execute(dc->dmub_srv); @@ -293,7 +304,7 @@ static bool dmub_psr_copy_settings(struct dmub_psr *dmub, copy_settings_data->debug.bitfields.use_hw_lock_mgr = 1; copy_settings_data->fec_enable_status = (link->fec_state == dc_link_fec_enabled); copy_settings_data->fec_enable_delay_in100us = link->dc->debug.fec_enable_delay_in100us; - copy_settings_data->cmd_version = PSR_VERSION_1; + copy_settings_data->cmd_version = DMUB_CMD_PSR_CONTROL_VERSION_1; copy_settings_data->panel_inst = panel_inst; dc_dmub_srv_cmd_queue(dc->dmub_srv, &cmd); diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h index 8d7e92d5d3e4..39485bdeb90e 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h @@ -121,6 +121,10 @@ struct dcn_hubbub_registers { uint32_t DCN_VM_AGP_BASE; uint32_t DCN_VM_PROTECTION_FAULT_DEFAULT_ADDR_MSB; uint32_t DCN_VM_PROTECTION_FAULT_DEFAULT_ADDR_LSB; + uint32_t DCN_VM_FAULT_ADDR_MSB; + uint32_t DCN_VM_FAULT_ADDR_LSB; + uint32_t DCN_VM_FAULT_CNTL; + uint32_t DCN_VM_FAULT_STATUS; uint32_t DCHUBBUB_ARB_FRAC_URG_BW_NOM_A; uint32_t DCHUBBUB_ARB_FRAC_URG_BW_NOM_B; uint32_t DCHUBBUB_ARB_FRAC_URG_BW_NOM_C; @@ -233,7 +237,19 @@ struct dcn_hubbub_registers { type DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_C;\ type DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_D;\ type DCN_VM_PROTECTION_FAULT_DEFAULT_ADDR_MSB;\ - type DCN_VM_PROTECTION_FAULT_DEFAULT_ADDR_LSB + type DCN_VM_PROTECTION_FAULT_DEFAULT_ADDR_LSB;\ + type DCN_VM_FAULT_ADDR_MSB;\ + type DCN_VM_FAULT_ADDR_LSB;\ + type DCN_VM_ERROR_STATUS_CLEAR;\ + type DCN_VM_ERROR_STATUS_MODE;\ + type DCN_VM_ERROR_INTERRUPT_ENABLE;\ + type DCN_VM_RANGE_FAULT_DISABLE;\ + type DCN_VM_PRQ_FAULT_DISABLE;\ + type DCN_VM_ERROR_STATUS;\ + type DCN_VM_ERROR_VMID;\ + type DCN_VM_ERROR_TABLE_LEVEL;\ + type DCN_VM_ERROR_PIPE;\ + type DCN_VM_ERROR_INTERRUPT_STATUS #define HUBBUB_STUTTER_REG_FIELD_LIST(type) \ type DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A;\ @@ -303,6 +319,7 @@ struct dcn_hubbub_registers { type DET3_SIZE_CURRENT;\ type COMPBUF_SIZE;\ type COMPBUF_SIZE_CURRENT;\ + type CONFIG_ERROR;\ type 
COMPBUF_RESERVED_SPACE_64B;\ type COMPBUF_RESERVED_SPACE_ZS;\ type DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_Z8_A;\ diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c index 04303fe9c659..ea185c877323 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c @@ -871,6 +871,8 @@ void hubp1_read_state_common(struct hubp *hubp) struct _vcs_dpi_display_dlg_regs_st *dlg_attr = &s->dlg_attr; struct _vcs_dpi_display_ttu_regs_st *ttu_attr = &s->ttu_attr; struct _vcs_dpi_display_rq_regs_st *rq_regs = &s->rq_regs; + uint32_t aperture_low_msb, aperture_low_lsb; + uint32_t aperture_high_msb, aperture_high_lsb; /* Requester */ REG_GET(HUBPRET_CONTROL, @@ -881,6 +883,22 @@ void hubp1_read_state_common(struct hubp *hubp) MRQ_EXPANSION_MODE, &rq_regs->mrq_expansion_mode, CRQ_EXPANSION_MODE, &rq_regs->crq_expansion_mode); + REG_GET(DCN_VM_SYSTEM_APERTURE_LOW_ADDR_MSB, + MC_VM_SYSTEM_APERTURE_LOW_ADDR_MSB, &aperture_low_msb); + + REG_GET(DCN_VM_SYSTEM_APERTURE_LOW_ADDR_LSB, + MC_VM_SYSTEM_APERTURE_LOW_ADDR_LSB, &aperture_low_lsb); + + REG_GET(DCN_VM_SYSTEM_APERTURE_HIGH_ADDR_MSB, + MC_VM_SYSTEM_APERTURE_HIGH_ADDR_MSB, &aperture_high_msb); + + REG_GET(DCN_VM_SYSTEM_APERTURE_HIGH_ADDR_LSB, + MC_VM_SYSTEM_APERTURE_HIGH_ADDR_LSB, &aperture_high_lsb); + + // On DCN1, aperture is broken down into MSB and LSB; only keep bits [47:18] to match later DCN format + rq_regs->aperture_low_addr = (aperture_low_msb << 26) | (aperture_low_lsb >> 6); + rq_regs->aperture_high_addr = (aperture_high_msb << 26) | (aperture_high_lsb >> 6); + /* DLG - Per hubp */ REG_GET_2(BLANK_OFFSET_0, REFCYC_H_BLANK_END, &dlg_attr->refcyc_h_blank_end, @@ -1037,6 +1055,17 @@ void hubp1_read_state_common(struct hubp *hubp) QoS_LEVEL_LOW_WM, &s->qos_level_low_wm, QoS_LEVEL_HIGH_WM, &s->qos_level_high_wm); + REG_GET(DCSURF_PRIMARY_SURFACE_ADDRESS, + PRIMARY_SURFACE_ADDRESS, &s->primary_surface_addr_lo); + + REG_GET(DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH, + PRIMARY_SURFACE_ADDRESS, &s->primary_surface_addr_hi); + + REG_GET(DCSURF_PRIMARY_META_SURFACE_ADDRESS, + PRIMARY_META_SURFACE_ADDRESS, &s->primary_meta_addr_lo); + + REG_GET(DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH, + PRIMARY_META_SURFACE_ADDRESS, &s->primary_meta_addr_hi); } void hubp1_read_state(struct hubp *hubp) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h index e2f2f6995935..9cb8c383d673 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h @@ -682,6 +682,10 @@ struct dcn_hubp_state { uint32_t min_ttu_vblank; uint32_t qos_level_low_wm; uint32_t qos_level_high_wm; + uint32_t primary_surface_addr_lo; + uint32_t primary_surface_addr_hi; + uint32_t primary_meta_addr_lo; + uint32_t primary_meta_addr_hi; }; struct dcn10_hubp { diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index c545eddabdcc..df8a7718a85f 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -1502,25 +1502,22 @@ void dcn10_init_hw(struct dc *dc) void dcn10_power_down_on_boot(struct dc *dc) { struct dc_link *edp_links[MAX_NUM_EDP]; - struct dc_link *edp_link; + struct dc_link *edp_link = NULL; int edp_num; int i = 0; get_edp_links(dc, edp_links, &edp_num); - - if (edp_num) { - for (i = 0; i < edp_num; i++) { - 
edp_link = edp_links[i]; - if (edp_link->link_enc->funcs->is_dig_enabled && - edp_link->link_enc->funcs->is_dig_enabled(edp_link->link_enc) && - dc->hwseq->funcs.edp_backlight_control && - dc->hwss.power_down && - dc->hwss.edp_power_control) { - dc->hwseq->funcs.edp_backlight_control(edp_link, false); - dc->hwss.power_down(dc); - dc->hwss.edp_power_control(edp_link, false); - } - } + if (edp_num) + edp_link = edp_links[0]; + + if (edp_link && edp_link->link_enc->funcs->is_dig_enabled && + edp_link->link_enc->funcs->is_dig_enabled(edp_link->link_enc) && + dc->hwseq->funcs.edp_backlight_control && + dc->hwss.power_down && + dc->hwss.edp_power_control) { + dc->hwseq->funcs.edp_backlight_control(edp_link, false); + dc->hwss.power_down(dc); + dc->hwss.edp_power_control(edp_link, false); } else { for (i = 0; i < dc->link_count; i++) { struct dc_link *link = dc->links[i]; @@ -3180,8 +3177,12 @@ void dcn10_update_dchub(struct dce_hwseq *hws, struct dchub_init_data *dh_data) static bool dcn10_can_pipe_disable_cursor(struct pipe_ctx *pipe_ctx) { struct pipe_ctx *test_pipe; - const struct rect *r1 = &pipe_ctx->plane_res.scl_data.recout, *r2; + const struct scaler_data *scl_data = &pipe_ctx->plane_res.scl_data; + const struct rect *r1 = &scl_data->recout, *r2; int r1_r = r1->x + r1->width, r1_b = r1->y + r1->height, r2_r, r2_b; + int cur_layer = pipe_ctx->plane_state->layer_index; + bool upper_pipe_exists = false; + struct fixed31_32 one = dc_fixpt_from_int(1); /** * Disable the cursor if there's another pipe above this with a @@ -3199,8 +3200,17 @@ static bool dcn10_can_pipe_disable_cursor(struct pipe_ctx *pipe_ctx) if (r1->x >= r2->x && r1->y >= r2->y && r1_r <= r2_r && r1_b <= r2_b) return true; + + if (test_pipe->plane_state->layer_index < cur_layer) + upper_pipe_exists = true; } + // if plane scaled, assume an upper plane can handle cursor if it exists. 
+ if (upper_pipe_exists && + (scl_data->ratios.horz.value != one.value || + scl_data->ratios.vert.value != one.value)) + return true; + return false; } @@ -3631,13 +3641,12 @@ enum dc_status dcn10_set_clock(struct dc *dc, struct dc_clock_config clock_cfg = {0}; struct dc_clocks *current_clocks = &context->bw_ctx.bw.dcn.clk; - if (dc->clk_mgr && dc->clk_mgr->funcs->get_clock) - dc->clk_mgr->funcs->get_clock(dc->clk_mgr, - context, clock_type, &clock_cfg); - - if (!dc->clk_mgr->funcs->get_clock) + if (!dc->clk_mgr || !dc->clk_mgr->funcs->get_clock) return DC_FAIL_UNSUPPORTED_1; + dc->clk_mgr->funcs->get_clock(dc->clk_mgr, + context, clock_type, &clock_cfg); + if (clk_khz > clock_cfg.max_clock_khz) return DC_FAIL_CLK_EXCEED_MAX; @@ -3655,7 +3664,7 @@ enum dc_status dcn10_set_clock(struct dc *dc, else return DC_ERROR_UNEXPECTED; - if (dc->clk_mgr && dc->clk_mgr->funcs->update_clocks) + if (dc->clk_mgr->funcs->update_clocks) dc->clk_mgr->funcs->update_clocks(dc->clk_mgr, context, true); return DC_OK; diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c index 3696faf12d86..37848f4577b1 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c @@ -1388,6 +1388,12 @@ void optc1_read_otg_state(struct optc *optc1, REG_GET(OPTC_INPUT_GLOBAL_CONTROL, OPTC_UNDERFLOW_OCCURRED_STATUS, &s->underflow_occurred_status); + + REG_GET(OTG_VERTICAL_INTERRUPT2_CONTROL, + OTG_VERTICAL_INTERRUPT2_INT_ENABLE, &s->vertical_interrupt2_en); + + REG_GET(OTG_VERTICAL_INTERRUPT2_POSITION, + OTG_VERTICAL_INTERRUPT2_LINE_START, &s->vertical_interrupt2_line); } bool optc1_get_otg_active_size(struct timing_generator *optc, diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h index 29d6fbe0093a..c50c29984d51 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h @@ -578,6 +578,8 @@ struct dcn_otg_state { uint32_t underflow_occurred_status; uint32_t otg_enabled; uint32_t blank_enabled; + uint32_t vertical_interrupt2_en; + uint32_t vertical_interrupt2_line; }; void optc1_read_otg_state(struct optc *optc1, diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c index f1a08a7736ac..cf364ae93138 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c @@ -522,16 +522,21 @@ void enc1_stream_encoder_hdmi_set_stream_attribute( switch (crtc_timing->display_color_depth) { case COLOR_DEPTH_888: REG_UPDATE(HDMI_CONTROL, HDMI_DEEP_COLOR_DEPTH, 0); + DC_LOG_DEBUG("HDMI source set to 24BPP deep color depth\n"); break; case COLOR_DEPTH_101010: if (crtc_timing->pixel_encoding == PIXEL_ENCODING_YCBCR422) { REG_UPDATE_2(HDMI_CONTROL, HDMI_DEEP_COLOR_DEPTH, 1, HDMI_DEEP_COLOR_ENABLE, 0); + DC_LOG_DEBUG("HDMI source 30BPP deep color depth" \ + "disabled for YCBCR422 pixel encoding\n"); } else { REG_UPDATE_2(HDMI_CONTROL, HDMI_DEEP_COLOR_DEPTH, 1, HDMI_DEEP_COLOR_ENABLE, 1); + DC_LOG_DEBUG("HDMI source 30BPP deep color depth" \ + "enabled for YCBCR422 non-pixel encoding\n"); } break; case COLOR_DEPTH_121212: @@ -539,16 +544,22 @@ void enc1_stream_encoder_hdmi_set_stream_attribute( REG_UPDATE_2(HDMI_CONTROL, HDMI_DEEP_COLOR_DEPTH, 2, HDMI_DEEP_COLOR_ENABLE, 0); + DC_LOG_DEBUG("HDMI source 36BPP deep color depth" \ + "disabled for YCBCR422 pixel 
encoding\n"); } else { REG_UPDATE_2(HDMI_CONTROL, HDMI_DEEP_COLOR_DEPTH, 2, HDMI_DEEP_COLOR_ENABLE, 1); + DC_LOG_DEBUG("HDMI source 36BPP deep color depth" \ + "enabled for non-pixel YCBCR422 encoding\n"); } break; case COLOR_DEPTH_161616: REG_UPDATE_2(HDMI_CONTROL, HDMI_DEEP_COLOR_DEPTH, 3, HDMI_DEEP_COLOR_ENABLE, 1); + DC_LOG_DEBUG("HDMI source deep color depth enabled in" \ + "reserved mode\n"); break; default: break; diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.c index 91a9305d42e8..aacb1fb5c73e 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.c @@ -605,6 +605,26 @@ static bool hubbub2_program_watermarks( return wm_pending; } +void hubbub2_read_state(struct hubbub *hubbub, struct dcn_hubbub_state *hubbub_state) +{ + struct dcn20_hubbub *hubbub1 = TO_DCN20_HUBBUB(hubbub); + + if (REG(DCN_VM_FAULT_ADDR_MSB)) + hubbub_state->vm_fault_addr_msb = REG_READ(DCN_VM_FAULT_ADDR_MSB); + + if (REG(DCN_VM_FAULT_ADDR_LSB)) + hubbub_state->vm_fault_addr_msb = REG_READ(DCN_VM_FAULT_ADDR_LSB); + + if (REG(DCN_VM_FAULT_CNTL)) + REG_GET(DCN_VM_FAULT_CNTL, DCN_VM_ERROR_STATUS_MODE, &hubbub_state->vm_error_mode); + + if (REG(DCN_VM_FAULT_STATUS)) { + REG_GET(DCN_VM_FAULT_STATUS, DCN_VM_ERROR_STATUS, &hubbub_state->vm_error_status); + REG_GET(DCN_VM_FAULT_STATUS, DCN_VM_ERROR_VMID, &hubbub_state->vm_error_vmid); + REG_GET(DCN_VM_FAULT_STATUS, DCN_VM_ERROR_PIPE, &hubbub_state->vm_error_pipe); + } +} + static const struct hubbub_funcs hubbub2_funcs = { .update_dchub = hubbub2_update_dchub, .init_dchub_sys_ctx = hubbub2_init_dchub_sys_ctx, @@ -617,6 +637,7 @@ static const struct hubbub_funcs hubbub2_funcs = { .program_watermarks = hubbub2_program_watermarks, .is_allow_self_refresh_enabled = hubbub1_is_allow_self_refresh_enabled, .allow_self_refresh_control = hubbub1_allow_self_refresh_control, + .hubbub_read_state = hubbub2_read_state, }; void hubbub2_construct(struct dcn20_hubbub *hubbub, diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.h b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.h index 10af257d90ef..2f6146bf1d32 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.h +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.h @@ -29,16 +29,6 @@ #include "dcn10/dcn10_hubbub.h" #include "dcn20_vmid.h" -#define HUBBUB_REG_LIST_DCN20_COMMON()\ - HUBBUB_REG_LIST_DCN_COMMON(), \ - SR(DCHUBBUB_CRC_CTRL), \ - SR(DCN_VM_FB_LOCATION_BASE),\ - SR(DCN_VM_FB_LOCATION_TOP),\ - SR(DCN_VM_FB_OFFSET),\ - SR(DCN_VM_AGP_BOT),\ - SR(DCN_VM_AGP_TOP),\ - SR(DCN_VM_AGP_BASE) - #define TO_DCN20_HUBBUB(hubbub)\ container_of(hubbub, struct dcn20_hubbub, base) @@ -50,7 +40,11 @@ SR(DCN_VM_FB_OFFSET),\ SR(DCN_VM_AGP_BOT),\ SR(DCN_VM_AGP_TOP),\ - SR(DCN_VM_AGP_BASE) + SR(DCN_VM_AGP_BASE),\ + SR(DCN_VM_FAULT_ADDR_MSB), \ + SR(DCN_VM_FAULT_ADDR_LSB), \ + SR(DCN_VM_FAULT_CNTL), \ + SR(DCN_VM_FAULT_STATUS) #define HUBBUB_REG_LIST_DCN20(id)\ HUBBUB_REG_LIST_DCN20_COMMON(), \ @@ -71,7 +65,19 @@ HUBBUB_SF(DCN_VM_AGP_TOP, AGP_TOP, mask_sh), \ HUBBUB_SF(DCN_VM_AGP_BASE, AGP_BASE, mask_sh), \ HUBBUB_SF(DCN_VM_PROTECTION_FAULT_DEFAULT_ADDR_MSB, DCN_VM_PROTECTION_FAULT_DEFAULT_ADDR_MSB, mask_sh), \ - HUBBUB_SF(DCN_VM_PROTECTION_FAULT_DEFAULT_ADDR_LSB, DCN_VM_PROTECTION_FAULT_DEFAULT_ADDR_LSB, mask_sh) + HUBBUB_SF(DCN_VM_PROTECTION_FAULT_DEFAULT_ADDR_LSB, DCN_VM_PROTECTION_FAULT_DEFAULT_ADDR_LSB, mask_sh), \ + HUBBUB_SF(DCN_VM_FAULT_ADDR_MSB, DCN_VM_FAULT_ADDR_MSB, mask_sh), \ 
+ HUBBUB_SF(DCN_VM_FAULT_ADDR_LSB, DCN_VM_FAULT_ADDR_LSB, mask_sh), \ + HUBBUB_SF(DCN_VM_FAULT_CNTL, DCN_VM_ERROR_STATUS_CLEAR, mask_sh), \ + HUBBUB_SF(DCN_VM_FAULT_CNTL, DCN_VM_ERROR_STATUS_MODE, mask_sh), \ + HUBBUB_SF(DCN_VM_FAULT_CNTL, DCN_VM_ERROR_INTERRUPT_ENABLE, mask_sh), \ + HUBBUB_SF(DCN_VM_FAULT_CNTL, DCN_VM_RANGE_FAULT_DISABLE, mask_sh), \ + HUBBUB_SF(DCN_VM_FAULT_CNTL, DCN_VM_PRQ_FAULT_DISABLE, mask_sh), \ + HUBBUB_SF(DCN_VM_FAULT_STATUS, DCN_VM_ERROR_STATUS, mask_sh), \ + HUBBUB_SF(DCN_VM_FAULT_STATUS, DCN_VM_ERROR_VMID, mask_sh), \ + HUBBUB_SF(DCN_VM_FAULT_STATUS, DCN_VM_ERROR_TABLE_LEVEL, mask_sh), \ + HUBBUB_SF(DCN_VM_FAULT_STATUS, DCN_VM_ERROR_PIPE, mask_sh), \ + HUBBUB_SF(DCN_VM_FAULT_STATUS, DCN_VM_ERROR_INTERRUPT_STATUS, mask_sh) struct dcn20_hubbub { struct hubbub base; @@ -131,4 +137,7 @@ void hubbub2_get_dchub_ref_freq(struct hubbub *hubbub, void hubbub2_wm_read_state(struct hubbub *hubbub, struct dcn_hubbub_wm *wm); +void hubbub2_read_state(struct hubbub *hubbub, + struct dcn_hubbub_state *hubbub_state); + #endif diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c index 7e54058715aa..5adf42a7cc27 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c @@ -1080,6 +1080,12 @@ void hubp2_read_state_common(struct hubp *hubp) MRQ_EXPANSION_MODE, &rq_regs->mrq_expansion_mode, CRQ_EXPANSION_MODE, &rq_regs->crq_expansion_mode); + REG_GET(DCN_VM_SYSTEM_APERTURE_HIGH_ADDR, + MC_VM_SYSTEM_APERTURE_HIGH_ADDR, &rq_regs->aperture_high_addr); + + REG_GET(DCN_VM_SYSTEM_APERTURE_LOW_ADDR, + MC_VM_SYSTEM_APERTURE_LOW_ADDR, &rq_regs->aperture_low_addr); + /* DLG - Per hubp */ REG_GET_2(BLANK_OFFSET_0, REFCYC_H_BLANK_END, &dlg_attr->refcyc_h_blank_end, @@ -1236,6 +1242,17 @@ void hubp2_read_state_common(struct hubp *hubp) QoS_LEVEL_LOW_WM, &s->qos_level_low_wm, QoS_LEVEL_HIGH_WM, &s->qos_level_high_wm); + REG_GET(DCSURF_PRIMARY_SURFACE_ADDRESS, + PRIMARY_SURFACE_ADDRESS, &s->primary_surface_addr_lo); + + REG_GET(DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH, + PRIMARY_SURFACE_ADDRESS, &s->primary_surface_addr_hi); + + REG_GET(DCSURF_PRIMARY_META_SURFACE_ADDRESS, + PRIMARY_META_SURFACE_ADDRESS, &s->primary_meta_addr_lo); + + REG_GET(DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH, + PRIMARY_META_SURFACE_ADDRESS, &s->primary_meta_addr_hi); } void hubp2_read_state(struct hubp *hubp) diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c index 5c2853654cca..a47ba1d45be9 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c @@ -1723,13 +1723,15 @@ void dcn20_program_front_end_for_ctx( pipe = pipe->bottom_pipe; } - /* Program secondary blending tree and writeback pipes */ - pipe = &context->res_ctx.pipe_ctx[i]; - if (!pipe->prev_odm_pipe && pipe->stream->num_wb_info > 0 - && (pipe->update_flags.raw || pipe->plane_state->update_flags.raw || pipe->stream->update_flags.raw) - && hws->funcs.program_all_writeback_pipes_in_tree) - hws->funcs.program_all_writeback_pipes_in_tree(dc, pipe->stream, context); } + /* Program secondary blending tree and writeback pipes */ + pipe = &context->res_ctx.pipe_ctx[i]; + if (!pipe->top_pipe && !pipe->prev_odm_pipe + && pipe->stream && pipe->stream->num_wb_info > 0 + && (pipe->update_flags.raw || (pipe->plane_state && pipe->plane_state->update_flags.raw) + || pipe->stream->update_flags.raw) + && 
hws->funcs.program_all_writeback_pipes_in_tree) + hws->funcs.program_all_writeback_pipes_in_tree(dc, pipe->stream, context); } } diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c index b173fa3653b5..e3e01b17c164 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c @@ -35,6 +35,8 @@ #include "include/irq_service_interface.h" #include "dcn20/dcn20_resource.h" +#include "dml/dcn2x/dcn2x.h" + #include "dcn10/dcn10_hubp.h" #include "dcn10/dcn10_ipp.h" #include "dcn20_hubbub.h" @@ -1974,43 +1976,6 @@ void dcn20_split_stream_for_mpc( ASSERT(primary_pipe->plane_state); } -void dcn20_populate_dml_writeback_from_context( - struct dc *dc, struct resource_context *res_ctx, display_e2e_pipe_params_st *pipes) -{ - int pipe_cnt, i; - - for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) { - struct dc_writeback_info *wb_info = &res_ctx->pipe_ctx[i].stream->writeback_info[0]; - - if (!res_ctx->pipe_ctx[i].stream) - continue; - - /* Set writeback information */ - pipes[pipe_cnt].dout.wb_enable = (wb_info->wb_enabled == true) ? 1 : 0; - pipes[pipe_cnt].dout.num_active_wb++; - pipes[pipe_cnt].dout.wb.wb_src_height = wb_info->dwb_params.cnv_params.crop_height; - pipes[pipe_cnt].dout.wb.wb_src_width = wb_info->dwb_params.cnv_params.crop_width; - pipes[pipe_cnt].dout.wb.wb_dst_width = wb_info->dwb_params.dest_width; - pipes[pipe_cnt].dout.wb.wb_dst_height = wb_info->dwb_params.dest_height; - pipes[pipe_cnt].dout.wb.wb_htaps_luma = 1; - pipes[pipe_cnt].dout.wb.wb_vtaps_luma = 1; - pipes[pipe_cnt].dout.wb.wb_htaps_chroma = wb_info->dwb_params.scaler_taps.h_taps_c; - pipes[pipe_cnt].dout.wb.wb_vtaps_chroma = wb_info->dwb_params.scaler_taps.v_taps_c; - pipes[pipe_cnt].dout.wb.wb_hratio = 1.0; - pipes[pipe_cnt].dout.wb.wb_vratio = 1.0; - if (wb_info->dwb_params.out_format == dwb_scaler_mode_yuv420) { - if (wb_info->dwb_params.output_depth == DWB_OUTPUT_PIXEL_DEPTH_8BPC) - pipes[pipe_cnt].dout.wb.wb_pixel_format = dm_420_8; - else - pipes[pipe_cnt].dout.wb.wb_pixel_format = dm_420_10; - } else - pipes[pipe_cnt].dout.wb.wb_pixel_format = dm_444_32; - - pipe_cnt++; - } - -} - int dcn20_populate_dml_pipes_from_context( struct dc *dc, struct dc_state *context, @@ -2392,7 +2357,9 @@ int dcn20_populate_dml_pipes_from_context( } /* populate writeback information */ + DC_FP_START(); dc->res_pool->funcs->populate_dml_writeback_from_context(dc, res_ctx, pipes); + DC_FP_END(); return pipe_cnt; } @@ -2462,7 +2429,7 @@ void dcn20_set_mcif_arb_params( wb_arb_params->cli_watermark[k] = get_wm_writeback_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; wb_arb_params->pstate_watermark[k] = get_wm_writeback_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; } - wb_arb_params->time_per_pixel = 16.0 / context->res_ctx.pipe_ctx[i].stream->phy_pix_clk; /* 4 bit fraction, ms */ + wb_arb_params->time_per_pixel = 16.0 * 1000 / (context->res_ctx.pipe_ctx[i].stream->phy_pix_clk / 1000); /* 4 bit fraction, ms */ wb_arb_params->slice_lines = 32; wb_arb_params->arbitration_slice = 2; wb_arb_params->max_scaled_time = dcn20_calc_max_scaled_time(wb_arb_params->time_per_pixel, @@ -2531,16 +2498,16 @@ struct pipe_ctx *dcn20_find_secondary_pipe(struct dc *dc, * pick that pipe as secondary * Same logic applies for ODM pipes */ - if (dc->current_state->res_ctx.pipe_ctx[primary_pipe->pipe_idx].bottom_pipe) { - preferred_pipe_idx = 
dc->current_state->res_ctx.pipe_ctx[primary_pipe->pipe_idx].bottom_pipe->pipe_idx; + if (dc->current_state->res_ctx.pipe_ctx[primary_pipe->pipe_idx].next_odm_pipe) { + preferred_pipe_idx = dc->current_state->res_ctx.pipe_ctx[primary_pipe->pipe_idx].next_odm_pipe->pipe_idx; if (res_ctx->pipe_ctx[preferred_pipe_idx].stream == NULL) { secondary_pipe = &res_ctx->pipe_ctx[preferred_pipe_idx]; secondary_pipe->pipe_idx = preferred_pipe_idx; } } if (secondary_pipe == NULL && - dc->current_state->res_ctx.pipe_ctx[primary_pipe->pipe_idx].next_odm_pipe) { - preferred_pipe_idx = dc->current_state->res_ctx.pipe_ctx[primary_pipe->pipe_idx].next_odm_pipe->pipe_idx; + dc->current_state->res_ctx.pipe_ctx[primary_pipe->pipe_idx].bottom_pipe) { + preferred_pipe_idx = dc->current_state->res_ctx.pipe_ctx[primary_pipe->pipe_idx].bottom_pipe->pipe_idx; if (res_ctx->pipe_ctx[preferred_pipe_idx].stream == NULL) { secondary_pipe = &res_ctx->pipe_ctx[preferred_pipe_idx]; secondary_pipe->pipe_idx = preferred_pipe_idx; diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.h b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.h index c8f3127bbcdf..6ec8ff45f0f7 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.h +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.h @@ -58,8 +58,6 @@ struct pipe_ctx *dcn20_acquire_idle_pipe_for_layer( struct dc_state *state, const struct resource_pool *pool, struct dc_stream_state *stream); -void dcn20_populate_dml_writeback_from_context( - struct dc *dc, struct resource_context *res_ctx, display_e2e_pipe_params_st *pipes); struct stream_encoder *dcn20_stream_encoder_create( enum engine_id eng_id, diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c index 42fbb5e6d505..36044cb8ec83 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c @@ -701,6 +701,7 @@ static const struct hubbub_funcs hubbub21_funcs = { .program_watermarks = hubbub21_program_watermarks, .allow_self_refresh_control = hubbub1_allow_self_refresh_control, .apply_DEDCN21_147_wa = hubbub21_apply_DEDCN21_147_wa, + .hubbub_read_state = hubbub2_read_state, }; void hubbub21_construct(struct dcn20_hubbub *hubbub, diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.h b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.h index ef3ef28509ed..d8eb2bb7282c 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.h +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.h @@ -108,7 +108,19 @@ HUBBUB_SF(DCN_VM_FB_OFFSET, FB_OFFSET, mask_sh), \ HUBBUB_SF(DCN_VM_AGP_BOT, AGP_BOT, mask_sh), \ HUBBUB_SF(DCN_VM_AGP_TOP, AGP_TOP, mask_sh), \ - HUBBUB_SF(DCN_VM_AGP_BASE, AGP_BASE, mask_sh) + HUBBUB_SF(DCN_VM_AGP_BASE, AGP_BASE, mask_sh), \ + HUBBUB_SF(DCN_VM_FAULT_ADDR_MSB, DCN_VM_FAULT_ADDR_MSB, mask_sh), \ + HUBBUB_SF(DCN_VM_FAULT_ADDR_LSB, DCN_VM_FAULT_ADDR_LSB, mask_sh), \ + HUBBUB_SF(DCN_VM_FAULT_CNTL, DCN_VM_ERROR_STATUS_CLEAR, mask_sh), \ + HUBBUB_SF(DCN_VM_FAULT_CNTL, DCN_VM_ERROR_STATUS_MODE, mask_sh), \ + HUBBUB_SF(DCN_VM_FAULT_CNTL, DCN_VM_ERROR_INTERRUPT_ENABLE, mask_sh), \ + HUBBUB_SF(DCN_VM_FAULT_CNTL, DCN_VM_RANGE_FAULT_DISABLE, mask_sh), \ + HUBBUB_SF(DCN_VM_FAULT_CNTL, DCN_VM_PRQ_FAULT_DISABLE, mask_sh), \ + HUBBUB_SF(DCN_VM_FAULT_STATUS, DCN_VM_ERROR_STATUS, mask_sh), \ + HUBBUB_SF(DCN_VM_FAULT_STATUS, DCN_VM_ERROR_VMID, mask_sh), \ + HUBBUB_SF(DCN_VM_FAULT_STATUS, DCN_VM_ERROR_TABLE_LEVEL, mask_sh), \ + HUBBUB_SF(DCN_VM_FAULT_STATUS, 
DCN_VM_ERROR_PIPE, mask_sh), \ + HUBBUB_SF(DCN_VM_FAULT_STATUS, DCN_VM_ERROR_INTERRUPT_STATUS, mask_sh) void dcn21_dchvm_init(struct hubbub *hubbub); int hubbub21_init_dchub(struct hubbub *hubbub, diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c index b0c9180b808f..3de1bcf9b3d8 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c @@ -833,7 +833,7 @@ static struct hubp_funcs dcn21_hubp_funcs = { .dmdata_set_attributes = hubp2_dmdata_set_attributes, .dmdata_load = hubp2_dmdata_load, .dmdata_status_done = hubp2_dmdata_status_done, - .hubp_read_state = hubp1_read_state, + .hubp_read_state = hubp2_read_state, .hubp_clear_underflow = hubp1_clear_underflow, .hubp_set_flip_control_surface_gsl = hubp2_set_flip_control_surface_gsl, .hubp_init = hubp21_init, diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c index bf0a198eae15..fbbdf9976183 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c @@ -35,6 +35,8 @@ #include "include/irq_service_interface.h" #include "dcn20/dcn20_resource.h" +#include "dml/dcn2x/dcn2x.h" + #include "clk_mgr.h" #include "dcn10/dcn10_hubp.h" #include "dcn10/dcn10_ipp.h" @@ -884,7 +886,8 @@ static const struct dc_debug_options debug_defaults_drv = { .disable_48mhz_pwrdwn = false, .usbc_combo_phy_reset_wa = true, .dmub_command_table = true, - .use_max_lb = true + .use_max_lb = true, + .optimize_edp_link_rate = true }; static const struct dc_debug_options debug_defaults_diags = { diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb_cm.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb_cm.c index 3fe9e41e4dbd..6a3d3a0ec0a3 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb_cm.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb_cm.c @@ -49,6 +49,11 @@ static void dwb3_get_reg_field_ogam(struct dcn30_dwbc *dwbc30, struct dcn3_xfer_func_reg *reg) { + reg->shifts.field_region_start_base = dwbc30->dwbc_shift->DWB_OGAM_RAMA_EXP_REGION_START_BASE_B; + reg->masks.field_region_start_base = dwbc30->dwbc_mask->DWB_OGAM_RAMA_EXP_REGION_START_BASE_B; + reg->shifts.field_offset = dwbc30->dwbc_shift->DWB_OGAM_RAMA_OFFSET_B; + reg->masks.field_offset = dwbc30->dwbc_mask->DWB_OGAM_RAMA_OFFSET_B; + reg->shifts.exp_region0_lut_offset = dwbc30->dwbc_shift->DWB_OGAM_RAMA_EXP_REGION0_LUT_OFFSET; reg->masks.exp_region0_lut_offset = dwbc30->dwbc_mask->DWB_OGAM_RAMA_EXP_REGION0_LUT_OFFSET; reg->shifts.exp_region0_num_segments = dwbc30->dwbc_shift->DWB_OGAM_RAMA_EXP_REGION0_NUM_SEGMENTS; @@ -66,8 +71,6 @@ static void dwb3_get_reg_field_ogam(struct dcn30_dwbc *dwbc30, reg->masks.field_region_end_base = dwbc30->dwbc_mask->DWB_OGAM_RAMA_EXP_REGION_END_BASE_B; reg->shifts.field_region_linear_slope = dwbc30->dwbc_shift->DWB_OGAM_RAMA_EXP_REGION_START_SLOPE_B; reg->masks.field_region_linear_slope = dwbc30->dwbc_mask->DWB_OGAM_RAMA_EXP_REGION_START_SLOPE_B; - reg->masks.field_offset = dwbc30->dwbc_mask->DWB_OGAM_RAMA_OFFSET_B; - reg->shifts.field_offset = dwbc30->dwbc_shift->DWB_OGAM_RAMA_OFFSET_B; reg->shifts.exp_region_start = dwbc30->dwbc_shift->DWB_OGAM_RAMA_EXP_REGION_START_B; reg->masks.exp_region_start = dwbc30->dwbc_mask->DWB_OGAM_RAMA_EXP_REGION_START_B; reg->shifts.exp_resion_start_segment = dwbc30->dwbc_shift->DWB_OGAM_RAMA_EXP_REGION_START_SEGMENT_B; @@ -147,18 +150,19 @@ static enum dc_lut_mode 
dwb3_get_ogam_current( uint32_t state_mode; uint32_t ram_select; - REG_GET(DWB_OGAM_CONTROL, - DWB_OGAM_MODE, &state_mode); - REG_GET(DWB_OGAM_CONTROL, - DWB_OGAM_SELECT, &ram_select); + REG_GET_2(DWB_OGAM_CONTROL, + DWB_OGAM_MODE_CURRENT, &state_mode, + DWB_OGAM_SELECT_CURRENT, &ram_select); if (state_mode == 0) { mode = LUT_BYPASS; } else if (state_mode == 2) { if (ram_select == 0) mode = LUT_RAM_A; - else + else if (ram_select == 1) mode = LUT_RAM_B; + else + mode = LUT_BYPASS; } else { // Reserved value mode = LUT_BYPASS; @@ -172,10 +176,10 @@ static void dwb3_configure_ogam_lut( struct dcn30_dwbc *dwbc30, bool is_ram_a) { - REG_UPDATE(DWB_OGAM_LUT_CONTROL, - DWB_OGAM_LUT_READ_COLOR_SEL, 7); - REG_UPDATE(DWB_OGAM_CONTROL, - DWB_OGAM_SELECT, is_ram_a == true ? 0 : 1); + REG_UPDATE_2(DWB_OGAM_LUT_CONTROL, + DWB_OGAM_LUT_WRITE_COLOR_MASK, 7, + DWB_OGAM_LUT_HOST_SEL, (is_ram_a == true) ? 0 : 1); + REG_SET(DWB_OGAM_LUT_INDEX, 0, DWB_OGAM_LUT_INDEX, 0); } @@ -185,17 +189,45 @@ static void dwb3_program_ogam_pwl(struct dcn30_dwbc *dwbc30, { uint32_t i; - // triple base implementation - for (i = 0; i < num/2; i++) { - REG_SET(DWB_OGAM_LUT_DATA, 0, DWB_OGAM_LUT_DATA, rgb[2*i+0].red_reg); - REG_SET(DWB_OGAM_LUT_DATA, 0, DWB_OGAM_LUT_DATA, rgb[2*i+0].green_reg); - REG_SET(DWB_OGAM_LUT_DATA, 0, DWB_OGAM_LUT_DATA, rgb[2*i+0].blue_reg); - REG_SET(DWB_OGAM_LUT_DATA, 0, DWB_OGAM_LUT_DATA, rgb[2*i+1].red_reg); - REG_SET(DWB_OGAM_LUT_DATA, 0, DWB_OGAM_LUT_DATA, rgb[2*i+1].green_reg); - REG_SET(DWB_OGAM_LUT_DATA, 0, DWB_OGAM_LUT_DATA, rgb[2*i+1].blue_reg); - REG_SET(DWB_OGAM_LUT_DATA, 0, DWB_OGAM_LUT_DATA, rgb[2*i+2].red_reg); - REG_SET(DWB_OGAM_LUT_DATA, 0, DWB_OGAM_LUT_DATA, rgb[2*i+2].green_reg); - REG_SET(DWB_OGAM_LUT_DATA, 0, DWB_OGAM_LUT_DATA, rgb[2*i+2].blue_reg); + uint32_t last_base_value_red = rgb[num-1].red_reg + rgb[num-1].delta_red_reg; + uint32_t last_base_value_green = rgb[num-1].green_reg + rgb[num-1].delta_green_reg; + uint32_t last_base_value_blue = rgb[num-1].blue_reg + rgb[num-1].delta_blue_reg; + + if (is_rgb_equal(rgb, num)) { + for (i = 0 ; i < num; i++) + REG_SET(DWB_OGAM_LUT_DATA, 0, DWB_OGAM_LUT_DATA, rgb[i].red_reg); + + REG_SET(DWB_OGAM_LUT_DATA, 0, DWB_OGAM_LUT_DATA, last_base_value_red); + + } else { + + REG_UPDATE(DWB_OGAM_LUT_CONTROL, + DWB_OGAM_LUT_WRITE_COLOR_MASK, 4); + + for (i = 0 ; i < num; i++) + REG_SET(DWB_OGAM_LUT_DATA, 0, DWB_OGAM_LUT_DATA, rgb[i].red_reg); + + REG_SET(DWB_OGAM_LUT_DATA, 0, DWB_OGAM_LUT_DATA, last_base_value_red); + + REG_SET(DWB_OGAM_LUT_INDEX, 0, DWB_OGAM_LUT_INDEX, 0); + + REG_UPDATE(DWB_OGAM_LUT_CONTROL, + DWB_OGAM_LUT_WRITE_COLOR_MASK, 2); + + for (i = 0 ; i < num; i++) + REG_SET(DWB_OGAM_LUT_DATA, 0, DWB_OGAM_LUT_DATA, rgb[i].green_reg); + + REG_SET(DWB_OGAM_LUT_DATA, 0, DWB_OGAM_LUT_DATA, last_base_value_green); + + REG_SET(DWB_OGAM_LUT_INDEX, 0, DWB_OGAM_LUT_INDEX, 0); + + REG_UPDATE(DWB_OGAM_LUT_CONTROL, + DWB_OGAM_LUT_WRITE_COLOR_MASK, 1); + + for (i = 0 ; i < num; i++) + REG_SET(DWB_OGAM_LUT_DATA, 0, DWB_OGAM_LUT_DATA, rgb[i].blue_reg); + + REG_SET(DWB_OGAM_LUT_DATA, 0, DWB_OGAM_LUT_DATA, last_base_value_blue); } } @@ -211,6 +243,8 @@ static bool dwb3_program_ogam_lut( return false; } + REG_SET(DWB_OGAM_CONTROL, 0, DWB_OGAM_MODE, 2); + current_mode = dwb3_get_ogam_current(dwbc30); if (current_mode == LUT_BYPASS || current_mode == LUT_RAM_A) next_mode = LUT_RAM_B; @@ -227,8 +261,7 @@ static bool dwb3_program_ogam_lut( dwb3_program_ogam_pwl( dwbc30, params->rgb_resulted, params->hw_points_num); - REG_SET(DWB_OGAM_CONTROL, 0, 
DWB_OGAM_MODE, 2); - REG_SET(DWB_OGAM_CONTROL, 0, DWB_OGAM_SELECT, next_mode == LUT_RAM_A ? 0 : 1); + REG_UPDATE(DWB_OGAM_CONTROL, DWB_OGAM_SELECT, next_mode == LUT_RAM_A ? 0 : 1); return true; } @@ -271,14 +304,19 @@ static void dwb3_program_gamut_remap( struct color_matrices_reg gam_regs; - REG_UPDATE(DWB_GAMUT_REMAP_COEF_FORMAT, DWB_GAMUT_REMAP_COEF_FORMAT, coef_format); - if (regval == NULL || select == CM_GAMUT_REMAP_MODE_BYPASS) { REG_SET(DWB_GAMUT_REMAP_MODE, 0, DWB_GAMUT_REMAP_MODE, 0); return; } + REG_UPDATE(DWB_GAMUT_REMAP_COEF_FORMAT, DWB_GAMUT_REMAP_COEF_FORMAT, coef_format); + + gam_regs.shifts.csc_c11 = dwbc30->dwbc_shift->DWB_GAMUT_REMAPA_C11; + gam_regs.masks.csc_c11 = dwbc30->dwbc_mask->DWB_GAMUT_REMAPA_C11; + gam_regs.shifts.csc_c12 = dwbc30->dwbc_shift->DWB_GAMUT_REMAPA_C12; + gam_regs.masks.csc_c12 = dwbc30->dwbc_mask->DWB_GAMUT_REMAPA_C12; + switch (select) { case CM_GAMUT_REMAP_MODE_RAMA_COEFF: gam_regs.csc_c11_c12 = REG(DWB_GAMUT_REMAPA_C11_C12); diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubbub.c index c0980da6dc49..f4414de96acc 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubbub.c @@ -451,6 +451,7 @@ static const struct hubbub_funcs hubbub30_funcs = { .force_wm_propagate_to_pipes = hubbub3_force_wm_propagate_to_pipes, .force_pstate_change_control = hubbub3_force_pstate_change_control, .init_watermarks = hubbub3_init_watermarks, + .hubbub_read_state = hubbub2_read_state, }; void hubbub3_construct(struct dcn20_hubbub *hubbub3, diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubbub.h b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubbub.h index c0bd0fb09455..7b597908b937 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubbub.h +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubbub.h @@ -87,7 +87,19 @@ HUBBUB_SF(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_A, DCHUBBUB_ARB_VM_ROW_ALLOW_DRAM_CLK_CHANGE_WATERMARK_A, mask_sh), \ HUBBUB_SF(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_B, DCHUBBUB_ARB_VM_ROW_ALLOW_DRAM_CLK_CHANGE_WATERMARK_B, mask_sh), \ HUBBUB_SF(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_C, DCHUBBUB_ARB_VM_ROW_ALLOW_DRAM_CLK_CHANGE_WATERMARK_C, mask_sh), \ - HUBBUB_SF(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_D, DCHUBBUB_ARB_VM_ROW_ALLOW_DRAM_CLK_CHANGE_WATERMARK_D, mask_sh) + HUBBUB_SF(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_D, DCHUBBUB_ARB_VM_ROW_ALLOW_DRAM_CLK_CHANGE_WATERMARK_D, mask_sh), \ + HUBBUB_SF(DCN_VM_FAULT_ADDR_MSB, DCN_VM_FAULT_ADDR_MSB, mask_sh), \ + HUBBUB_SF(DCN_VM_FAULT_ADDR_LSB, DCN_VM_FAULT_ADDR_LSB, mask_sh), \ + HUBBUB_SF(DCN_VM_FAULT_CNTL, DCN_VM_ERROR_STATUS_CLEAR, mask_sh), \ + HUBBUB_SF(DCN_VM_FAULT_CNTL, DCN_VM_ERROR_STATUS_MODE, mask_sh), \ + HUBBUB_SF(DCN_VM_FAULT_CNTL, DCN_VM_ERROR_INTERRUPT_ENABLE, mask_sh), \ + HUBBUB_SF(DCN_VM_FAULT_CNTL, DCN_VM_RANGE_FAULT_DISABLE, mask_sh), \ + HUBBUB_SF(DCN_VM_FAULT_CNTL, DCN_VM_PRQ_FAULT_DISABLE, mask_sh), \ + HUBBUB_SF(DCN_VM_FAULT_STATUS, DCN_VM_ERROR_STATUS, mask_sh), \ + HUBBUB_SF(DCN_VM_FAULT_STATUS, DCN_VM_ERROR_VMID, mask_sh), \ + HUBBUB_SF(DCN_VM_FAULT_STATUS, DCN_VM_ERROR_TABLE_LEVEL, mask_sh), \ + HUBBUB_SF(DCN_VM_FAULT_STATUS, DCN_VM_ERROR_PIPE, mask_sh), \ + HUBBUB_SF(DCN_VM_FAULT_STATUS, DCN_VM_ERROR_INTERRUPT_STATUS, mask_sh) void hubbub3_construct(struct dcn20_hubbub *hubbub3, struct dc_context *ctx, diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c 
index c68e3a708a33..fafed1e4a998 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c @@ -398,12 +398,22 @@ void dcn30_program_all_writeback_pipes_in_tree( for (i_pipe = 0; i_pipe < dc->res_pool->pipe_count; i_pipe++) { struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i_pipe]; + if (!pipe_ctx->plane_state) + continue; + if (pipe_ctx->plane_state == wb_info.writeback_source_plane) { wb_info.mpcc_inst = pipe_ctx->plane_res.mpcc_inst; break; } } - ASSERT(wb_info.mpcc_inst != -1); + + if (wb_info.mpcc_inst == -1) { + /* Disable writeback pipe and disconnect from MPCC + * if source plane has been removed + */ + dc->hwss.disable_writeback(dc, wb_info.dwb_pipe_inst); + continue; + } ASSERT(wb_info.dwb_pipe_inst < dc->res_pool->res_cap->num_dwb); dwb = dc->res_pool->dwbc[wb_info.dwb_pipe_inst]; @@ -580,22 +590,19 @@ void dcn30_init_hw(struct dc *dc) */ if (dc->config.power_down_display_on_boot) { struct dc_link *edp_links[MAX_NUM_EDP]; - struct dc_link *edp_link; + struct dc_link *edp_link = NULL; get_edp_links(dc, edp_links, &edp_num); - if (edp_num) { - for (i = 0; i < edp_num; i++) { - edp_link = edp_links[i]; - if (edp_link->link_enc->funcs->is_dig_enabled && - edp_link->link_enc->funcs->is_dig_enabled(edp_link->link_enc) && - dc->hwss.edp_backlight_control && - dc->hwss.power_down && - dc->hwss.edp_power_control) { - dc->hwss.edp_backlight_control(edp_link, false); - dc->hwss.power_down(dc); - dc->hwss.edp_power_control(edp_link, false); - } - } + if (edp_num) + edp_link = edp_links[0]; + if (edp_link && edp_link->link_enc->funcs->is_dig_enabled && + edp_link->link_enc->funcs->is_dig_enabled(edp_link->link_enc) && + dc->hwss.edp_backlight_control && + dc->hwss.power_down && + dc->hwss.edp_power_control) { + dc->hwss.edp_backlight_control(edp_link, false); + dc->hwss.power_down(dc); + dc->hwss.edp_power_control(edp_link, false); } else { for (i = 0; i < dc->link_count; i++) { struct dc_link *link = dc->links[i]; diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c index f37e8254df21..089be7347591 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c @@ -109,11 +109,9 @@ void optc3_lock(struct timing_generator *optc) REG_SET(OTG_MASTER_UPDATE_LOCK, 0, OTG_MASTER_UPDATE_LOCK, 1); - /* Should be fast, status does not update on maximus */ - if (optc->ctx->dce_environment != DCE_ENV_FPGA_MAXIMUS) - REG_WAIT(OTG_MASTER_UPDATE_LOCK, - UPDATE_LOCK_STATUS, 1, - 1, 10); + REG_WAIT(OTG_MASTER_UPDATE_LOCK, + UPDATE_LOCK_STATUS, 1, + 1, 10); } void optc3_set_out_mux(struct timing_generator *optc, enum otg_out_mux_dest dest) diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c index 28e15ebf2f43..a0de309475a9 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c @@ -92,7 +92,7 @@ #define DC_LOGGER_INIT(logger) struct _vcs_dpi_ip_params_st dcn3_0_ip = { - .use_min_dcfclk = 1, + .use_min_dcfclk = 0, .clamp_min_dcfclk = 0, .odm_capable = 1, .gpuvm_enable = 0, @@ -2398,16 +2398,37 @@ void dcn30_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params dc->dml.soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0; if (bw_params->clk_table.entries[0].memclk_mhz) { + int max_dcfclk_mhz = 0, max_dispclk_mhz = 0, max_dppclk_mhz = 0, 
max_phyclk_mhz = 0; + + for (i = 0; i < MAX_NUM_DPM_LVL; i++) { + if (bw_params->clk_table.entries[i].dcfclk_mhz > max_dcfclk_mhz) + max_dcfclk_mhz = bw_params->clk_table.entries[i].dcfclk_mhz; + if (bw_params->clk_table.entries[i].dispclk_mhz > max_dispclk_mhz) + max_dispclk_mhz = bw_params->clk_table.entries[i].dispclk_mhz; + if (bw_params->clk_table.entries[i].dppclk_mhz > max_dppclk_mhz) + max_dppclk_mhz = bw_params->clk_table.entries[i].dppclk_mhz; + if (bw_params->clk_table.entries[i].phyclk_mhz > max_phyclk_mhz) + max_phyclk_mhz = bw_params->clk_table.entries[i].phyclk_mhz; + } + + if (!max_dcfclk_mhz) + max_dcfclk_mhz = dcn3_0_soc.clock_limits[0].dcfclk_mhz; + if (!max_dispclk_mhz) + max_dispclk_mhz = dcn3_0_soc.clock_limits[0].dispclk_mhz; + if (!max_dppclk_mhz) + max_dppclk_mhz = dcn3_0_soc.clock_limits[0].dppclk_mhz; + if (!max_phyclk_mhz) + max_phyclk_mhz = dcn3_0_soc.clock_limits[0].phyclk_mhz; - if (bw_params->clk_table.entries[1].dcfclk_mhz > dcfclk_sta_targets[num_dcfclk_sta_targets-1]) { + if (max_dcfclk_mhz > dcfclk_sta_targets[num_dcfclk_sta_targets-1]) { // If max DCFCLK is greater than the max DCFCLK STA target, insert into the DCFCLK STA target array - dcfclk_sta_targets[num_dcfclk_sta_targets] = bw_params->clk_table.entries[1].dcfclk_mhz; + dcfclk_sta_targets[num_dcfclk_sta_targets] = max_dcfclk_mhz; num_dcfclk_sta_targets++; - } else if (bw_params->clk_table.entries[1].dcfclk_mhz < dcfclk_sta_targets[num_dcfclk_sta_targets-1]) { + } else if (max_dcfclk_mhz < dcfclk_sta_targets[num_dcfclk_sta_targets-1]) { // If max DCFCLK is less than the max DCFCLK STA target, cap values and remove duplicates for (i = 0; i < num_dcfclk_sta_targets; i++) { - if (dcfclk_sta_targets[i] > bw_params->clk_table.entries[1].dcfclk_mhz) { - dcfclk_sta_targets[i] = bw_params->clk_table.entries[1].dcfclk_mhz; + if (dcfclk_sta_targets[i] > max_dcfclk_mhz) { + dcfclk_sta_targets[i] = max_dcfclk_mhz; break; } } @@ -2447,7 +2468,7 @@ void dcn30_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params dcfclk_mhz[num_states] = dcfclk_sta_targets[i]; dram_speed_mts[num_states++] = optimal_uclk_for_dcfclk_sta_targets[i++]; } else { - if (j < num_uclk_states && optimal_dcfclk_for_uclk[j] <= bw_params->clk_table.entries[1].dcfclk_mhz) { + if (j < num_uclk_states && optimal_dcfclk_for_uclk[j] <= max_dcfclk_mhz) { dcfclk_mhz[num_states] = optimal_dcfclk_for_uclk[j]; dram_speed_mts[num_states++] = bw_params->clk_table.entries[j++].memclk_mhz * 16; } else { @@ -2462,11 +2483,12 @@ void dcn30_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params } while (j < num_uclk_states && num_states < DC__VOLTAGE_STATES && - optimal_dcfclk_for_uclk[j] <= bw_params->clk_table.entries[1].dcfclk_mhz) { + optimal_dcfclk_for_uclk[j] <= max_dcfclk_mhz) { dcfclk_mhz[num_states] = optimal_dcfclk_for_uclk[j]; dram_speed_mts[num_states++] = bw_params->clk_table.entries[j++].memclk_mhz * 16; } + dcn3_0_soc.num_states = num_states; for (i = 0; i < dcn3_0_soc.num_states; i++) { dcn3_0_soc.clock_limits[i].state = i; dcn3_0_soc.clock_limits[i].dcfclk_mhz = dcfclk_mhz[i]; @@ -2474,9 +2496,9 @@ void dcn30_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params dcn3_0_soc.clock_limits[i].dram_speed_mts = dram_speed_mts[i]; /* Fill all states with max values of all other clocks */ - dcn3_0_soc.clock_limits[i].dispclk_mhz = bw_params->clk_table.entries[1].dispclk_mhz; - dcn3_0_soc.clock_limits[i].dppclk_mhz = bw_params->clk_table.entries[1].dppclk_mhz; - dcn3_0_soc.clock_limits[i].phyclk_mhz = 
bw_params->clk_table.entries[1].phyclk_mhz; + dcn3_0_soc.clock_limits[i].dispclk_mhz = max_dispclk_mhz; + dcn3_0_soc.clock_limits[i].dppclk_mhz = max_dppclk_mhz; + dcn3_0_soc.clock_limits[i].phyclk_mhz = max_phyclk_mhz; dcn3_0_soc.clock_limits[i].dtbclk_mhz = dcn3_0_soc.clock_limits[0].dtbclk_mhz; /* These clocks cannot come from bw_params, always fill from dcn3_0_soc[1] */ /* FCLK, PHYCLK_D18, SOCCLK, DSCCLK */ @@ -2489,11 +2511,6 @@ void dcn30_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params if (dc->current_state) dml_init_instance(&dc->current_state->bw_ctx.dml, &dcn3_0_soc, &dcn3_0_ip, DML_PROJECT_DCN30); } - - /* re-init DML with updated bb */ - dml_init_instance(&dc->dml, &dcn3_0_soc, &dcn3_0_ip, DML_PROJECT_DCN30); - if (dc->current_state) - dml_init_instance(&dc->current_state->bw_ctx.dml, &dcn3_0_soc, &dcn3_0_ip, DML_PROJECT_DCN30); } static const struct resource_funcs dcn30_res_pool_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_hubbub.c index a0b96b3c083f..1e3bd2e9cdcc 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_hubbub.c @@ -62,6 +62,7 @@ static const struct hubbub_funcs hubbub301_funcs = { .is_allow_self_refresh_enabled = hubbub1_is_allow_self_refresh_enabled, .force_wm_propagate_to_pipes = hubbub3_force_wm_propagate_to_pipes, .force_pstate_change_control = hubbub3_force_pstate_change_control, + .hubbub_read_state = hubbub2_read_state, }; void hubbub301_construct(struct dcn20_hubbub *hubbub3, diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubbub.c index bb9648488900..90c73a1cb986 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubbub.c @@ -124,8 +124,8 @@ static void dcn31_program_compbuf_size(struct hubbub *hubbub, unsigned int compb ASSERT(hubbub2->det0_size + hubbub2->det1_size + hubbub2->det2_size + hubbub2->det3_size + compbuf_size_segments <= hubbub2->crb_size_segs); REG_UPDATE(DCHUBBUB_COMPBUF_CTRL, COMPBUF_SIZE, compbuf_size_segments); - REG_WAIT(DCHUBBUB_COMPBUF_CTRL, COMPBUF_SIZE_CURRENT, compbuf_size_segments, 1, 100); hubbub2->compbuf_size_segments = compbuf_size_segments; + ASSERT(REG_GET(DCHUBBUB_COMPBUF_CTRL, CONFIG_ERROR, &compbuf_size_segments) && !compbuf_size_segments); } } @@ -876,7 +876,33 @@ static bool hubbub31_get_dcc_compression_cap(struct hubbub *hubbub, static int hubbub31_init_dchub_sys_ctx(struct hubbub *hubbub, struct dcn_hubbub_phys_addr_config *pa_config) { - hubbub3_init_dchub_sys_ctx(hubbub, pa_config); + struct dcn20_hubbub *hubbub2 = TO_DCN20_HUBBUB(hubbub); + struct dcn_vmid_page_table_config phys_config; + + REG_SET(DCN_VM_FB_LOCATION_BASE, 0, + FB_BASE, pa_config->system_aperture.fb_base >> 24); + REG_SET(DCN_VM_FB_LOCATION_TOP, 0, + FB_TOP, pa_config->system_aperture.fb_top >> 24); + REG_SET(DCN_VM_FB_OFFSET, 0, + FB_OFFSET, pa_config->system_aperture.fb_offset >> 24); + REG_SET(DCN_VM_AGP_BOT, 0, + AGP_BOT, pa_config->system_aperture.agp_bot >> 24); + REG_SET(DCN_VM_AGP_TOP, 0, + AGP_TOP, pa_config->system_aperture.agp_top >> 24); + REG_SET(DCN_VM_AGP_BASE, 0, + AGP_BASE, pa_config->system_aperture.agp_base >> 24); + + if (pa_config->gart_config.page_table_start_addr != pa_config->gart_config.page_table_end_addr) { + phys_config.page_table_start_addr = pa_config->gart_config.page_table_start_addr >> 12; + phys_config.page_table_end_addr = 
pa_config->gart_config.page_table_end_addr >> 12; + phys_config.page_table_base_addr = pa_config->gart_config.page_table_base_addr; + phys_config.depth = 0; + phys_config.block_size = 0; + // Init VMID 0 based on PA config + dcn20_vmid_setup(&hubbub2->vmid[0], &phys_config); + + dcn20_vmid_setup(&hubbub2->vmid[15], &phys_config); + } dcn21_dchvm_init(hubbub); @@ -934,7 +960,8 @@ static const struct hubbub_funcs hubbub31_funcs = { .is_allow_self_refresh_enabled = hubbub1_is_allow_self_refresh_enabled, .program_det_size = dcn31_program_det_size, .program_compbuf_size = dcn31_program_compbuf_size, - .init_crb = dcn31_init_crb + .init_crb = dcn31_init_crb, + .hubbub_read_state = hubbub2_read_state, }; void hubbub31_construct(struct dcn20_hubbub *hubbub31, diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubbub.h b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubbub.h index 8ec98cbcbd47..e3a654bf04e8 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubbub.h +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubbub.h @@ -98,6 +98,7 @@ HUBBUB_SF(DCHUBBUB_DET3_CTRL, DET3_SIZE_CURRENT, mask_sh),\ HUBBUB_SF(DCHUBBUB_COMPBUF_CTRL, COMPBUF_SIZE, mask_sh),\ HUBBUB_SF(DCHUBBUB_COMPBUF_CTRL, COMPBUF_SIZE_CURRENT, mask_sh),\ + HUBBUB_SF(DCHUBBUB_COMPBUF_CTRL, CONFIG_ERROR, mask_sh),\ HUBBUB_SF(COMPBUF_RESERVED_SPACE, COMPBUF_RESERVED_SPACE_64B, mask_sh),\ HUBBUB_SF(COMPBUF_RESERVED_SPACE, COMPBUF_RESERVED_SPACE_ZS, mask_sh),\ HUBBUB_SF(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_Z8_A, DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_Z8_A, mask_sh), \ @@ -107,7 +108,19 @@ HUBBUB_SF(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_Z8_A, DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_Z8_A, mask_sh), \ HUBBUB_SF(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_Z8_B, DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_Z8_B, mask_sh), \ HUBBUB_SF(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_Z8_C, DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_Z8_C, mask_sh), \ - HUBBUB_SF(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_Z8_D, DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_Z8_D, mask_sh) + HUBBUB_SF(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_Z8_D, DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_Z8_D, mask_sh), \ + HUBBUB_SF(DCN_VM_FAULT_ADDR_MSB, DCN_VM_FAULT_ADDR_MSB, mask_sh), \ + HUBBUB_SF(DCN_VM_FAULT_ADDR_LSB, DCN_VM_FAULT_ADDR_LSB, mask_sh), \ + HUBBUB_SF(DCN_VM_FAULT_CNTL, DCN_VM_ERROR_STATUS_CLEAR, mask_sh), \ + HUBBUB_SF(DCN_VM_FAULT_CNTL, DCN_VM_ERROR_STATUS_MODE, mask_sh), \ + HUBBUB_SF(DCN_VM_FAULT_CNTL, DCN_VM_ERROR_INTERRUPT_ENABLE, mask_sh), \ + HUBBUB_SF(DCN_VM_FAULT_CNTL, DCN_VM_RANGE_FAULT_DISABLE, mask_sh), \ + HUBBUB_SF(DCN_VM_FAULT_CNTL, DCN_VM_PRQ_FAULT_DISABLE, mask_sh), \ + HUBBUB_SF(DCN_VM_FAULT_STATUS, DCN_VM_ERROR_STATUS, mask_sh), \ + HUBBUB_SF(DCN_VM_FAULT_STATUS, DCN_VM_ERROR_VMID, mask_sh), \ + HUBBUB_SF(DCN_VM_FAULT_STATUS, DCN_VM_ERROR_TABLE_LEVEL, mask_sh), \ + HUBBUB_SF(DCN_VM_FAULT_STATUS, DCN_VM_ERROR_PIPE, mask_sh), \ + HUBBUB_SF(DCN_VM_FAULT_STATUS, DCN_VM_ERROR_INTERRUPT_STATUS, mask_sh) void hubbub31_construct(struct dcn20_hubbub *hubbub3, diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c index 8a2119d8ca0d..3f2333ec67e2 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c @@ -226,6 +226,7 @@ void dcn31_init_hw(struct dc *dc) if (dc->config.power_down_display_on_boot) { struct dc_link *edp_links[MAX_NUM_EDP]; struct dc_link *edp_link; + bool power_down = false; get_edp_links(dc, edp_links, &edp_num); if (edp_num) { @@ -239,9 +240,11 @@ void 
dcn31_init_hw(struct dc *dc) dc->hwss.edp_backlight_control(edp_link, false); dc->hwss.power_down(dc); dc->hwss.edp_power_control(edp_link, false); + power_down = true; } } - } else { + } + if (!power_down) { for (i = 0; i < dc->link_count; i++) { struct dc_link *link = dc->links[i]; @@ -607,20 +610,3 @@ bool dcn31_is_abm_supported(struct dc *dc, } return false; } - -static void apply_riommu_invalidation_wa(struct dc *dc) -{ - struct dce_hwseq *hws = dc->hwseq; - - if (!hws->wa.early_riommu_invalidation) - return; - - REG_UPDATE(DCHUBBUB_ARB_HOSTVM_CNTL, DISABLE_HOSTVM_FORCE_ALLOW_PSTATE, 0); -} - -void dcn31_init_pipes(struct dc *dc, struct dc_state *context) -{ - dcn10_init_pipes(dc, context); - apply_riommu_invalidation_wa(dc); - -} diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c index b30d923471cb..40011cd3c8ef 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c @@ -104,7 +104,7 @@ static const struct hw_sequencer_funcs dcn31_funcs = { }; static const struct hwseq_private_funcs dcn31_private_funcs = { - .init_pipes = dcn31_init_pipes, + .init_pipes = dcn10_init_pipes, .update_plane_addr = dcn20_update_plane_addr, .plane_atomic_disconnect = dcn10_plane_atomic_disconnect, .update_mpcc = dcn20_update_mpcc, diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c index cd3248dc31d8..a7702d3c75cd 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c @@ -1302,7 +1302,6 @@ static struct dce_hwseq *dcn31_hwseq_create( hws->regs = &hwseq_reg; hws->shifts = &hwseq_shift; hws->masks = &hwseq_mask; - hws->wa.early_riommu_invalidation = true; } return hws; } diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile index 45862167e6ce..56055df2e8d2 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile @@ -58,6 +58,8 @@ CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_ccflags) ifdef CONFIG_DRM_AMD_DC_DCN CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_vba.o := $(dml_ccflags) +CFLAGS_$(AMDDALPATH)/dc/dml/dcn2x/dcn2x.o := $(dml_ccflags) +CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20v2.o := $(dml_ccflags) @@ -70,6 +72,7 @@ CFLAGS_$(AMDDALPATH)/dc/dml/dcn31/display_mode_vba_31.o := $(dml_ccflags) $(fram CFLAGS_$(AMDDALPATH)/dc/dml/dcn31/display_rq_dlg_calc_31.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_ccflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/display_mode_vba.o := $(dml_rcflags) +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn2x/dcn2x.o := $(dml_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20.o := $(dml_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20.o := $(dml_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20v2.o := $(dml_rcflags) @@ -91,6 +94,7 @@ DML = display_mode_lib.o display_rq_dlg_helpers.o dml1_display_rq_dlg_calc.o \ ifdef CONFIG_DRM_AMD_DC_DCN DML += display_mode_vba.o dcn20/display_rq_dlg_calc_20.o dcn20/display_mode_vba_20.o +DML += dcn2x/dcn2x.o DML += dcn20/display_rq_dlg_calc_20v2.o 
dcn20/display_mode_vba_20v2.o DML += dcn21/display_rq_dlg_calc_21.o dcn21/display_mode_vba_21.o DML += dcn30/display_mode_vba_30.o dcn30/display_rq_dlg_calc_30.o diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c index 799bae229e67..2091dd8c252d 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c @@ -488,7 +488,7 @@ static void get_meta_and_pte_attr(struct display_mode_lib *mode_lib, log2_meta_req_bytes = 6; // meta request is 64b and is 8x8byte meta element // each 64b meta request for dcn is 8x8 meta elements and - // a meta element covers one 256b block of the the data surface. + // a meta element covers one 256b block of the data surface. log2_meta_req_height = log2_blk256_height + 3; // meta req is 8x8 byte, each byte represent 1 blk256 log2_meta_req_width = log2_meta_req_bytes + 8 - log2_bytes_per_element - log2_meta_req_height; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c index 6a6d5970d1d5..1a0c14e465fa 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c @@ -488,7 +488,7 @@ static void get_meta_and_pte_attr(struct display_mode_lib *mode_lib, log2_meta_req_bytes = 6; // meta request is 64b and is 8x8byte meta element // each 64b meta request for dcn is 8x8 meta elements and - // a meta element covers one 256b block of the the data surface. + // a meta element covers one 256b block of the data surface. log2_meta_req_height = log2_blk256_height + 3; // meta req is 8x8 byte, each byte represent 1 blk256 log2_meta_req_width = log2_meta_req_bytes + 8 - log2_bytes_per_element - log2_meta_req_height; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c index 6655bb99fdfd..4136eb8256cb 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c @@ -2270,7 +2270,7 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman &locals->UrgentBurstFactorLumaPre[k], &locals->UrgentBurstFactorChroma[k], &locals->UrgentBurstFactorChromaPre[k], - &locals->NotEnoughUrgentLatencyHiding, + &locals->NotEnoughUrgentLatencyHiding[0][0], &locals->NotEnoughUrgentLatencyHidingPre); if (mode_lib->vba.UseUrgentBurstBandwidth == false) { @@ -2303,7 +2303,8 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman } mode_lib->vba.FractionOfUrgentBandwidth = MaxTotalRDBandwidthNoUrgentBurst / mode_lib->vba.ReturnBW; - if (MaxTotalRDBandwidth <= mode_lib->vba.ReturnBW && locals->NotEnoughUrgentLatencyHiding == 0 && locals->NotEnoughUrgentLatencyHidingPre == 0 && !VRatioPrefetchMoreThan4 + if (MaxTotalRDBandwidth <= mode_lib->vba.ReturnBW && locals->NotEnoughUrgentLatencyHiding[0][0] == 0 && + locals->NotEnoughUrgentLatencyHidingPre == 0 && !VRatioPrefetchMoreThan4 && !DestinationLineTimesForPrefetchLessThan2) mode_lib->vba.PrefetchModeSupported = true; else { @@ -4824,7 +4825,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l &locals->UrgentBurstFactorLumaPre[k], &locals->UrgentBurstFactorChroma[k], &locals->UrgentBurstFactorChromaPre[k], - 
&locals->NotEnoughUrgentLatencyHiding, + &locals->NotEnoughUrgentLatencyHiding[0][0], &locals->NotEnoughUrgentLatencyHidingPre); if (mode_lib->vba.UseUrgentBurstBandwidth == false) { @@ -4851,13 +4852,13 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } locals->BandwidthWithoutPrefetchSupported[i][0] = true; if (mode_lib->vba.MaximumReadBandwidthWithoutPrefetch > locals->ReturnBWPerState[i][0] - || locals->NotEnoughUrgentLatencyHiding == 1) { + || locals->NotEnoughUrgentLatencyHiding[0][0] == 1) { locals->BandwidthWithoutPrefetchSupported[i][0] = false; } locals->PrefetchSupported[i][j] = true; if (mode_lib->vba.MaximumReadBandwidthWithPrefetch > locals->ReturnBWPerState[i][0] - || locals->NotEnoughUrgentLatencyHiding == 1 + || locals->NotEnoughUrgentLatencyHiding[0][0] == 1 || locals->NotEnoughUrgentLatencyHidingPre == 1) { locals->PrefetchSupported[i][j] = false; } diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c index dc1c81a6e377..287e31052b30 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c @@ -482,7 +482,7 @@ static void get_meta_and_pte_attr( log2_meta_req_bytes = 6; // meta request is 64b and is 8x8byte meta element // each 64b meta request for dcn is 8x8 meta elements and - // a meta element covers one 256b block of the the data surface. + // a meta element covers one 256b block of the data surface. log2_meta_req_height = log2_blk256_height + 3; // meta req is 8x8 byte, each byte represent 1 blk256 log2_meta_req_width = log2_meta_req_bytes + 8 - log2_bytes_per_element - log2_meta_req_height; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn2x/dcn2x.c b/drivers/gpu/drm/amd/display/dc/dml/dcn2x/dcn2x.c new file mode 100644 index 000000000000..c58522436291 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn2x/dcn2x.c @@ -0,0 +1,102 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#include "resource.h" + +#include "dcn2x.h" + +/** + * DOC: DCN2x FPU manipulation Overview + * + * The DCN architecture relies on FPU operations, which require special + * compilation flags and the use of kernel_fpu_begin/end functions; ideally, we + * want to avoid spreading FPU access across multiple files. 
With this idea in + * mind, this file aims to centralize all DCN20 and DCN2.1 (DCN2x) functions + * that require FPU access in a single place. Code in this file follows the + * following code pattern: + * + * 1. Functions that use FPU operations should be isolated in static functions. + * 2. The FPU functions should have the noinline attribute to ensure anything + * that deals with FP register is contained within this call. + * 3. All function that needs to be accessed outside this file requires a + * public interface that not uses any FPU reference. + * 4. Developers **must not** use DC_FP_START/END in this file, but they need + * to ensure that the caller invokes it before access any function available + * in this file. For this reason, public functions in this file must invoke + * dc_assert_fp_enabled(); + * + * Let's expand a little bit more the idea in the code pattern. To fully + * isolate FPU operations in a single place, we must avoid situations where + * compilers spill FP values to registers due to FP enable in a specific C + * file. Note that even if we isolate all FPU functions in a single file and + * call its interface from other files, the compiler might enable the use of + * FPU before we call DC_FP_START. Nevertheless, it is the programmer's + * responsibility to invoke DC_FP_START/END in the correct place. To highlight + * situations where developers forgot to use the FP protection before calling + * the DC FPU interface functions, we introduce a helper that checks if the + * function is invoked under FP protection. If not, it will trigger a kernel + * warning. + */ + +void dcn20_populate_dml_writeback_from_context(struct dc *dc, + struct resource_context *res_ctx, + display_e2e_pipe_params_st *pipes) +{ + int pipe_cnt, i; + + dc_assert_fp_enabled(); + + for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) { + struct dc_writeback_info *wb_info = &res_ctx->pipe_ctx[i].stream->writeback_info[0]; + + if (!res_ctx->pipe_ctx[i].stream) + continue; + + /* Set writeback information */ + pipes[pipe_cnt].dout.wb_enable = (wb_info->wb_enabled == true) ? 1 : 0; + pipes[pipe_cnt].dout.num_active_wb++; + pipes[pipe_cnt].dout.wb.wb_src_height = wb_info->dwb_params.cnv_params.crop_height; + pipes[pipe_cnt].dout.wb.wb_src_width = wb_info->dwb_params.cnv_params.crop_width; + pipes[pipe_cnt].dout.wb.wb_dst_width = wb_info->dwb_params.dest_width; + pipes[pipe_cnt].dout.wb.wb_dst_height = wb_info->dwb_params.dest_height; + pipes[pipe_cnt].dout.wb.wb_htaps_luma = 1; + pipes[pipe_cnt].dout.wb.wb_vtaps_luma = 1; + pipes[pipe_cnt].dout.wb.wb_htaps_chroma = wb_info->dwb_params.scaler_taps.h_taps_c; + pipes[pipe_cnt].dout.wb.wb_vtaps_chroma = wb_info->dwb_params.scaler_taps.v_taps_c; + pipes[pipe_cnt].dout.wb.wb_hratio = 1.0; + pipes[pipe_cnt].dout.wb.wb_vratio = 1.0; + if (wb_info->dwb_params.out_format == dwb_scaler_mode_yuv420) { + if (wb_info->dwb_params.output_depth == DWB_OUTPUT_PIXEL_DEPTH_8BPC) + pipes[pipe_cnt].dout.wb.wb_pixel_format = dm_420_8; + else + pipes[pipe_cnt].dout.wb.wb_pixel_format = dm_420_10; + } else { + pipes[pipe_cnt].dout.wb.wb_pixel_format = dm_444_32; + } + + pipe_cnt++; + } +} diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn2x/dcn2x.h b/drivers/gpu/drm/amd/display/dc/dml/dcn2x/dcn2x.h new file mode 100644 index 000000000000..331547ba0713 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn2x/dcn2x.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2021 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef __DCN2X_H__ +#define __DCN2X_H__ + +void dcn20_populate_dml_writeback_from_context(struct dc *dc, + struct resource_context *res_ctx, + display_e2e_pipe_params_st *pipes); + +#endif /* __DCN2X_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c index 9d2016d8fafe..e3d9f1decdfc 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c @@ -2596,7 +2596,7 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman } } - v->NotEnoughUrgentLatencyHiding = false; + v->NotEnoughUrgentLatencyHiding[0][0] = false; v->NotEnoughUrgentLatencyHidingPre = false; for (k = 0; k < v->NumberOfActivePlanes; ++k) { @@ -2681,7 +2681,7 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman if (v->VRatioPrefetchY[k] > 4 || v->VRatioPrefetchC[k] > 4) VRatioPrefetchMoreThan4 = true; if (v->NoUrgentLatencyHiding[k] == true) - v->NotEnoughUrgentLatencyHiding = true; + v->NotEnoughUrgentLatencyHiding[0][0] = true; if (v->NoUrgentLatencyHidingPre[k] == true) v->NotEnoughUrgentLatencyHidingPre = true; @@ -2689,7 +2689,8 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman v->FractionOfUrgentBandwidth = MaxTotalRDBandwidthNoUrgentBurst / v->ReturnBW; - if (MaxTotalRDBandwidth <= v->ReturnBW && v->NotEnoughUrgentLatencyHiding == 0 && v->NotEnoughUrgentLatencyHidingPre == 0 && !VRatioPrefetchMoreThan4 + if (MaxTotalRDBandwidth <= v->ReturnBW && v->NotEnoughUrgentLatencyHiding[0][0] == 0 + && v->NotEnoughUrgentLatencyHidingPre == 0 && !VRatioPrefetchMoreThan4 && !DestinationLineTimesForPrefetchLessThan2) v->PrefetchModeSupported = true; else { @@ -2794,8 +2795,9 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman } v->VStartupLines = v->VStartupLines + 1; - v->PrefetchAndImmediateFlipSupported = (v->PrefetchModeSupported == true && ((!v->ImmediateFlipSupport && !v->HostVMEnable && v->ImmediateFlipRequirement != dm_immediate_flip_required) || v->ImmediateFlipSupported)) ? true : false; - + v->PrefetchModeSupported = (v->PrefetchModeSupported == true && ((!v->ImmediateFlipSupport && + !v->HostVMEnable && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required) || + v->ImmediateFlipSupported)) ? 
true : false; } while (!v->PrefetchModeSupported && v->VStartupLines <= v->MaximumMaxVStartupLines); ASSERT(v->PrefetchModeSupported); @@ -3642,8 +3644,7 @@ static double TruncToValidBPP( void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib) { struct vba_vars_st *v = &mode_lib->vba; - int MinPrefetchMode = 0; - int MaxPrefetchMode = 2; + int MinPrefetchMode, MaxPrefetchMode; int i; unsigned int j, k, m; bool EnoughWritebackUnits = true; @@ -3655,6 +3656,10 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/ + CalculateMinAndMaxPrefetchMode( + mode_lib->vba.AllowDRAMSelfRefreshOrDRAMClockChangeInVblank, + &MinPrefetchMode, &MaxPrefetchMode); + /*Scale Ratio, taps Support Check*/ v->ScaleRatioAndTapsSupport = true; @@ -4753,7 +4758,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l v->HostVMMinPageSize, v->HostVMMaxNonCachedPageTableLevels, v->DynamicMetadataVMEnabled, - v->ImmediateFlipRequirement, + v->ImmediateFlipRequirement[0], v->ProgressiveToInterlaceUnitInOPP, v->MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation, v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData, @@ -5164,7 +5169,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l v->NextMaxVStartup = v->NextMaxVStartup - 1; } } while (!((v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true - && ((v->HostVMEnable == false && v->ImmediateFlipRequirement != dm_immediate_flip_required) + && ((v->HostVMEnable == false && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required) || v->ImmediateFlipSupportedForState[i][j] == true)) || (v->NextMaxVStartup == v->MaxMaxVStartup[i][j] && NextPrefetchModeState > MaxPrefetchMode))); @@ -5305,7 +5310,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l && ViewportExceedsSurface == 0 && v->PrefetchSupported[i][j] == 1 && v->DynamicMetadataSupported[i][j] == 1 && v->TotalVerticalActiveBandwidthSupport[i][j] == 1 && v->VRatioInPrefetchSupported[i][j] == 1 && v->PTEBufferSizeNotExceeded[i][j] == 1 && v->NonsupportedDSCInputBPC == 0 - && ((v->HostVMEnable == 0 && v->ImmediateFlipRequirement != dm_immediate_flip_required) + && ((v->HostVMEnable == 0 && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required) || v->ImmediateFlipSupportedForState[i][j] == true)) { v->ModeSupport[i][j] = true; } else { diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.c index 04601a767a8f..0d934fae1c3a 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.c @@ -549,7 +549,7 @@ static void get_meta_and_pte_attr(struct display_mode_lib *mode_lib, log2_meta_req_bytes = 6; // meta request is 64b and is 8x8byte meta element // each 64b meta request for dcn is 8x8 meta elements and - // a meta element covers one 256b block of the the data surface. + // a meta element covers one 256b block of the data surface. 
log2_meta_req_height = log2_blk256_height + 3; // meta req is 8x8 byte, each byte represent 1 blk256 log2_meta_req_width = log2_meta_req_bytes + 8 - log2_bytes_per_element - log2_meta_req_height; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c index a9667068c690..ce55c9caf9a2 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c @@ -3036,10 +3036,9 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman } v->PrefetchAndImmediateFlipSupported = - (v->PrefetchModeSupported == true - && ((!v->ImmediateFlipSupport && !v->HostVMEnable - && v->ImmediateFlipRequirement != dm_immediate_flip_required) || v->ImmediateFlipSupported)) ? - true : false; + (v->PrefetchModeSupported == true && ((!v->ImmediateFlipSupport && !v->HostVMEnable + && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required) || + v->ImmediateFlipSupported)) ? true : false; #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: PrefetchModeSupported %d\n", __func__, v->PrefetchModeSupported); dml_print("DML::%s: ImmediateFlipRequirement %d\n", __func__, v->ImmediateFlipRequirement == dm_immediate_flip_required); @@ -5103,7 +5102,7 @@ void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l v->HostVMMinPageSize, v->HostVMMaxNonCachedPageTableLevels, v->DynamicMetadataVMEnabled, - v->ImmediateFlipRequirement, + v->ImmediateFlipRequirement[0], v->ProgressiveToInterlaceUnitInOPP, v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation, v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency, @@ -5542,7 +5541,8 @@ void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } v->NextPrefetchMode = v->NextPrefetchMode + 1; } while (!((v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true - && ((v->HostVMEnable == false && v->ImmediateFlipRequirement != dm_immediate_flip_required) + && ((v->HostVMEnable == false && + v->ImmediateFlipRequirement[0] != dm_immediate_flip_required) || v->ImmediateFlipSupportedForState[i][j] == true)) || (v->NextMaxVStartup == v->MaxMaxVStartup[i][j] && NextPrefetchModeState > MaxPrefetchMode))); @@ -5702,7 +5702,8 @@ void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l && v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true && v->TotalVerticalActiveBandwidthSupport[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true && v->PTEBufferSizeNotExceeded[i][j] == true && v->NonsupportedDSCInputBPC == false - && ((v->HostVMEnable == false && v->ImmediateFlipRequirement != dm_immediate_flip_required) + && ((v->HostVMEnable == false + && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required) || v->ImmediateFlipSupportedForState[i][j] == true) && FMTBufferExceeded == false) { v->ModeSupport[i][j] = true; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c index 3def093ef88e..c23905bc733a 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c @@ -563,7 +563,7 @@ static void get_meta_and_pte_attr( log2_meta_req_bytes = 6; // meta request is 64b and is 8x8byte meta element // each 64b meta request for dcn is 8x8 meta 
elements and - // a meta element covers one 256b block of the the data surface. + // a meta element covers one 256b block of the data surface. log2_meta_req_height = log2_blk256_height + 3; // meta req is 8x8 byte, each byte represent 1 blk256 log2_meta_req_width = log2_meta_req_bytes + 8 - log2_bytes_per_element - log2_meta_req_height; meta_req_width = 1 << log2_meta_req_width; diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h index 64f9c735f74d..1051ca1a23b8 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h @@ -109,7 +109,9 @@ enum clock_change_support { }; enum output_standard { - dm_std_uninitialized = 0, dm_std_cvtr2, dm_std_cvt + dm_std_uninitialized = 0, + dm_std_cvtr2, + dm_std_cvt }; enum mpc_combine_affinity { diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h index 64daa0507393..d46a2733024c 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h @@ -536,6 +536,8 @@ struct _vcs_dpi_display_rq_regs_st { unsigned int mrq_expansion_mode; unsigned int crq_expansion_mode; unsigned int plane1_base_address; + unsigned int aperture_low_addr; // bits [47:18] + unsigned int aperture_high_addr; // bits [47:18] }; struct _vcs_dpi_display_dlg_sys_params_st { diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c index d3b1b6d4ce2f..0fad15020c74 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c @@ -244,6 +244,8 @@ static void fetch_socbb_params(struct display_mode_lib *mode_lib) mode_lib->vba.DRAMClockChangeSupportsVActive = !soc->disable_dram_clock_change_vactive_support || mode_lib->vba.DummyPStateCheck; mode_lib->vba.AllowDramClockChangeOneDisplayVactive = soc->allow_dram_clock_one_display_vactive; + mode_lib->vba.AllowDRAMSelfRefreshOrDRAMClockChangeInVblank = + soc->allow_dram_self_refresh_or_dram_clock_change_in_vblank; mode_lib->vba.Downspreading = soc->downspread_percent; mode_lib->vba.DRAMChannelWidth = soc->dram_channel_width_bytes; // new! 
@@ -396,7 +398,6 @@ static void fetch_pipe_params(struct display_mode_lib *mode_lib) mode_lib->vba.NumberOfActivePlanes = 0; mode_lib->vba.ImmediateFlipSupport = false; - mode_lib->vba.ImmediateFlipRequirement = dm_immediate_flip_not_required; for (j = 0; j < mode_lib->vba.cache_num_pipes; ++j) { display_pipe_source_params_st *src = &pipes[j].pipe.src; display_pipe_dest_params_st *dst = &pipes[j].pipe.dest; @@ -409,6 +410,7 @@ static void fetch_pipe_params(struct display_mode_lib *mode_lib) continue; visited[j] = true; + mode_lib->vba.ImmediateFlipRequirement[j] = dm_immediate_flip_not_required; mode_lib->vba.pipe_plane[j] = mode_lib->vba.NumberOfActivePlanes; mode_lib->vba.DPPPerPlane[mode_lib->vba.NumberOfActivePlanes] = 1; mode_lib->vba.SourceScan[mode_lib->vba.NumberOfActivePlanes] = @@ -667,9 +669,9 @@ static void fetch_pipe_params(struct display_mode_lib *mode_lib) mode_lib->vba.ViewportHeightChroma[mode_lib->vba.NumberOfActivePlanes] = src->viewport_height_max / vdiv_c; } - if (pipes[k].pipe.src.immediate_flip) { + if (pipes[j].pipe.src.immediate_flip) { mode_lib->vba.ImmediateFlipSupport = true; - mode_lib->vba.ImmediateFlipRequirement = dm_immediate_flip_required; + mode_lib->vba.ImmediateFlipRequirement[j] = dm_immediate_flip_required; } mode_lib->vba.NumberOfActivePlanes++; @@ -733,8 +735,6 @@ static void fetch_pipe_params(struct display_mode_lib *mode_lib) mode_lib->vba.OverrideHostVMPageTableLevels; } - mode_lib->vba.AllowDRAMSelfRefreshOrDRAMClockChangeInVblank = dm_try_to_allow_self_refresh_and_mclk_switch; - if (mode_lib->vba.OverrideGPUVMPageTableLevels) mode_lib->vba.GPUVMMaxPageTableLevels = mode_lib->vba.OverrideGPUVMPageTableLevels; @@ -845,9 +845,10 @@ void PixelClockAdjustmentForProgressiveToInterlaceUnit(struct display_mode_lib * //Progressive To Interlace Unit Effect for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { + mode_lib->vba.PixelClockBackEnd[k] = mode_lib->vba.PixelClock[k]; if (mode_lib->vba.Interlace[k] == 1 && mode_lib->vba.ProgressiveToInterlaceUnitInOPP == true) { - mode_lib->vba.PixelClock[k] = 2 * mode_lib->vba.PixelClockBackEnd[k]; + mode_lib->vba.PixelClock[k] = 2 * mode_lib->vba.PixelClock[k]; } } } @@ -890,8 +891,9 @@ void ModeSupportAndSystemConfiguration(struct display_mode_lib *mode_lib) mode_lib->vba.DISPCLK = soc->clock_limits[mode_lib->vba.VoltageLevel].dispclk_mhz; // Total Available Pipes Support Check - for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) + for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { total_pipes += mode_lib->vba.DPPPerPlane[k]; + } ASSERT(total_pipes <= DC__NUM_DPP__MAX); } diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h index d18a021d4d32..90e87961fe3e 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h @@ -676,7 +676,7 @@ struct vba_vars_st { double AlignedDCCMetaPitchY[DC__NUM_DPP__MAX]; double AlignedDCCMetaPitchC[DC__NUM_DPP__MAX]; - unsigned int NotEnoughUrgentLatencyHiding; + unsigned int NotEnoughUrgentLatencyHiding[DC__VOLTAGE_STATES][2]; unsigned int NotEnoughUrgentLatencyHidingPre; int PTEBufferSizeInRequestsForLuma; int PTEBufferSizeInRequestsForChroma; @@ -877,7 +877,7 @@ struct vba_vars_st { int PercentMarginOverMinimumRequiredDCFCLK; bool DynamicMetadataSupported[DC__VOLTAGE_STATES][2]; - enum immediate_flip_requirement ImmediateFlipRequirement; + enum immediate_flip_requirement ImmediateFlipRequirement[DC__NUM_DPP__MAX]; unsigned 
int DETBufferSizeYThisState[DC__NUM_DPP__MAX]; unsigned int DETBufferSizeCThisState[DC__NUM_DPP__MAX]; bool NoUrgentLatencyHiding[DC__NUM_DPP__MAX]; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c b/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c index 414da64f5734..8f2b1684c231 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c @@ -456,7 +456,7 @@ static void dml1_rq_dlg_get_row_heights( log2_meta_req_bytes = 6; /* meta request is 64b and is 8x8byte meta element */ /* each 64b meta request for dcn is 8x8 meta elements and - * a meta element covers one 256b block of the the data surface. + * a meta element covers one 256b block of the data surface. */ log2_meta_req_height = log2_blk256_height + 3; /* meta req is 8x8 */ log2_meta_req_width = log2_meta_req_bytes + 8 - log2_bytes_per_element @@ -718,7 +718,7 @@ static void get_surf_rq_param( log2_meta_req_bytes = 6; /* meta request is 64b and is 8x8byte meta element */ /* each 64b meta request for dcn is 8x8 meta elements and - * a meta element covers one 256b block of the the data surface. + * a meta element covers one 256b block of the data surface. */ log2_meta_req_height = log2_blk256_height + 3; /* meta req is 8x8 byte, each byte represent 1 blk256 */ log2_meta_req_width = log2_meta_req_bytes + 8 - log2_bytes_per_element diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c index f403d8e84a8c..f5b7da0e64c0 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c +++ b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c @@ -28,6 +28,7 @@ #include <drm/drm_dp_helper.h> #include "dc.h" #include "rc_calc.h" +#include "fixed31_32.h" /* This module's internal functions */ @@ -39,6 +40,47 @@ static bool dsc_policy_enable_dsc_when_not_needed; static bool dsc_policy_disable_dsc_stream_overhead; +/* Forward Declerations */ +static void get_dsc_bandwidth_range( + const uint32_t min_bpp_x16, + const uint32_t max_bpp_x16, + const uint32_t num_slices_h, + const struct dsc_enc_caps *dsc_caps, + const struct dc_crtc_timing *timing, + struct dc_dsc_bw_range *range); + +static uint32_t compute_bpp_x16_from_target_bandwidth( + const uint32_t bandwidth_in_kbps, + const struct dc_crtc_timing *timing, + const uint32_t num_slices_h, + const uint32_t bpp_increment_div, + const bool is_dp); + +static void get_dsc_enc_caps( + const struct display_stream_compressor *dsc, + struct dsc_enc_caps *dsc_enc_caps, + int pixel_clock_100Hz); + +static bool intersect_dsc_caps( + const struct dsc_dec_dpcd_caps *dsc_sink_caps, + const struct dsc_enc_caps *dsc_enc_caps, + enum dc_pixel_encoding pixel_encoding, + struct dsc_enc_caps *dsc_common_caps); + +static bool setup_dsc_config( + const struct dsc_dec_dpcd_caps *dsc_sink_caps, + const struct dsc_enc_caps *dsc_enc_caps, + int target_bandwidth_kbps, + const struct dc_crtc_timing *timing, + int min_slice_height_override, + int max_dsc_target_bpp_limit_override_x16, + struct dc_dsc_config *dsc_cfg); + +static struct fixed31_32 compute_dsc_max_bandwidth_overhead( + const struct dc_crtc_timing *timing, + const int num_slices_h, + const bool is_dp); + static bool dsc_buff_block_size_from_dpcd(int dpcd_buff_block_size, int *buff_block_size) { @@ -171,10 +213,164 @@ static bool dsc_bpp_increment_div_from_dpcd(uint8_t bpp_increment_dpcd, uint32_t return true; } + + +bool dc_dsc_parse_dsc_dpcd(const struct dc *dc, + const uint8_t *dpcd_dsc_basic_data, + const 
uint8_t *dpcd_dsc_branch_decoder_caps, + struct dsc_dec_dpcd_caps *dsc_sink_caps) +{ + if (!dpcd_dsc_basic_data) + return false; + + dsc_sink_caps->is_dsc_supported = + (dpcd_dsc_basic_data[DP_DSC_SUPPORT - DP_DSC_SUPPORT] & DP_DSC_DECOMPRESSION_IS_SUPPORTED) != 0; + if (!dsc_sink_caps->is_dsc_supported) + return false; + + dsc_sink_caps->dsc_version = dpcd_dsc_basic_data[DP_DSC_REV - DP_DSC_SUPPORT]; + + { + int buff_block_size; + int buff_size; + + if (!dsc_buff_block_size_from_dpcd(dpcd_dsc_basic_data[DP_DSC_RC_BUF_BLK_SIZE - DP_DSC_SUPPORT], + &buff_block_size)) + return false; + + buff_size = dpcd_dsc_basic_data[DP_DSC_RC_BUF_SIZE - DP_DSC_SUPPORT] + 1; + dsc_sink_caps->rc_buffer_size = buff_size * buff_block_size; + } + + dsc_sink_caps->slice_caps1.raw = dpcd_dsc_basic_data[DP_DSC_SLICE_CAP_1 - DP_DSC_SUPPORT]; + if (!dsc_line_buff_depth_from_dpcd(dpcd_dsc_basic_data[DP_DSC_LINE_BUF_BIT_DEPTH - DP_DSC_SUPPORT], + &dsc_sink_caps->lb_bit_depth)) + return false; + + dsc_sink_caps->is_block_pred_supported = + (dpcd_dsc_basic_data[DP_DSC_BLK_PREDICTION_SUPPORT - DP_DSC_SUPPORT] & + DP_DSC_BLK_PREDICTION_IS_SUPPORTED) != 0; + + dsc_sink_caps->edp_max_bits_per_pixel = + dpcd_dsc_basic_data[DP_DSC_MAX_BITS_PER_PIXEL_LOW - DP_DSC_SUPPORT] | + dpcd_dsc_basic_data[DP_DSC_MAX_BITS_PER_PIXEL_HI - DP_DSC_SUPPORT] << 8; + + dsc_sink_caps->color_formats.raw = dpcd_dsc_basic_data[DP_DSC_DEC_COLOR_FORMAT_CAP - DP_DSC_SUPPORT]; + dsc_sink_caps->color_depth.raw = dpcd_dsc_basic_data[DP_DSC_DEC_COLOR_DEPTH_CAP - DP_DSC_SUPPORT]; + + { + int dpcd_throughput = dpcd_dsc_basic_data[DP_DSC_PEAK_THROUGHPUT - DP_DSC_SUPPORT]; + + if (!dsc_throughput_from_dpcd(dpcd_throughput & DP_DSC_THROUGHPUT_MODE_0_MASK, + &dsc_sink_caps->throughput_mode_0_mps)) + return false; + + dpcd_throughput = (dpcd_throughput & DP_DSC_THROUGHPUT_MODE_1_MASK) >> DP_DSC_THROUGHPUT_MODE_1_SHIFT; + if (!dsc_throughput_from_dpcd(dpcd_throughput, &dsc_sink_caps->throughput_mode_1_mps)) + return false; + } + + dsc_sink_caps->max_slice_width = dpcd_dsc_basic_data[DP_DSC_MAX_SLICE_WIDTH - DP_DSC_SUPPORT] * 320; + dsc_sink_caps->slice_caps2.raw = dpcd_dsc_basic_data[DP_DSC_SLICE_CAP_2 - DP_DSC_SUPPORT]; + + if (!dsc_bpp_increment_div_from_dpcd(dpcd_dsc_basic_data[DP_DSC_BITS_PER_PIXEL_INC - DP_DSC_SUPPORT], + &dsc_sink_caps->bpp_increment_div)) + return false; + + if (dc->debug.dsc_bpp_increment_div) { + /* dsc_bpp_increment_div should onl be 1, 2, 4, 8 or 16, but rather than rejecting invalid values, + * we'll accept all and get it into range. This also makes the above check against 0 redundant, + * but that one stresses out the override will be only used if it's not 0. 
+ */ + if (dc->debug.dsc_bpp_increment_div >= 1) + dsc_sink_caps->bpp_increment_div = 1; + if (dc->debug.dsc_bpp_increment_div >= 2) + dsc_sink_caps->bpp_increment_div = 2; + if (dc->debug.dsc_bpp_increment_div >= 4) + dsc_sink_caps->bpp_increment_div = 4; + if (dc->debug.dsc_bpp_increment_div >= 8) + dsc_sink_caps->bpp_increment_div = 8; + if (dc->debug.dsc_bpp_increment_div >= 16) + dsc_sink_caps->bpp_increment_div = 16; + } + + /* Extended caps */ + if (dpcd_dsc_branch_decoder_caps == NULL) { // branch decoder DPCD DSC data can be null for non branch device + dsc_sink_caps->branch_overall_throughput_0_mps = 0; + dsc_sink_caps->branch_overall_throughput_1_mps = 0; + dsc_sink_caps->branch_max_line_width = 0; + return true; + } + + dsc_sink_caps->branch_overall_throughput_0_mps = + dpcd_dsc_branch_decoder_caps[DP_DSC_BRANCH_OVERALL_THROUGHPUT_0 - DP_DSC_BRANCH_OVERALL_THROUGHPUT_0]; + if (dsc_sink_caps->branch_overall_throughput_0_mps == 0) + dsc_sink_caps->branch_overall_throughput_0_mps = 0; + else if (dsc_sink_caps->branch_overall_throughput_0_mps == 1) + dsc_sink_caps->branch_overall_throughput_0_mps = 680; + else { + dsc_sink_caps->branch_overall_throughput_0_mps *= 50; + dsc_sink_caps->branch_overall_throughput_0_mps += 600; + } + + dsc_sink_caps->branch_overall_throughput_1_mps = + dpcd_dsc_branch_decoder_caps[DP_DSC_BRANCH_OVERALL_THROUGHPUT_1 - DP_DSC_BRANCH_OVERALL_THROUGHPUT_0]; + if (dsc_sink_caps->branch_overall_throughput_1_mps == 0) + dsc_sink_caps->branch_overall_throughput_1_mps = 0; + else if (dsc_sink_caps->branch_overall_throughput_1_mps == 1) + dsc_sink_caps->branch_overall_throughput_1_mps = 680; + else { + dsc_sink_caps->branch_overall_throughput_1_mps *= 50; + dsc_sink_caps->branch_overall_throughput_1_mps += 600; + } + + dsc_sink_caps->branch_max_line_width = + dpcd_dsc_branch_decoder_caps[DP_DSC_BRANCH_MAX_LINE_WIDTH - DP_DSC_BRANCH_OVERALL_THROUGHPUT_0] * 320; + ASSERT(dsc_sink_caps->branch_max_line_width == 0 || dsc_sink_caps->branch_max_line_width >= 5120); + + dsc_sink_caps->is_dp = true; + return true; +} + + +/* If DSC is possible, get DSC bandwidth range based on [min_bpp, max_bpp] target bitrate range and + * timing's pixel clock and uncompressed bandwidth. + * If DSC is not possible, leave '*range' untouched.
+ */ +bool dc_dsc_compute_bandwidth_range( + const struct display_stream_compressor *dsc, + uint32_t dsc_min_slice_height_override, + uint32_t min_bpp_x16, + uint32_t max_bpp_x16, + const struct dsc_dec_dpcd_caps *dsc_sink_caps, + const struct dc_crtc_timing *timing, + struct dc_dsc_bw_range *range) +{ + bool is_dsc_possible = false; + struct dsc_enc_caps dsc_enc_caps; + struct dsc_enc_caps dsc_common_caps; + struct dc_dsc_config config; + + get_dsc_enc_caps(dsc, &dsc_enc_caps, timing->pix_clk_100hz); + + is_dsc_possible = intersect_dsc_caps(dsc_sink_caps, &dsc_enc_caps, + timing->pixel_encoding, &dsc_common_caps); + + if (is_dsc_possible) + is_dsc_possible = setup_dsc_config(dsc_sink_caps, &dsc_enc_caps, 0, timing, + dsc_min_slice_height_override, max_bpp_x16, &config); + + if (is_dsc_possible) + get_dsc_bandwidth_range(min_bpp_x16, max_bpp_x16, + config.num_slices_h, &dsc_common_caps, timing, range); + + return is_dsc_possible; +} + static void get_dsc_enc_caps( - const struct display_stream_compressor *dsc, - struct dsc_enc_caps *dsc_enc_caps, - int pixel_clock_100Hz) + const struct display_stream_compressor *dsc, + struct dsc_enc_caps *dsc_enc_caps, + int pixel_clock_100Hz) { // This is a static HW query, so we can use any DSC @@ -187,14 +383,14 @@ static void get_dsc_enc_caps( } } -/* Returns 'false' if no intersection was found for at least one capablity. +/* Returns 'false' if no intersection was found for at least one capability. * It also implicitly validates some sink caps against invalid value of zero. */ static bool intersect_dsc_caps( - const struct dsc_dec_dpcd_caps *dsc_sink_caps, - const struct dsc_enc_caps *dsc_enc_caps, - enum dc_pixel_encoding pixel_encoding, - struct dsc_enc_caps *dsc_common_caps) + const struct dsc_dec_dpcd_caps *dsc_sink_caps, + const struct dsc_enc_caps *dsc_enc_caps, + enum dc_pixel_encoding pixel_encoding, + struct dsc_enc_caps *dsc_common_caps) { int32_t max_slices; int32_t total_sink_throughput; @@ -205,10 +401,14 @@ static bool intersect_dsc_caps( if (!dsc_common_caps->dsc_version) return false; - dsc_common_caps->slice_caps.bits.NUM_SLICES_1 = dsc_sink_caps->slice_caps1.bits.NUM_SLICES_1 && dsc_enc_caps->slice_caps.bits.NUM_SLICES_1; - dsc_common_caps->slice_caps.bits.NUM_SLICES_2 = dsc_sink_caps->slice_caps1.bits.NUM_SLICES_2 && dsc_enc_caps->slice_caps.bits.NUM_SLICES_2; - dsc_common_caps->slice_caps.bits.NUM_SLICES_4 = dsc_sink_caps->slice_caps1.bits.NUM_SLICES_4 && dsc_enc_caps->slice_caps.bits.NUM_SLICES_4; - dsc_common_caps->slice_caps.bits.NUM_SLICES_8 = dsc_sink_caps->slice_caps1.bits.NUM_SLICES_8 && dsc_enc_caps->slice_caps.bits.NUM_SLICES_8; + dsc_common_caps->slice_caps.bits.NUM_SLICES_1 = + dsc_sink_caps->slice_caps1.bits.NUM_SLICES_1 && dsc_enc_caps->slice_caps.bits.NUM_SLICES_1; + dsc_common_caps->slice_caps.bits.NUM_SLICES_2 = + dsc_sink_caps->slice_caps1.bits.NUM_SLICES_2 && dsc_enc_caps->slice_caps.bits.NUM_SLICES_2; + dsc_common_caps->slice_caps.bits.NUM_SLICES_4 = + dsc_sink_caps->slice_caps1.bits.NUM_SLICES_4 && dsc_enc_caps->slice_caps.bits.NUM_SLICES_4; + dsc_common_caps->slice_caps.bits.NUM_SLICES_8 = + dsc_sink_caps->slice_caps1.bits.NUM_SLICES_8 && dsc_enc_caps->slice_caps.bits.NUM_SLICES_8; if (!dsc_common_caps->slice_caps.raw) return false; @@ -216,7 +416,8 @@ static bool intersect_dsc_caps( if (!dsc_common_caps->lb_bit_depth) return false; - dsc_common_caps->is_block_pred_supported = dsc_sink_caps->is_block_pred_supported && dsc_enc_caps->is_block_pred_supported; + dsc_common_caps->is_block_pred_supported = + 
dsc_sink_caps->is_block_pred_supported && dsc_enc_caps->is_block_pred_supported; dsc_common_caps->color_formats.raw = dsc_sink_caps->color_formats.raw & dsc_enc_caps->color_formats.raw; if (!dsc_common_caps->color_formats.raw) @@ -288,11 +489,11 @@ static struct fixed31_32 compute_dsc_max_bandwidth_overhead( } static uint32_t compute_bpp_x16_from_target_bandwidth( - const uint32_t bandwidth_in_kbps, - const struct dc_crtc_timing *timing, - const uint32_t num_slices_h, - const uint32_t bpp_increment_div, - const bool is_dp) + const uint32_t bandwidth_in_kbps, + const struct dc_crtc_timing *timing, + const uint32_t num_slices_h, + const uint32_t bpp_increment_div, + const bool is_dp) { struct fixed31_32 overhead_in_kbps; struct fixed31_32 effective_bandwidth_in_kbps; @@ -769,146 +970,6 @@ done: return is_dsc_possible; } -bool dc_dsc_parse_dsc_dpcd(const struct dc *dc, const uint8_t *dpcd_dsc_basic_data, const uint8_t *dpcd_dsc_branch_decoder_caps, struct dsc_dec_dpcd_caps *dsc_sink_caps) -{ - if (!dpcd_dsc_basic_data) - return false; - - dsc_sink_caps->is_dsc_supported = (dpcd_dsc_basic_data[DP_DSC_SUPPORT - DP_DSC_SUPPORT] & DP_DSC_DECOMPRESSION_IS_SUPPORTED) != 0; - if (!dsc_sink_caps->is_dsc_supported) - return false; - - dsc_sink_caps->dsc_version = dpcd_dsc_basic_data[DP_DSC_REV - DP_DSC_SUPPORT]; - - { - int buff_block_size; - int buff_size; - - if (!dsc_buff_block_size_from_dpcd(dpcd_dsc_basic_data[DP_DSC_RC_BUF_BLK_SIZE - DP_DSC_SUPPORT], &buff_block_size)) - return false; - - buff_size = dpcd_dsc_basic_data[DP_DSC_RC_BUF_SIZE - DP_DSC_SUPPORT] + 1; - dsc_sink_caps->rc_buffer_size = buff_size * buff_block_size; - } - - dsc_sink_caps->slice_caps1.raw = dpcd_dsc_basic_data[DP_DSC_SLICE_CAP_1 - DP_DSC_SUPPORT]; - if (!dsc_line_buff_depth_from_dpcd(dpcd_dsc_basic_data[DP_DSC_LINE_BUF_BIT_DEPTH - DP_DSC_SUPPORT], &dsc_sink_caps->lb_bit_depth)) - return false; - - dsc_sink_caps->is_block_pred_supported = - (dpcd_dsc_basic_data[DP_DSC_BLK_PREDICTION_SUPPORT - DP_DSC_SUPPORT] & DP_DSC_BLK_PREDICTION_IS_SUPPORTED) != 0; - - dsc_sink_caps->edp_max_bits_per_pixel = - dpcd_dsc_basic_data[DP_DSC_MAX_BITS_PER_PIXEL_LOW - DP_DSC_SUPPORT] | - dpcd_dsc_basic_data[DP_DSC_MAX_BITS_PER_PIXEL_HI - DP_DSC_SUPPORT] << 8; - - dsc_sink_caps->color_formats.raw = dpcd_dsc_basic_data[DP_DSC_DEC_COLOR_FORMAT_CAP - DP_DSC_SUPPORT]; - dsc_sink_caps->color_depth.raw = dpcd_dsc_basic_data[DP_DSC_DEC_COLOR_DEPTH_CAP - DP_DSC_SUPPORT]; - - { - int dpcd_throughput = dpcd_dsc_basic_data[DP_DSC_PEAK_THROUGHPUT - DP_DSC_SUPPORT]; - - if (!dsc_throughput_from_dpcd(dpcd_throughput & DP_DSC_THROUGHPUT_MODE_0_MASK, &dsc_sink_caps->throughput_mode_0_mps)) - return false; - - dpcd_throughput = (dpcd_throughput & DP_DSC_THROUGHPUT_MODE_1_MASK) >> DP_DSC_THROUGHPUT_MODE_1_SHIFT; - if (!dsc_throughput_from_dpcd(dpcd_throughput, &dsc_sink_caps->throughput_mode_1_mps)) - return false; - } - - dsc_sink_caps->max_slice_width = dpcd_dsc_basic_data[DP_DSC_MAX_SLICE_WIDTH - DP_DSC_SUPPORT] * 320; - dsc_sink_caps->slice_caps2.raw = dpcd_dsc_basic_data[DP_DSC_SLICE_CAP_2 - DP_DSC_SUPPORT]; - - if (!dsc_bpp_increment_div_from_dpcd(dpcd_dsc_basic_data[DP_DSC_BITS_PER_PIXEL_INC - DP_DSC_SUPPORT], &dsc_sink_caps->bpp_increment_div)) - return false; - - if (dc->debug.dsc_bpp_increment_div) { - /* dsc_bpp_increment_div should onl be 1, 2, 4, 8 or 16, but rather than rejecting invalid values, - * we'll accept all and get it into range. 
This also makes the above check against 0 redundant, - * but that one stresses out the override will be only used if it's not 0. - */ - if (dc->debug.dsc_bpp_increment_div >= 1) - dsc_sink_caps->bpp_increment_div = 1; - if (dc->debug.dsc_bpp_increment_div >= 2) - dsc_sink_caps->bpp_increment_div = 2; - if (dc->debug.dsc_bpp_increment_div >= 4) - dsc_sink_caps->bpp_increment_div = 4; - if (dc->debug.dsc_bpp_increment_div >= 8) - dsc_sink_caps->bpp_increment_div = 8; - if (dc->debug.dsc_bpp_increment_div >= 16) - dsc_sink_caps->bpp_increment_div = 16; - } - - /* Extended caps */ - if (dpcd_dsc_branch_decoder_caps == NULL) { // branch decoder DPCD DSC data can be null for non branch device - dsc_sink_caps->branch_overall_throughput_0_mps = 0; - dsc_sink_caps->branch_overall_throughput_1_mps = 0; - dsc_sink_caps->branch_max_line_width = 0; - return true; - } - - dsc_sink_caps->branch_overall_throughput_0_mps = dpcd_dsc_branch_decoder_caps[DP_DSC_BRANCH_OVERALL_THROUGHPUT_0 - DP_DSC_BRANCH_OVERALL_THROUGHPUT_0]; - if (dsc_sink_caps->branch_overall_throughput_0_mps == 0) - dsc_sink_caps->branch_overall_throughput_0_mps = 0; - else if (dsc_sink_caps->branch_overall_throughput_0_mps == 1) - dsc_sink_caps->branch_overall_throughput_0_mps = 680; - else { - dsc_sink_caps->branch_overall_throughput_0_mps *= 50; - dsc_sink_caps->branch_overall_throughput_0_mps += 600; - } - - dsc_sink_caps->branch_overall_throughput_1_mps = dpcd_dsc_branch_decoder_caps[DP_DSC_BRANCH_OVERALL_THROUGHPUT_1 - DP_DSC_BRANCH_OVERALL_THROUGHPUT_0]; - if (dsc_sink_caps->branch_overall_throughput_1_mps == 0) - dsc_sink_caps->branch_overall_throughput_1_mps = 0; - else if (dsc_sink_caps->branch_overall_throughput_1_mps == 1) - dsc_sink_caps->branch_overall_throughput_1_mps = 680; - else { - dsc_sink_caps->branch_overall_throughput_1_mps *= 50; - dsc_sink_caps->branch_overall_throughput_1_mps += 600; - } - - dsc_sink_caps->branch_max_line_width = dpcd_dsc_branch_decoder_caps[DP_DSC_BRANCH_MAX_LINE_WIDTH - DP_DSC_BRANCH_OVERALL_THROUGHPUT_0] * 320; - ASSERT(dsc_sink_caps->branch_max_line_width == 0 || dsc_sink_caps->branch_max_line_width >= 5120); - - dsc_sink_caps->is_dp = true; - return true; -} - - -/* If DSC is possbile, get DSC bandwidth range based on [min_bpp, max_bpp] target bitrate range and - * timing's pixel clock and uncompressed bandwidth. - * If DSC is not possible, leave '*range' untouched. 
- */ -bool dc_dsc_compute_bandwidth_range( - const struct display_stream_compressor *dsc, - uint32_t dsc_min_slice_height_override, - uint32_t min_bpp_x16, - uint32_t max_bpp_x16, - const struct dsc_dec_dpcd_caps *dsc_sink_caps, - const struct dc_crtc_timing *timing, - struct dc_dsc_bw_range *range) -{ - bool is_dsc_possible = false; - struct dsc_enc_caps dsc_enc_caps; - struct dsc_enc_caps dsc_common_caps; - struct dc_dsc_config config; - - get_dsc_enc_caps(dsc, &dsc_enc_caps, timing->pix_clk_100hz); - - is_dsc_possible = intersect_dsc_caps(dsc_sink_caps, &dsc_enc_caps, - timing->pixel_encoding, &dsc_common_caps); - - if (is_dsc_possible) - is_dsc_possible = setup_dsc_config(dsc_sink_caps, &dsc_enc_caps, 0, timing, - dsc_min_slice_height_override, max_bpp_x16, &config); - - if (is_dsc_possible) - get_dsc_bandwidth_range(min_bpp_x16, max_bpp_x16, - config.num_slices_h, &dsc_common_caps, timing, range); - - return is_dsc_possible; -} - bool dc_dsc_compute_config( const struct display_stream_compressor *dsc, const struct dsc_dec_dpcd_caps *dsc_sink_caps, @@ -923,22 +984,22 @@ bool dc_dsc_compute_config( get_dsc_enc_caps(dsc, &dsc_enc_caps, timing->pix_clk_100hz); is_dsc_possible = setup_dsc_config(dsc_sink_caps, - &dsc_enc_caps, - target_bandwidth_kbps, - timing, dsc_min_slice_height_override, - max_target_bpp_limit_override * 16, dsc_cfg); + &dsc_enc_caps, + target_bandwidth_kbps, + timing, dsc_min_slice_height_override, + max_target_bpp_limit_override * 16, dsc_cfg); return is_dsc_possible; } uint32_t dc_dsc_stream_bandwidth_in_kbps(const struct dc_crtc_timing *timing, - uint32_t bpp_x16, uint32_t num_slices_h, bool is_dp) + uint32_t bpp_x16, uint32_t num_slices_h, bool is_dp) { struct fixed31_32 overhead_in_kbps; struct fixed31_32 bpp; struct fixed31_32 actual_bandwidth_in_kbps; overhead_in_kbps = compute_dsc_max_bandwidth_overhead( - timing, num_slices_h, is_dp); + timing, num_slices_h, is_dp); bpp = dc_fixpt_from_fraction(bpp_x16, 16); actual_bandwidth_in_kbps = dc_fixpt_from_fraction(timing->pix_clk_100hz, 10); actual_bandwidth_in_kbps = dc_fixpt_mul(actual_bandwidth_in_kbps, bpp); @@ -946,7 +1007,9 @@ uint32_t dc_dsc_stream_bandwidth_in_kbps(const struct dc_crtc_timing *timing, return dc_fixpt_ceil(actual_bandwidth_in_kbps); } -void dc_dsc_get_policy_for_timing(const struct dc_crtc_timing *timing, uint32_t max_target_bpp_limit_override_x16, struct dc_dsc_policy *policy) +void dc_dsc_get_policy_for_timing(const struct dc_crtc_timing *timing, + uint32_t max_target_bpp_limit_override_x16, + struct dc_dsc_policy *policy) { uint32_t bpc = 0; diff --git a/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h b/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h index e2b58ec9912d..01c3a31be191 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h +++ b/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h @@ -174,7 +174,6 @@ bool dp_set_dsc_on_rx(struct pipe_ctx *pipe_ctx, bool enable); void dp_decide_training_settings( struct dc_link *link, const struct dc_link_settings *link_setting, - const struct dc_link_training_overrides *overrides, struct link_training_settings *lt_settings); /* Convert PHY repeater count read from DPCD uint8_t. 
*/ diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h index 316301fc1e30..a262f3278c21 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h @@ -318,6 +318,11 @@ static inline bool should_update_pstate_support(bool safe_to_lower, bool calc_su return false; } +static inline int khz_to_mhz_ceil(int khz) +{ + return (khz + 999) / 1000; +} + int clk_mgr_helper_get_active_display_cnt( struct dc *dc, struct dc_state *context); diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h index 0638b337f143..713f5558f5e1 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h @@ -102,6 +102,15 @@ struct hubbub_addr_config { } default_addrs; }; +struct dcn_hubbub_state { + uint32_t vm_fault_addr_msb; + uint32_t vm_fault_addr_lsb; + uint32_t vm_error_status; + uint32_t vm_error_vmid; + uint32_t vm_error_pipe; + uint32_t vm_error_mode; +}; + struct hubbub_funcs { void (*update_dchub)( struct hubbub *hubbub, @@ -149,6 +158,8 @@ struct hubbub_funcs { void (*force_wm_propagate_to_pipes)(struct hubbub *hubbub); + void (*hubbub_read_state)(struct hubbub *hubbub, struct dcn_hubbub_state *hubbub_state); + void (*force_pstate_change_control)(struct hubbub *hubbub, bool force, bool allow); void (*init_watermarks)(struct hubbub *hubbub); diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer_private.h b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer_private.h index 082549f75978..f7f7e4fff0c2 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer_private.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer_private.h @@ -41,7 +41,6 @@ struct dce_hwseq_wa { bool DEGVIDCN10_254; bool DEGVIDCN21; bool disallow_self_refresh_during_multi_plane_transition; - bool early_riommu_invalidation; }; struct hwseq_wa_state { diff --git a/drivers/gpu/drm/amd/display/dc/inc/link_dpcd.h b/drivers/gpu/drm/amd/display/dc/inc/link_dpcd.h index d4d52ef1b165..3f12b1600d2a 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/link_dpcd.h +++ b/drivers/gpu/drm/amd/display/dc/inc/link_dpcd.h @@ -1,3 +1,28 @@ +/* + * Copyright 2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: AMD + * + */ + #ifndef __LINK_DPCD_H__ #define __LINK_DPCD_H__ #include <inc/core_status.h> diff --git a/drivers/gpu/drm/amd/display/dc/irq_types.h b/drivers/gpu/drm/amd/display/dc/irq_types.h index 1139b9eb9f6f..530c2578db40 100644 --- a/drivers/gpu/drm/amd/display/dc/irq_types.h +++ b/drivers/gpu/drm/amd/display/dc/irq_types.h @@ -152,7 +152,7 @@ enum dc_irq_source { DC_IRQ_SOURCE_DC6_VLINE1, DC_IRQ_SOURCE_DMCUB_OUTBOX, DC_IRQ_SOURCE_DMCUB_OUTBOX0, - + DC_IRQ_SOURCE_DMCUB_GENERAL_DATAOUT, DAL_IRQ_SOURCES_NUMBER }; diff --git a/drivers/gpu/drm/amd/display/dc/os_types.h b/drivers/gpu/drm/amd/display/dc/os_types.h index 126c2f3a4dd3..f50cae252de4 100644 --- a/drivers/gpu/drm/amd/display/dc/os_types.h +++ b/drivers/gpu/drm/amd/display/dc/os_types.h @@ -51,38 +51,9 @@ #define dm_error(fmt, ...) DRM_ERROR(fmt, ##__VA_ARGS__) #if defined(CONFIG_DRM_AMD_DC_DCN) -#if defined(CONFIG_X86) -#include <asm/fpu/api.h> -#define DC_FP_START() kernel_fpu_begin() -#define DC_FP_END() kernel_fpu_end() -#elif defined(CONFIG_PPC64) -#include <asm/switch_to.h> -#include <asm/cputable.h> -#define DC_FP_START() { \ - if (cpu_has_feature(CPU_FTR_VSX_COMP)) { \ - preempt_disable(); \ - enable_kernel_vsx(); \ - } else if (cpu_has_feature(CPU_FTR_ALTIVEC_COMP)) { \ - preempt_disable(); \ - enable_kernel_altivec(); \ - } else if (!cpu_has_feature(CPU_FTR_FPU_UNAVAILABLE)) { \ - preempt_disable(); \ - enable_kernel_fp(); \ - } \ -} -#define DC_FP_END() { \ - if (cpu_has_feature(CPU_FTR_VSX_COMP)) { \ - disable_kernel_vsx(); \ - preempt_enable(); \ - } else if (cpu_has_feature(CPU_FTR_ALTIVEC_COMP)) { \ - disable_kernel_altivec(); \ - preempt_enable(); \ - } else if (!cpu_has_feature(CPU_FTR_FPU_UNAVAILABLE)) { \ - disable_kernel_fp(); \ - preempt_enable(); \ - } \ -} -#endif +#include "amdgpu_dm/dc_fpu.h" +#define DC_FP_START() dc_fpu_begin(__func__, __LINE__) +#define DC_FP_END() dc_fpu_end(__func__, __LINE__) #endif /* diff --git a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h index abbf7ae584c9..caf961bb633f 100644 --- a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h +++ b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h @@ -352,6 +352,8 @@ struct dmub_srv_hw_funcs { uint32_t (*get_gpint_response)(struct dmub_srv *dmub); + uint32_t (*get_gpint_dataout)(struct dmub_srv *dmub); + void (*send_inbox0_cmd)(struct dmub_srv *dmub, union dmub_inbox0_data_register data); uint32_t (*get_current_time)(struct dmub_srv *dmub); @@ -677,6 +679,22 @@ enum dmub_status dmub_srv_get_gpint_response(struct dmub_srv *dmub, uint32_t *response); /** + * dmub_srv_get_gpint_dataout() - Queries the GPINT DATAOUT. + * @dmub: the dmub service + * @dataout: the data for the GPINT DATAOUT + * + * Returns the response code for the last GPINT DATAOUT interrupt. + * + * Can be called after software initialization. + * + * Return: + * DMUB_STATUS_OK - success + * DMUB_STATUS_INVALID - unspecified error + */ +enum dmub_status dmub_srv_get_gpint_dataout(struct dmub_srv *dmub, + uint32_t *dataout); + +/** * dmub_flush_buffer_mem() - Read back entire frame buffer region. * This ensures that the write from x86 has been flushed and will not * hang the DMCUB. 
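The dmub_srv_get_gpint_dataout() kernel-doc added above describes the new driver-facing query for the GPINT DATAOUT register. Purely as a hypothetical usage sketch — the wrapper name and the place it would be called from are assumptions; only dmub_srv_get_gpint_dataout(), its status codes, and the sw_init/hw_funcs preconditions come from this series — a consumer would look roughly like this:

/* Hypothetical consumer of the new DATAOUT query (assumes dmub_srv.h context). */
static void example_consume_gpint_dataout(struct dmub_srv *dmub)
{
	uint32_t dataout = 0;
	enum dmub_status status;

	/*
	 * Only valid after software init; DMUB_STATUS_INVALID is also
	 * returned when the ASIC's hw_funcs do not provide a
	 * get_gpint_dataout hook.
	 */
	status = dmub_srv_get_gpint_dataout(dmub, &dataout);
	if (status != DMUB_STATUS_OK)
		return;

	/* dataout now holds the response for the last GPINT DATAOUT interrupt. */
	(void)dataout;
}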
diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index 7fafb8d6c1da..7b684e7f60df 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -23,8 +23,8 @@ * */ -#ifndef _DMUB_CMD_H_ -#define _DMUB_CMD_H_ +#ifndef DMUB_CMD_H +#define DMUB_CMD_H #if defined(_TEST_HARNESS) || defined(FPGA_USB4) #include "dmub_fw_types.h" @@ -47,10 +47,10 @@ /* Firmware versioning. */ #ifdef DMUB_EXPOSE_VERSION -#define DMUB_FW_VERSION_GIT_HASH 0xf3da2b656 +#define DMUB_FW_VERSION_GIT_HASH 0x7383caadc #define DMUB_FW_VERSION_MAJOR 0 #define DMUB_FW_VERSION_MINOR 0 -#define DMUB_FW_VERSION_REVISION 71 +#define DMUB_FW_VERSION_REVISION 79 #define DMUB_FW_VERSION_TEST 0 #define DMUB_FW_VERSION_VBIOS 0 #define DMUB_FW_VERSION_HOTFIX 0 @@ -322,6 +322,10 @@ union dmub_fw_boot_status { uint32_t mailbox_rdy : 1; /**< 1 if mailbox ready */ uint32_t optimized_init_done : 1; /**< 1 if optimized init done */ uint32_t restore_required : 1; /**< 1 if driver should call restore */ + uint32_t defer_load : 1; /**< 1 if VBIOS data is deferred programmed */ + uint32_t reserved : 1; + uint32_t detection_required: 1; /**< if detection need to be triggered by driver */ + } bits; /**< status bits */ uint32_t all; /**< 32-bit access to status bits */ }; @@ -334,6 +338,8 @@ enum dmub_fw_boot_status_bit { DMUB_FW_BOOT_STATUS_BIT_MAILBOX_READY = (1 << 1), /**< 1 if mailbox ready */ DMUB_FW_BOOT_STATUS_BIT_OPTIMIZED_INIT_DONE = (1 << 2), /**< 1 if init done */ DMUB_FW_BOOT_STATUS_BIT_RESTORE_REQUIRED = (1 << 3), /**< 1 if driver should call restore */ + DMUB_FW_BOOT_STATUS_BIT_DEFERRED_LOADED = (1 << 4), /**< 1 if VBIOS data is deferred programmed */ + DMUB_FW_BOOT_STATUS_BIT_DETECTION_REQUIRED = (1 << 6), /**< 1 if detection need to be triggered by driver*/ }; /* Register bit definition for SCRATCH5 */ @@ -352,7 +358,7 @@ enum dmub_lvtma_status_bit { }; /** - * union dmub_fw_boot_options - Boot option definitions for SCRATCH15 + * union dmub_fw_boot_options - Boot option definitions for SCRATCH14 */ union dmub_fw_boot_options { struct { @@ -363,7 +369,10 @@ union dmub_fw_boot_options { uint32_t disable_clk_gate: 1; /**< 1 if clock gating should be disabled */ uint32_t skip_phy_init_panel_sequence: 1; /**< 1 to skip panel init seq */ uint32_t z10_disable: 1; /**< 1 to disable z10 */ - uint32_t reserved : 25; /**< reserved */ + uint32_t reserved2: 1; /**< reserved for an unreleased feature */ + uint32_t reserved_unreleased1: 1; /**< reserved for an unreleased feature */ + uint32_t invalid_vbios_data: 1; /**< 1 if VBIOS data table is invalid */ + uint32_t reserved : 23; /**< reserved */ } bits; /**< boot bits */ uint32_t all; /**< 32-bit access to bits */ }; @@ -485,6 +494,11 @@ enum dmub_gpint_command { * RETURN: PSR residency in milli-percent. */ DMUB_GPINT__PSR_RESIDENCY = 9, + + /** + * DESC: Notifies DMCUB detection is done so detection required can be cleared. + */ + DMUB_GPINT__NOTIFY_DETECTION_DONE = 12, }; /** @@ -1411,6 +1425,10 @@ struct dmub_cmd_psr_copy_settings_data { * Currently the support is only for 0 or 1 */ uint8_t panel_inst; + /** + * Explicit padding to 4 byte boundary. + */ + uint8_t pad3[4]; }; /** @@ -1435,7 +1453,7 @@ struct dmub_cmd_psr_set_level_data { * 16-bit value dicated by driver that will enable/disable different functionality. */ uint16_t psr_level; - /** + /** * PSR control version. 
*/ uint8_t cmd_version; @@ -2467,16 +2485,14 @@ static inline bool dmub_rb_full(struct dmub_rb *rb) static inline bool dmub_rb_push_front(struct dmub_rb *rb, const union dmub_rb_cmd *cmd) { - uint64_t volatile *dst = (uint64_t volatile *)(rb->base_address) + rb->wrpt / sizeof(uint64_t); - const uint64_t *src = (const uint64_t *)cmd; - uint8_t i; + uint8_t *dst = (uint8_t *)(rb->base_address) + rb->wrpt; + const uint8_t *src = (const uint8_t *)cmd; if (dmub_rb_full(rb)) return false; // copying data - for (i = 0; i < DMUB_RB_CMD_SIZE / sizeof(uint64_t); i++) - *dst++ = *src++; + dmub_memcpy(dst, src, DMUB_RB_CMD_SIZE); rb->wrpt += DMUB_RB_CMD_SIZE; @@ -2498,7 +2514,7 @@ static inline bool dmub_rb_out_push_front(struct dmub_rb *rb, const union dmub_rb_out_cmd *cmd) { uint8_t *dst = (uint8_t *)(rb->base_address) + rb->wrpt; - const uint8_t *src = (uint8_t *)cmd; + const uint8_t *src = (const uint8_t *)cmd; if (dmub_rb_full(rb)) return false; @@ -2583,18 +2599,16 @@ static inline bool dmub_rb_peek_offset(struct dmub_rb *rb, * @return false otherwise */ static inline bool dmub_rb_out_front(struct dmub_rb *rb, - union dmub_rb_out_cmd *cmd) + union dmub_rb_out_cmd *cmd) { - const uint64_t volatile *src = (const uint64_t volatile *)(rb->base_address) + rb->rptr / sizeof(uint64_t); - uint64_t *dst = (uint64_t *)cmd; - uint8_t i; + const uint8_t *src = (const uint8_t *)(rb->base_address) + rb->rptr; + uint8_t *dst = (uint8_t *)cmd; if (dmub_rb_empty(rb)) return false; // copying data - for (i = 0; i < DMUB_RB_CMD_SIZE / sizeof(uint64_t); i++) - *dst++ = *src++; + dmub_memcpy(dst, src, DMUB_RB_CMD_SIZE); return true; } @@ -2629,15 +2643,14 @@ static inline bool dmub_rb_pop_front(struct dmub_rb *rb) */ static inline void dmub_rb_flush_pending(const struct dmub_rb *rb) { + uint8_t buf[DMUB_RB_CMD_SIZE]; uint32_t rptr = rb->rptr; uint32_t wptr = rb->wrpt; while (rptr != wptr) { - uint64_t volatile *data = (uint64_t volatile *)rb->base_address + rptr / sizeof(uint64_t); - uint8_t i; + const uint8_t *data = (const uint8_t *)rb->base_address + rptr; - for (i = 0; i < DMUB_RB_CMD_SIZE / sizeof(uint64_t); i++) - *data++; + dmub_memcpy(buf, data, DMUB_RB_CMD_SIZE); rptr += DMUB_RB_CMD_SIZE; if (rptr >= rb->capacity) diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c index 27c7fa3110c8..fc667cb17eb0 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c @@ -38,7 +38,10 @@ const struct dmub_srv_dcn31_regs dmub_srv_dcn31_regs = { #define DMUB_SR(reg) REG_OFFSET_EXP(reg), - { DMUB_DCN31_REGS() }, + { + DMUB_DCN31_REGS() + DMCUB_INTERNAL_REGS() + }, #undef DMUB_SR #define DMUB_SF(reg, field) FD_MASK(reg, field), @@ -80,7 +83,7 @@ static inline void dmub_dcn31_translate_addr(const union dmub_addr *addr_in, void dmub_dcn31_reset(struct dmub_srv *dmub) { union dmub_gpint_data_register cmd; - const uint32_t timeout = 30; + const uint32_t timeout = 100; uint32_t in_reset, scratch, i; REG_GET(DMCUB_CNTL2, DMCUB_SOFT_RESET, &in_reset); @@ -95,26 +98,22 @@ void dmub_dcn31_reset(struct dmub_srv *dmub) /** * Timeout covers both the ACK and the wait * for remaining work to finish. - * - * This is mostly bound by the PHY disable sequence. - * Each register check will be greater than 1us, so - * don't bother using udelay. 
*/ for (i = 0; i < timeout; ++i) { if (dmub->hw_funcs.is_gpint_acked(dmub, cmd)) break; + + udelay(1); } for (i = 0; i < timeout; ++i) { scratch = dmub->hw_funcs.get_gpint_response(dmub); if (scratch == DMUB_GPINT__STOP_FW_RESPONSE) break; - } - /* Clear the GPINT command manually so we don't reset again. */ - cmd.all = 0; - dmub->hw_funcs.set_gpint(dmub, cmd); + udelay(1); + } /* Force reset in case we timed out, DMCUB is likely hung. */ } @@ -127,6 +126,10 @@ void dmub_dcn31_reset(struct dmub_srv *dmub) REG_WRITE(DMCUB_OUTBOX1_RPTR, 0); REG_WRITE(DMCUB_OUTBOX1_WPTR, 0); REG_WRITE(DMCUB_SCRATCH0, 0); + + /* Clear the GPINT command manually so we don't send anything during boot. */ + cmd.all = 0; + dmub->hw_funcs.set_gpint(dmub, cmd); } void dmub_dcn31_reset_release(struct dmub_srv *dmub) @@ -307,6 +310,21 @@ uint32_t dmub_dcn31_get_gpint_response(struct dmub_srv *dmub) return REG_READ(DMCUB_SCRATCH7); } +uint32_t dmub_dcn31_get_gpint_dataout(struct dmub_srv *dmub) +{ + uint32_t dataout = REG_READ(DMCUB_GPINT_DATAOUT); + + REG_UPDATE(DMCUB_INTERRUPT_ENABLE, DMCUB_GPINT_IH_INT_EN, 0); + + REG_WRITE(DMCUB_GPINT_DATAOUT, 0); + REG_UPDATE(DMCUB_INTERRUPT_ACK, DMCUB_GPINT_IH_INT_ACK, 1); + REG_UPDATE(DMCUB_INTERRUPT_ACK, DMCUB_GPINT_IH_INT_ACK, 0); + + REG_UPDATE(DMCUB_INTERRUPT_ENABLE, DMCUB_GPINT_IH_INT_EN, 1); + + return dataout; +} + union dmub_fw_boot_status dmub_dcn31_get_fw_boot_status(struct dmub_srv *dmub) { union dmub_fw_boot_status status; diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.h b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.h index 9456a6a2d518..bb62605d2ac8 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.h +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.h @@ -114,7 +114,9 @@ struct dmub_srv; DMUB_SR(DMCUB_TIMER_CURRENT) \ DMUB_SR(DMCUB_INST_FETCH_FAULT_ADDR) \ DMUB_SR(DMCUB_UNDEFINED_ADDRESS_FAULT_ADDR) \ - DMUB_SR(DMCUB_DATA_WRITE_FAULT_ADDR) + DMUB_SR(DMCUB_DATA_WRITE_FAULT_ADDR) \ + DMUB_SR(DMCUB_INTERRUPT_ENABLE) \ + DMUB_SR(DMCUB_INTERRUPT_ACK) #define DMUB_DCN31_FIELDS() \ DMUB_SF(DMCUB_CNTL, DMCUB_ENABLE) \ @@ -147,7 +149,9 @@ struct dmub_srv; DMUB_SF(MMHUBBUB_SOFT_RESET, DMUIF_SOFT_RESET) \ DMUB_SF(DCN_VM_FB_LOCATION_BASE, FB_BASE) \ DMUB_SF(DCN_VM_FB_OFFSET, FB_OFFSET) \ - DMUB_SF(DMCUB_INBOX0_WPTR, DMCUB_INBOX0_WPTR) + DMUB_SF(DMCUB_INBOX0_WPTR, DMCUB_INBOX0_WPTR) \ + DMUB_SF(DMCUB_INTERRUPT_ENABLE, DMCUB_GPINT_IH_INT_EN) \ + DMUB_SF(DMCUB_INTERRUPT_ACK, DMCUB_GPINT_IH_INT_ACK) struct dmub_srv_dcn31_reg_offset { #define DMUB_SR(reg) uint32_t reg; @@ -222,6 +226,8 @@ bool dmub_dcn31_is_gpint_acked(struct dmub_srv *dmub, uint32_t dmub_dcn31_get_gpint_response(struct dmub_srv *dmub); +uint32_t dmub_dcn31_get_gpint_dataout(struct dmub_srv *dmub); + void dmub_dcn31_enable_dmub_boot_options(struct dmub_srv *dmub, const struct dmub_srv_hw_params *params); void dmub_dcn31_skip_dmub_panel_power_sequence(struct dmub_srv *dmub, bool skip); diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c index 2bdbd7406f56..75a91cfaf036 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c @@ -224,6 +224,7 @@ static bool dmub_srv_hw_setup(struct dmub_srv *dmub, enum dmub_asic asic) funcs->set_gpint = dmub_dcn31_set_gpint; funcs->is_gpint_acked = dmub_dcn31_is_gpint_acked; funcs->get_gpint_response = dmub_dcn31_get_gpint_response; + funcs->get_gpint_dataout = dmub_dcn31_get_gpint_dataout; funcs->get_fw_status = 
dmub_dcn31_get_fw_boot_status; funcs->enable_dmub_boot_options = dmub_dcn31_enable_dmub_boot_options; funcs->skip_dmub_panel_power_sequence = dmub_dcn31_skip_dmub_panel_power_sequence; @@ -719,6 +720,22 @@ enum dmub_status dmub_srv_get_gpint_response(struct dmub_srv *dmub, return DMUB_STATUS_OK; } +enum dmub_status dmub_srv_get_gpint_dataout(struct dmub_srv *dmub, + uint32_t *dataout) +{ + *dataout = 0; + + if (!dmub->sw_init) + return DMUB_STATUS_INVALID; + + if (!dmub->hw_funcs.get_gpint_dataout) + return DMUB_STATUS_INVALID; + + *dataout = dmub->hw_funcs.get_gpint_dataout(dmub); + + return DMUB_STATUS_OK; +} + enum dmub_status dmub_srv_get_fw_boot_status(struct dmub_srv *dmub, union dmub_fw_boot_status *status) { diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.c index b963226e8af4..3e81850a7ffe 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.c +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.c @@ -39,8 +39,12 @@ static void push_error_status(struct mod_hdcp *hdcp, if (is_hdcp1(hdcp)) { hdcp->connection.hdcp1_retry_count++; + if (hdcp->connection.hdcp1_retry_count == MAX_NUM_OF_ATTEMPTS) + hdcp->connection.link.adjust.hdcp1.disable = 1; } else if (is_hdcp2(hdcp)) { hdcp->connection.hdcp2_retry_count++; + if (hdcp->connection.hdcp2_retry_count == MAX_NUM_OF_ATTEMPTS) + hdcp->connection.link.adjust.hdcp2.disable = 1; } } @@ -59,8 +63,7 @@ static uint8_t is_cp_desired_hdcp1(struct mod_hdcp *hdcp) } } - return (hdcp->connection.hdcp1_retry_count < MAX_NUM_OF_ATTEMPTS) && - is_auth_needed && + return is_auth_needed && !hdcp->connection.link.adjust.hdcp1.disable && !hdcp->connection.is_hdcp1_revoked; } @@ -80,8 +83,7 @@ static uint8_t is_cp_desired_hdcp2(struct mod_hdcp *hdcp) } } - return (hdcp->connection.hdcp2_retry_count < MAX_NUM_OF_ATTEMPTS) && - is_auth_needed && + return is_auth_needed && !hdcp->connection.link.adjust.hdcp2.disable && !hdcp->connection.is_hdcp2_revoked; } @@ -143,6 +145,7 @@ static enum mod_hdcp_status transition(struct mod_hdcp *hdcp, } else { callback_in_ms(0, output); set_state_id(hdcp, output, HDCP_CP_NOT_DESIRED); + set_auth_complete(hdcp, output); } else if (is_hdmi_dvi_sl_hdcp(hdcp)) if (is_cp_desired_hdcp2(hdcp)) { @@ -154,10 +157,12 @@ static enum mod_hdcp_status transition(struct mod_hdcp *hdcp, } else { callback_in_ms(0, output); set_state_id(hdcp, output, HDCP_CP_NOT_DESIRED); + set_auth_complete(hdcp, output); } else { callback_in_ms(0, output); set_state_id(hdcp, output, HDCP_CP_NOT_DESIRED); + set_auth_complete(hdcp, output); } } else if (is_in_cp_not_desired_state(hdcp)) { increment_stay_counter(hdcp); @@ -313,9 +318,6 @@ enum mod_hdcp_status mod_hdcp_add_display(struct mod_hdcp *hdcp, goto out; } - /* save current encryption states to restore after next authentication */ - mod_hdcp_save_current_encryption_states(hdcp); - /* reset existing authentication status */ status = reset_authentication(hdcp, output); if (status != MOD_HDCP_STATUS_SUCCESS) @@ -362,9 +364,6 @@ enum mod_hdcp_status mod_hdcp_remove_display(struct mod_hdcp *hdcp, goto out; } - /* save current encryption states to restore after next authentication */ - mod_hdcp_save_current_encryption_states(hdcp); - /* stop current authentication */ status = reset_authentication(hdcp, output); if (status != MOD_HDCP_STATUS_SUCCESS) @@ -392,6 +391,60 @@ out: return status; } +enum mod_hdcp_status mod_hdcp_update_authentication(struct mod_hdcp *hdcp, + uint8_t index, + struct mod_hdcp_link_adjustment *link_adjust, + struct 
mod_hdcp_display_adjustment *display_adjust, + struct mod_hdcp_output *output) +{ + enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS; + struct mod_hdcp_display *display = NULL; + + HDCP_TOP_INTERFACE_TRACE_WITH_INDEX(hdcp, index); + memset(output, 0, sizeof(struct mod_hdcp_output)); + + /* find display in connection */ + display = get_active_display_at_index(hdcp, index); + if (!display) { + status = MOD_HDCP_STATUS_DISPLAY_NOT_FOUND; + goto out; + } + + /* skip if no changes */ + if (memcmp(link_adjust, &hdcp->connection.link.adjust, + sizeof(struct mod_hdcp_link_adjustment)) == 0 && + memcmp(display_adjust, &display->adjust, + sizeof(struct mod_hdcp_display_adjustment)) == 0) { + status = MOD_HDCP_STATUS_SUCCESS; + goto out; + } + + /* stop current authentication */ + status = reset_authentication(hdcp, output); + if (status != MOD_HDCP_STATUS_SUCCESS) + goto out; + + /* clear retry counters */ + reset_retry_counts(hdcp); + + /* reset error trace */ + memset(&hdcp->connection.trace, 0, sizeof(hdcp->connection.trace)); + + /* set new adjustment */ + hdcp->connection.link.adjust = *link_adjust; + display->adjust = *display_adjust; + + /* request authentication when connection is not reset */ + if (current_state(hdcp) != HDCP_UNINITIALIZED) + /* wait 100ms to debounce simultaneous updates for different indices */ + callback_in_ms(100, output); + +out: + if (status != MOD_HDCP_STATUS_SUCCESS) + push_error_status(hdcp, status); + return status; +} + enum mod_hdcp_status mod_hdcp_query_display(struct mod_hdcp *hdcp, uint8_t index, struct mod_hdcp_display_query *query) { @@ -470,7 +523,7 @@ enum mod_hdcp_status mod_hdcp_process_event(struct mod_hdcp *hdcp, /* reset authentication if needed */ if (trans_status == MOD_HDCP_STATUS_RESET_NEEDED) { - HDCP_FULL_DDC_TRACE(hdcp); + mod_hdcp_log_ddc_trace(hdcp); reset_status = reset_authentication(hdcp, output); if (reset_status != MOD_HDCP_STATUS_SUCCESS) push_error_status(hdcp, reset_status); diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.h b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.h index 3ce91db560d1..399fbca8947b 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.h +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.h @@ -324,6 +324,7 @@ enum mod_hdcp_status mod_hdcp_hdcp2_dp_transition(struct mod_hdcp *hdcp, /* log functions */ void mod_hdcp_dump_binary_message(uint8_t *msg, uint32_t msg_size, uint8_t *buf, uint32_t buf_size); +void mod_hdcp_log_ddc_trace(struct mod_hdcp *hdcp); /* TODO: add adjustment log */ /* psp functions */ @@ -331,8 +332,6 @@ enum mod_hdcp_status mod_hdcp_add_display_to_topology( struct mod_hdcp *hdcp, struct mod_hdcp_display *display); enum mod_hdcp_status mod_hdcp_remove_display_from_topology( struct mod_hdcp *hdcp, uint8_t index); -bool mod_hdcp_is_link_encryption_enabled(struct mod_hdcp *hdcp); -void mod_hdcp_save_current_encryption_states(struct mod_hdcp *hdcp); enum mod_hdcp_status mod_hdcp_hdcp1_create_session(struct mod_hdcp *hdcp); enum mod_hdcp_status mod_hdcp_hdcp1_destroy_session(struct mod_hdcp *hdcp); enum mod_hdcp_status mod_hdcp_hdcp1_validate_rx(struct mod_hdcp *hdcp); @@ -496,6 +495,13 @@ static inline void set_watchdog_in_ms(struct mod_hdcp *hdcp, uint16_t time, output->watchdog_timer_delay = time; } +static inline void set_auth_complete(struct mod_hdcp *hdcp, + struct mod_hdcp_output *output) +{ + output->auth_complete = 1; + mod_hdcp_log_ddc_trace(hdcp); +} + /* connection topology helpers */ static inline uint8_t is_display_active(struct mod_hdcp_display *display) { diff 
--git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c index de872e7958b0..6ec918af3bff 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c @@ -266,9 +266,6 @@ static enum mod_hdcp_status authenticated(struct mod_hdcp *hdcp, mod_hdcp_execute_and_set(mod_hdcp_hdcp1_link_maintenance, &input->link_maintenance, &status, hdcp, "link_maintenance"); - - if (status != MOD_HDCP_STATUS_SUCCESS) - mod_hdcp_save_current_encryption_states(hdcp); out: return status; } @@ -447,9 +444,6 @@ static enum mod_hdcp_status authenticated_dp(struct mod_hdcp *hdcp, mod_hdcp_execute_and_set(check_no_reauthentication_request_dp, &input->reauth_request_check, &status, hdcp, "reauth_request_check"); - - if (status != MOD_HDCP_STATUS_SUCCESS) - mod_hdcp_save_current_encryption_states(hdcp); out: return status; } diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_transition.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_transition.c index 3dda8c1d83fc..7f011196ce98 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_transition.c +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_transition.c @@ -89,7 +89,7 @@ enum mod_hdcp_status mod_hdcp_hdcp1_transition(struct mod_hdcp *hdcp, } else { callback_in_ms(0, output); set_state_id(hdcp, output, H1_A45_AUTHENTICATED); - HDCP_FULL_DDC_TRACE(hdcp); + set_auth_complete(hdcp, output); } break; case H1_A45_AUTHENTICATED: @@ -137,7 +137,7 @@ enum mod_hdcp_status mod_hdcp_hdcp1_transition(struct mod_hdcp *hdcp, } callback_in_ms(0, output); set_state_id(hdcp, output, H1_A45_AUTHENTICATED); - HDCP_FULL_DDC_TRACE(hdcp); + set_auth_complete(hdcp, output); break; default: status = MOD_HDCP_STATUS_INVALID_STATE; @@ -239,7 +239,7 @@ enum mod_hdcp_status mod_hdcp_hdcp1_dp_transition(struct mod_hdcp *hdcp, set_state_id(hdcp, output, D1_A6_WAIT_FOR_READY); } else { set_state_id(hdcp, output, D1_A4_AUTHENTICATED); - HDCP_FULL_DDC_TRACE(hdcp); + set_auth_complete(hdcp, output); } break; case D1_A4_AUTHENTICATED: @@ -311,7 +311,7 @@ enum mod_hdcp_status mod_hdcp_hdcp1_dp_transition(struct mod_hdcp *hdcp, break; } set_state_id(hdcp, output, D1_A4_AUTHENTICATED); - HDCP_FULL_DDC_TRACE(hdcp); + set_auth_complete(hdcp, output); break; default: fail_and_restart_in_ms(0, &status, output); diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c index 117c6b45f718..91c22b96ebde 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c @@ -571,9 +571,6 @@ static enum mod_hdcp_status authenticated(struct mod_hdcp *hdcp, } process_rxstatus(hdcp, event_ctx, input, &status); - - if (status != MOD_HDCP_STATUS_SUCCESS) - mod_hdcp_save_current_encryption_states(hdcp); out: return status; } diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_transition.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_transition.c index 70cb230d8f56..1f4095b26409 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_transition.c +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_transition.c @@ -242,7 +242,7 @@ enum mod_hdcp_status mod_hdcp_hdcp2_transition(struct mod_hdcp *hdcp, } callback_in_ms(0, output); set_state_id(hdcp, output, H2_A5_AUTHENTICATED); - HDCP_FULL_DDC_TRACE(hdcp); + set_auth_complete(hdcp, output); break; case H2_A5_AUTHENTICATED: if (input->rxstatus_read == 
FAIL || @@ -559,7 +559,7 @@ enum mod_hdcp_status mod_hdcp_hdcp2_dp_transition(struct mod_hdcp *hdcp, break; } set_state_id(hdcp, output, D2_A5_AUTHENTICATED); - HDCP_FULL_DDC_TRACE(hdcp); + set_auth_complete(hdcp, output); break; case D2_A5_AUTHENTICATED: if (input->rxstatus_read == FAIL || diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_log.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_log.c index 1a0f7c3dc964..6b3b5f610907 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_log.c +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_log.c @@ -51,6 +51,80 @@ void mod_hdcp_dump_binary_message(uint8_t *msg, uint32_t msg_size, } } +void mod_hdcp_log_ddc_trace(struct mod_hdcp *hdcp) +{ + if (is_hdcp1(hdcp)) { + HDCP_DDC_READ_TRACE(hdcp, "BKSV", hdcp->auth.msg.hdcp1.bksv, + sizeof(hdcp->auth.msg.hdcp1.bksv)); + HDCP_DDC_READ_TRACE(hdcp, "BCAPS", &hdcp->auth.msg.hdcp1.bcaps, + sizeof(hdcp->auth.msg.hdcp1.bcaps)); + HDCP_DDC_READ_TRACE(hdcp, "BSTATUS", + (uint8_t *)&hdcp->auth.msg.hdcp1.bstatus, + sizeof(hdcp->auth.msg.hdcp1.bstatus)); + HDCP_DDC_WRITE_TRACE(hdcp, "AN", hdcp->auth.msg.hdcp1.an, + sizeof(hdcp->auth.msg.hdcp1.an)); + HDCP_DDC_WRITE_TRACE(hdcp, "AKSV", hdcp->auth.msg.hdcp1.aksv, + sizeof(hdcp->auth.msg.hdcp1.aksv)); + HDCP_DDC_WRITE_TRACE(hdcp, "AINFO", &hdcp->auth.msg.hdcp1.ainfo, + sizeof(hdcp->auth.msg.hdcp1.ainfo)); + HDCP_DDC_READ_TRACE(hdcp, "RI' / R0'", + (uint8_t *)&hdcp->auth.msg.hdcp1.r0p, + sizeof(hdcp->auth.msg.hdcp1.r0p)); + HDCP_DDC_READ_TRACE(hdcp, "BINFO", + (uint8_t *)&hdcp->auth.msg.hdcp1.binfo_dp, + sizeof(hdcp->auth.msg.hdcp1.binfo_dp)); + HDCP_DDC_READ_TRACE(hdcp, "KSVLIST", hdcp->auth.msg.hdcp1.ksvlist, + hdcp->auth.msg.hdcp1.ksvlist_size); + HDCP_DDC_READ_TRACE(hdcp, "V'", hdcp->auth.msg.hdcp1.vp, + sizeof(hdcp->auth.msg.hdcp1.vp)); + } else if (is_hdcp2(hdcp)) { + HDCP_DDC_READ_TRACE(hdcp, "HDCP2Version", + &hdcp->auth.msg.hdcp2.hdcp2version_hdmi, + sizeof(hdcp->auth.msg.hdcp2.hdcp2version_hdmi)); + HDCP_DDC_READ_TRACE(hdcp, "Rx Caps", hdcp->auth.msg.hdcp2.rxcaps_dp, + sizeof(hdcp->auth.msg.hdcp2.rxcaps_dp)); + HDCP_DDC_WRITE_TRACE(hdcp, "AKE Init", hdcp->auth.msg.hdcp2.ake_init, + sizeof(hdcp->auth.msg.hdcp2.ake_init)); + HDCP_DDC_READ_TRACE(hdcp, "AKE Cert", hdcp->auth.msg.hdcp2.ake_cert, + sizeof(hdcp->auth.msg.hdcp2.ake_cert)); + HDCP_DDC_WRITE_TRACE(hdcp, "Stored KM", + hdcp->auth.msg.hdcp2.ake_stored_km, + sizeof(hdcp->auth.msg.hdcp2.ake_stored_km)); + HDCP_DDC_WRITE_TRACE(hdcp, "No Stored KM", + hdcp->auth.msg.hdcp2.ake_no_stored_km, + sizeof(hdcp->auth.msg.hdcp2.ake_no_stored_km)); + HDCP_DDC_READ_TRACE(hdcp, "H'", hdcp->auth.msg.hdcp2.ake_h_prime, + sizeof(hdcp->auth.msg.hdcp2.ake_h_prime)); + HDCP_DDC_READ_TRACE(hdcp, "Pairing Info", + hdcp->auth.msg.hdcp2.ake_pairing_info, + sizeof(hdcp->auth.msg.hdcp2.ake_pairing_info)); + HDCP_DDC_WRITE_TRACE(hdcp, "LC Init", hdcp->auth.msg.hdcp2.lc_init, + sizeof(hdcp->auth.msg.hdcp2.lc_init)); + HDCP_DDC_READ_TRACE(hdcp, "L'", hdcp->auth.msg.hdcp2.lc_l_prime, + sizeof(hdcp->auth.msg.hdcp2.lc_l_prime)); + HDCP_DDC_WRITE_TRACE(hdcp, "Exchange KS", hdcp->auth.msg.hdcp2.ske_eks, + sizeof(hdcp->auth.msg.hdcp2.ske_eks)); + HDCP_DDC_READ_TRACE(hdcp, "Rx Status", + (uint8_t *)&hdcp->auth.msg.hdcp2.rxstatus, + sizeof(hdcp->auth.msg.hdcp2.rxstatus)); + HDCP_DDC_READ_TRACE(hdcp, "Rx Id List", + hdcp->auth.msg.hdcp2.rx_id_list, + hdcp->auth.msg.hdcp2.rx_id_list_size); + HDCP_DDC_WRITE_TRACE(hdcp, "Rx Id List Ack", + hdcp->auth.msg.hdcp2.repeater_auth_ack, + 
sizeof(hdcp->auth.msg.hdcp2.repeater_auth_ack)); + HDCP_DDC_WRITE_TRACE(hdcp, "Content Stream Management", + hdcp->auth.msg.hdcp2.repeater_auth_stream_manage, + hdcp->auth.msg.hdcp2.stream_manage_size); + HDCP_DDC_READ_TRACE(hdcp, "Stream Ready", + hdcp->auth.msg.hdcp2.repeater_auth_stream_ready, + sizeof(hdcp->auth.msg.hdcp2.repeater_auth_stream_ready)); + HDCP_DDC_WRITE_TRACE(hdcp, "Content Stream Type", + hdcp->auth.msg.hdcp2.content_stream_type_dp, + sizeof(hdcp->auth.msg.hdcp2.content_stream_type_dp)); + } +} + char *mod_hdcp_status_to_str(int32_t status) { switch (status) { diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_log.h b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_log.h index 47f8ee2832ff..eb6f9b9c504a 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_log.h +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_log.h @@ -106,78 +106,6 @@ hdcp->config.index, msg_name,\ hdcp->buf); \ } while (0) -#define HDCP_FULL_DDC_TRACE(hdcp) do { \ - if (is_hdcp1(hdcp)) { \ - HDCP_DDC_READ_TRACE(hdcp, "BKSV", hdcp->auth.msg.hdcp1.bksv, \ - sizeof(hdcp->auth.msg.hdcp1.bksv)); \ - HDCP_DDC_READ_TRACE(hdcp, "BCAPS", &hdcp->auth.msg.hdcp1.bcaps, \ - sizeof(hdcp->auth.msg.hdcp1.bcaps)); \ - HDCP_DDC_READ_TRACE(hdcp, "BSTATUS", \ - (uint8_t *)&hdcp->auth.msg.hdcp1.bstatus, \ - sizeof(hdcp->auth.msg.hdcp1.bstatus)); \ - HDCP_DDC_WRITE_TRACE(hdcp, "AN", hdcp->auth.msg.hdcp1.an, \ - sizeof(hdcp->auth.msg.hdcp1.an)); \ - HDCP_DDC_WRITE_TRACE(hdcp, "AKSV", hdcp->auth.msg.hdcp1.aksv, \ - sizeof(hdcp->auth.msg.hdcp1.aksv)); \ - HDCP_DDC_WRITE_TRACE(hdcp, "AINFO", &hdcp->auth.msg.hdcp1.ainfo, \ - sizeof(hdcp->auth.msg.hdcp1.ainfo)); \ - HDCP_DDC_READ_TRACE(hdcp, "RI' / R0'", \ - (uint8_t *)&hdcp->auth.msg.hdcp1.r0p, \ - sizeof(hdcp->auth.msg.hdcp1.r0p)); \ - HDCP_DDC_READ_TRACE(hdcp, "BINFO", \ - (uint8_t *)&hdcp->auth.msg.hdcp1.binfo_dp, \ - sizeof(hdcp->auth.msg.hdcp1.binfo_dp)); \ - HDCP_DDC_READ_TRACE(hdcp, "KSVLIST", hdcp->auth.msg.hdcp1.ksvlist, \ - hdcp->auth.msg.hdcp1.ksvlist_size); \ - HDCP_DDC_READ_TRACE(hdcp, "V'", hdcp->auth.msg.hdcp1.vp, \ - sizeof(hdcp->auth.msg.hdcp1.vp)); \ - } else { \ - HDCP_DDC_READ_TRACE(hdcp, "HDCP2Version", \ - &hdcp->auth.msg.hdcp2.hdcp2version_hdmi, \ - sizeof(hdcp->auth.msg.hdcp2.hdcp2version_hdmi)); \ - HDCP_DDC_READ_TRACE(hdcp, "Rx Caps", hdcp->auth.msg.hdcp2.rxcaps_dp, \ - sizeof(hdcp->auth.msg.hdcp2.rxcaps_dp)); \ - HDCP_DDC_WRITE_TRACE(hdcp, "AKE Init", hdcp->auth.msg.hdcp2.ake_init, \ - sizeof(hdcp->auth.msg.hdcp2.ake_init)); \ - HDCP_DDC_READ_TRACE(hdcp, "AKE Cert", hdcp->auth.msg.hdcp2.ake_cert, \ - sizeof(hdcp->auth.msg.hdcp2.ake_cert)); \ - HDCP_DDC_WRITE_TRACE(hdcp, "Stored KM", \ - hdcp->auth.msg.hdcp2.ake_stored_km, \ - sizeof(hdcp->auth.msg.hdcp2.ake_stored_km)); \ - HDCP_DDC_WRITE_TRACE(hdcp, "No Stored KM", \ - hdcp->auth.msg.hdcp2.ake_no_stored_km, \ - sizeof(hdcp->auth.msg.hdcp2.ake_no_stored_km)); \ - HDCP_DDC_READ_TRACE(hdcp, "H'", hdcp->auth.msg.hdcp2.ake_h_prime, \ - sizeof(hdcp->auth.msg.hdcp2.ake_h_prime)); \ - HDCP_DDC_READ_TRACE(hdcp, "Pairing Info", \ - hdcp->auth.msg.hdcp2.ake_pairing_info, \ - sizeof(hdcp->auth.msg.hdcp2.ake_pairing_info)); \ - HDCP_DDC_WRITE_TRACE(hdcp, "LC Init", hdcp->auth.msg.hdcp2.lc_init, \ - sizeof(hdcp->auth.msg.hdcp2.lc_init)); \ - HDCP_DDC_READ_TRACE(hdcp, "L'", hdcp->auth.msg.hdcp2.lc_l_prime, \ - sizeof(hdcp->auth.msg.hdcp2.lc_l_prime)); \ - HDCP_DDC_WRITE_TRACE(hdcp, "Exchange KS", hdcp->auth.msg.hdcp2.ske_eks, \ - sizeof(hdcp->auth.msg.hdcp2.ske_eks)); \ - HDCP_DDC_READ_TRACE(hdcp, 
"Rx Status", \ - (uint8_t *)&hdcp->auth.msg.hdcp2.rxstatus, \ - sizeof(hdcp->auth.msg.hdcp2.rxstatus)); \ - HDCP_DDC_READ_TRACE(hdcp, "Rx Id List", \ - hdcp->auth.msg.hdcp2.rx_id_list, \ - hdcp->auth.msg.hdcp2.rx_id_list_size); \ - HDCP_DDC_WRITE_TRACE(hdcp, "Rx Id List Ack", \ - hdcp->auth.msg.hdcp2.repeater_auth_ack, \ - sizeof(hdcp->auth.msg.hdcp2.repeater_auth_ack)); \ - HDCP_DDC_WRITE_TRACE(hdcp, "Content Stream Management", \ - hdcp->auth.msg.hdcp2.repeater_auth_stream_manage, \ - hdcp->auth.msg.hdcp2.stream_manage_size); \ - HDCP_DDC_READ_TRACE(hdcp, "Stream Ready", \ - hdcp->auth.msg.hdcp2.repeater_auth_stream_ready, \ - sizeof(hdcp->auth.msg.hdcp2.repeater_auth_stream_ready)); \ - HDCP_DDC_WRITE_TRACE(hdcp, "Content Stream Type", \ - hdcp->auth.msg.hdcp2.content_stream_type_dp, \ - sizeof(hdcp->auth.msg.hdcp2.content_stream_type_dp)); \ - } \ -} while (0) #define HDCP_TOP_ADD_DISPLAY_TRACE(hdcp, i) \ HDCP_LOG_TOP(hdcp, "[Link %d]\tadd display %d", \ hdcp->config.index, i) diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c index 1b02056bc8bd..e9bd84ec027d 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c @@ -45,7 +45,7 @@ static void hdcp2_message_init(struct mod_hdcp *hdcp, in->process.msg3_desc.msg_size = 0; } -static enum mod_hdcp_status mod_hdcp_remove_display_from_topology_v2( +static enum mod_hdcp_status remove_display_from_topology_v2( struct mod_hdcp *hdcp, uint8_t index) { struct psp_context *psp = hdcp->config.psp.handle; @@ -54,7 +54,7 @@ static enum mod_hdcp_status mod_hdcp_remove_display_from_topology_v2( get_active_display_at_index(hdcp, index); enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS; - dtm_cmd = (struct ta_dtm_shared_memory *)psp->dtm_context.dtm_shared_buf; + dtm_cmd = (struct ta_dtm_shared_memory *)psp->dtm_context.context.mem_context.shared_buf; if (!display || !is_display_active(display)) return MOD_HDCP_STATUS_DISPLAY_NOT_FOUND; @@ -81,7 +81,7 @@ static enum mod_hdcp_status mod_hdcp_remove_display_from_topology_v2( return status; } -static enum mod_hdcp_status mod_hdcp_remove_display_from_topology_v3( +static enum mod_hdcp_status remove_display_from_topology_v3( struct mod_hdcp *hdcp, uint8_t index) { struct psp_context *psp = hdcp->config.psp.handle; @@ -90,7 +90,7 @@ static enum mod_hdcp_status mod_hdcp_remove_display_from_topology_v3( get_active_display_at_index(hdcp, index); enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS; - dtm_cmd = (struct ta_dtm_shared_memory *)psp->dtm_context.dtm_shared_buf; + dtm_cmd = (struct ta_dtm_shared_memory *)psp->dtm_context.context.mem_context.shared_buf; if (!display || !is_display_active(display)) return MOD_HDCP_STATUS_DISPLAY_NOT_FOUND; @@ -107,7 +107,7 @@ static enum mod_hdcp_status mod_hdcp_remove_display_from_topology_v3( psp_dtm_invoke(psp, dtm_cmd->cmd_id); if (dtm_cmd->dtm_status != TA_DTM_STATUS__SUCCESS) { - status = mod_hdcp_remove_display_from_topology_v2(hdcp, index); + status = remove_display_from_topology_v2(hdcp, index); if (status != MOD_HDCP_STATUS_SUCCESS) display->state = MOD_HDCP_DISPLAY_INACTIVE; } else { @@ -120,20 +120,7 @@ static enum mod_hdcp_status mod_hdcp_remove_display_from_topology_v3( return status; } -enum mod_hdcp_status mod_hdcp_remove_display_from_topology( - struct mod_hdcp *hdcp, uint8_t index) -{ - enum mod_hdcp_status status = MOD_HDCP_STATUS_UPDATE_TOPOLOGY_FAILURE; - - if (hdcp->config.psp.caps.dtm_v3_supported) - status = 
mod_hdcp_remove_display_from_topology_v3(hdcp, index); - else - status = mod_hdcp_remove_display_from_topology_v2(hdcp, index); - - return status; -} - -static enum mod_hdcp_status mod_hdcp_add_display_to_topology_v2( +static enum mod_hdcp_status add_display_to_topology_v2( struct mod_hdcp *hdcp, struct mod_hdcp_display *display) { struct psp_context *psp = hdcp->config.psp.handle; @@ -141,13 +128,13 @@ static enum mod_hdcp_status mod_hdcp_add_display_to_topology_v2( struct mod_hdcp_link *link = &hdcp->connection.link; enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS; - if (!psp->dtm_context.dtm_initialized) { + if (!psp->dtm_context.context.initialized) { DRM_INFO("Failed to add display topology, DTM TA is not initialized."); display->state = MOD_HDCP_DISPLAY_INACTIVE; return MOD_HDCP_STATUS_FAILURE; } - dtm_cmd = (struct ta_dtm_shared_memory *)psp->dtm_context.dtm_shared_buf; + dtm_cmd = (struct ta_dtm_shared_memory *)psp->dtm_context.context.mem_context.shared_buf; mutex_lock(&psp->dtm_context.mutex); memset(dtm_cmd, 0, sizeof(struct ta_dtm_shared_memory)); @@ -180,7 +167,7 @@ static enum mod_hdcp_status mod_hdcp_add_display_to_topology_v2( return status; } -static enum mod_hdcp_status mod_hdcp_add_display_to_topology_v3( +static enum mod_hdcp_status add_display_to_topology_v3( struct mod_hdcp *hdcp, struct mod_hdcp_display *display) { struct psp_context *psp = hdcp->config.psp.handle; @@ -188,13 +175,13 @@ static enum mod_hdcp_status mod_hdcp_add_display_to_topology_v3( struct mod_hdcp_link *link = &hdcp->connection.link; enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS; - if (!psp->dtm_context.dtm_initialized) { + if (!psp->dtm_context.context.initialized) { DRM_INFO("Failed to add display topology, DTM TA is not initialized."); display->state = MOD_HDCP_DISPLAY_INACTIVE; return MOD_HDCP_STATUS_FAILURE; } - dtm_cmd = (struct ta_dtm_shared_memory *)psp->dtm_context.dtm_shared_buf; + dtm_cmd = (struct ta_dtm_shared_memory *)psp->dtm_context.context.mem_context.shared_buf; mutex_lock(&psp->dtm_context.mutex); memset(dtm_cmd, 0, sizeof(struct ta_dtm_shared_memory)); @@ -220,7 +207,7 @@ static enum mod_hdcp_status mod_hdcp_add_display_to_topology_v3( psp_dtm_invoke(psp, dtm_cmd->cmd_id); if (dtm_cmd->dtm_status != TA_DTM_STATUS__SUCCESS) { - status = mod_hdcp_add_display_to_topology_v2(hdcp, display); + status = add_display_to_topology_v2(hdcp, display); if (status != MOD_HDCP_STATUS_SUCCESS) display->state = MOD_HDCP_DISPLAY_INACTIVE; } else { @@ -232,15 +219,28 @@ static enum mod_hdcp_status mod_hdcp_add_display_to_topology_v3( return status; } +enum mod_hdcp_status mod_hdcp_remove_display_from_topology( + struct mod_hdcp *hdcp, uint8_t index) +{ + enum mod_hdcp_status status = MOD_HDCP_STATUS_UPDATE_TOPOLOGY_FAILURE; + + if (hdcp->config.psp.caps.dtm_v3_supported) + status = remove_display_from_topology_v3(hdcp, index); + else + status = remove_display_from_topology_v2(hdcp, index); + + return status; +} + enum mod_hdcp_status mod_hdcp_add_display_to_topology(struct mod_hdcp *hdcp, struct mod_hdcp_display *display) { enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS; if (hdcp->config.psp.caps.dtm_v3_supported) - status = mod_hdcp_add_display_to_topology_v3(hdcp, display); + status = add_display_to_topology_v3(hdcp, display); else - status = mod_hdcp_add_display_to_topology_v2(hdcp, display); + status = add_display_to_topology_v2(hdcp, display); return status; } @@ -253,12 +253,12 @@ enum mod_hdcp_status mod_hdcp_hdcp1_create_session(struct mod_hdcp *hdcp) struct 
ta_hdcp_shared_memory *hdcp_cmd; enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS; - if (!psp->hdcp_context.hdcp_initialized) { + if (!psp->hdcp_context.context.initialized) { DRM_ERROR("Failed to create hdcp session. HDCP TA is not initialized."); return MOD_HDCP_STATUS_FAILURE; } - hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.hdcp_shared_buf; + hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.context.mem_context.shared_buf; mutex_lock(&psp->hdcp_context.mutex); memset(hdcp_cmd, 0, sizeof(struct ta_hdcp_shared_memory)); @@ -293,7 +293,7 @@ enum mod_hdcp_status mod_hdcp_hdcp1_destroy_session(struct mod_hdcp *hdcp) enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS; mutex_lock(&psp->hdcp_context.mutex); - hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.hdcp_shared_buf; + hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.context.mem_context.shared_buf; memset(hdcp_cmd, 0, sizeof(struct ta_hdcp_shared_memory)); hdcp_cmd->in_msg.hdcp1_destroy_session.session_handle = hdcp->auth.id; @@ -325,7 +325,7 @@ enum mod_hdcp_status mod_hdcp_hdcp1_validate_rx(struct mod_hdcp *hdcp) enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS; mutex_lock(&psp->hdcp_context.mutex); - hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.hdcp_shared_buf; + hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.context.mem_context.shared_buf; memset(hdcp_cmd, 0, sizeof(struct ta_hdcp_shared_memory)); hdcp_cmd->in_msg.hdcp1_first_part_authentication.session_handle = hdcp->auth.id; @@ -367,7 +367,7 @@ enum mod_hdcp_status mod_hdcp_hdcp1_enable_encryption(struct mod_hdcp *hdcp) enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS; mutex_lock(&psp->hdcp_context.mutex); - hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.hdcp_shared_buf; + hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.context.mem_context.shared_buf; memset(hdcp_cmd, 0, sizeof(struct ta_hdcp_shared_memory)); hdcp_cmd->in_msg.hdcp1_enable_encryption.session_handle = hdcp->auth.id; @@ -393,7 +393,7 @@ enum mod_hdcp_status mod_hdcp_hdcp1_validate_ksvlist_vp(struct mod_hdcp *hdcp) enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS; mutex_lock(&psp->hdcp_context.mutex); - hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.hdcp_shared_buf; + hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.context.mem_context.shared_buf; memset(hdcp_cmd, 0, sizeof(struct ta_hdcp_shared_memory)); hdcp_cmd->in_msg.hdcp1_second_part_authentication.session_handle = hdcp->auth.id; @@ -436,7 +436,7 @@ enum mod_hdcp_status mod_hdcp_hdcp1_enable_dp_stream_encryption(struct mod_hdcp enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS; mutex_lock(&psp->hdcp_context.mutex); - hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.hdcp_shared_buf; + hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.context.mem_context.shared_buf; for (i = 0; i < MAX_NUM_OF_DISPLAYS; i++) { @@ -471,7 +471,7 @@ enum mod_hdcp_status mod_hdcp_hdcp1_link_maintenance(struct mod_hdcp *hdcp) enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS; mutex_lock(&psp->hdcp_context.mutex); - hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.hdcp_shared_buf; + hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.context.mem_context.shared_buf; memset(hdcp_cmd, 0, sizeof(struct ta_hdcp_shared_memory)); @@ -498,7 +498,7 @@ enum mod_hdcp_status mod_hdcp_hdcp2_create_session(struct mod_hdcp *hdcp) enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS; - if 
(!psp->hdcp_context.hdcp_initialized) { + if (!psp->hdcp_context.context.initialized) { DRM_ERROR("Failed to create hdcp session, HDCP TA is not initialized"); return MOD_HDCP_STATUS_FAILURE; } @@ -508,7 +508,7 @@ enum mod_hdcp_status mod_hdcp_hdcp2_create_session(struct mod_hdcp *hdcp) mutex_lock(&psp->hdcp_context.mutex); - hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.hdcp_shared_buf; + hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.context.mem_context.shared_buf; memset(hdcp_cmd, 0, sizeof(struct ta_hdcp_shared_memory)); hdcp_cmd->in_msg.hdcp2_create_session_v2.display_handle = display->index; @@ -545,7 +545,7 @@ enum mod_hdcp_status mod_hdcp_hdcp2_destroy_session(struct mod_hdcp *hdcp) enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS; mutex_lock(&psp->hdcp_context.mutex); - hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.hdcp_shared_buf; + hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.context.mem_context.shared_buf; memset(hdcp_cmd, 0, sizeof(struct ta_hdcp_shared_memory)); hdcp_cmd->in_msg.hdcp2_destroy_session.session_handle = hdcp->auth.id; @@ -579,7 +579,7 @@ enum mod_hdcp_status mod_hdcp_hdcp2_prepare_ake_init(struct mod_hdcp *hdcp) enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS; mutex_lock(&psp->hdcp_context.mutex); - hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.hdcp_shared_buf; + hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.context.mem_context.shared_buf; memset(hdcp_cmd, 0, sizeof(struct ta_hdcp_shared_memory)); msg_in = &hdcp_cmd->in_msg.hdcp2_prepare_process_authentication_message_v2; @@ -611,7 +611,7 @@ enum mod_hdcp_status mod_hdcp_hdcp2_validate_ake_cert(struct mod_hdcp *hdcp) enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS; mutex_lock(&psp->hdcp_context.mutex); - hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.hdcp_shared_buf; + hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.context.mem_context.shared_buf; memset(hdcp_cmd, 0, sizeof(struct ta_hdcp_shared_memory)); msg_in = &hdcp_cmd->in_msg.hdcp2_prepare_process_authentication_message_v2; @@ -671,7 +671,7 @@ enum mod_hdcp_status mod_hdcp_hdcp2_validate_h_prime(struct mod_hdcp *hdcp) enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS; mutex_lock(&psp->hdcp_context.mutex); - hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.hdcp_shared_buf; + hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.context.mem_context.shared_buf; memset(hdcp_cmd, 0, sizeof(struct ta_hdcp_shared_memory)); msg_in = &hdcp_cmd->in_msg.hdcp2_prepare_process_authentication_message_v2; @@ -717,7 +717,7 @@ enum mod_hdcp_status mod_hdcp_hdcp2_prepare_lc_init(struct mod_hdcp *hdcp) enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS; mutex_lock(&psp->hdcp_context.mutex); - hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.hdcp_shared_buf; + hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.context.mem_context.shared_buf; memset(hdcp_cmd, 0, sizeof(struct ta_hdcp_shared_memory)); msg_in = &hdcp_cmd->in_msg.hdcp2_prepare_process_authentication_message_v2; @@ -750,7 +750,7 @@ enum mod_hdcp_status mod_hdcp_hdcp2_validate_l_prime(struct mod_hdcp *hdcp) enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS; mutex_lock(&psp->hdcp_context.mutex); - hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.hdcp_shared_buf; + hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.context.mem_context.shared_buf; memset(hdcp_cmd, 0, sizeof(struct ta_hdcp_shared_memory)); msg_in = 
&hdcp_cmd->in_msg.hdcp2_prepare_process_authentication_message_v2; @@ -785,7 +785,7 @@ enum mod_hdcp_status mod_hdcp_hdcp2_prepare_eks(struct mod_hdcp *hdcp) enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS; mutex_lock(&psp->hdcp_context.mutex); - hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.hdcp_shared_buf; + hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.context.mem_context.shared_buf; memset(hdcp_cmd, 0, sizeof(struct ta_hdcp_shared_memory)); msg_in = &hdcp_cmd->in_msg.hdcp2_prepare_process_authentication_message_v2; @@ -833,7 +833,7 @@ enum mod_hdcp_status mod_hdcp_hdcp2_enable_encryption(struct mod_hdcp *hdcp) mutex_lock(&psp->hdcp_context.mutex); - hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.hdcp_shared_buf; + hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.context.mem_context.shared_buf; memset(hdcp_cmd, 0, sizeof(struct ta_hdcp_shared_memory)); hdcp_cmd->in_msg.hdcp2_set_encryption.session_handle = hdcp->auth.id; @@ -862,7 +862,7 @@ enum mod_hdcp_status mod_hdcp_hdcp2_validate_rx_id_list(struct mod_hdcp *hdcp) mutex_lock(&psp->hdcp_context.mutex); - hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.hdcp_shared_buf; + hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.context.mem_context.shared_buf; memset(hdcp_cmd, 0, sizeof(struct ta_hdcp_shared_memory)); msg_in = &hdcp_cmd->in_msg.hdcp2_prepare_process_authentication_message_v2; @@ -914,7 +914,7 @@ enum mod_hdcp_status mod_hdcp_hdcp2_enable_dp_stream_encryption(struct mod_hdcp enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS; mutex_lock(&psp->hdcp_context.mutex); - hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.hdcp_shared_buf; + hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.context.mem_context.shared_buf; memset(hdcp_cmd, 0, sizeof(struct ta_hdcp_shared_memory)); msg_in = &hdcp_cmd->in_msg.hdcp2_prepare_process_authentication_message_v2; @@ -958,7 +958,7 @@ enum mod_hdcp_status mod_hdcp_hdcp2_prepare_stream_management(struct mod_hdcp *h enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS; mutex_lock(&psp->hdcp_context.mutex); - hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.hdcp_shared_buf; + hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.context.mem_context.shared_buf; memset(hdcp_cmd, 0, sizeof(struct ta_hdcp_shared_memory)); msg_in = &hdcp_cmd->in_msg.hdcp2_prepare_process_authentication_message_v2; @@ -994,7 +994,7 @@ enum mod_hdcp_status mod_hdcp_hdcp2_validate_stream_ready(struct mod_hdcp *hdcp) enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS; mutex_lock(&psp->hdcp_context.mutex); - hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.hdcp_shared_buf; + hdcp_cmd = (struct ta_hdcp_shared_memory *)psp->hdcp_context.context.mem_context.shared_buf; memset(hdcp_cmd, 0, sizeof(struct ta_hdcp_shared_memory)); msg_in = &hdcp_cmd->in_msg.hdcp2_prepare_process_authentication_message_v2; @@ -1021,14 +1021,3 @@ enum mod_hdcp_status mod_hdcp_hdcp2_validate_stream_ready(struct mod_hdcp *hdcp) mutex_unlock(&psp->hdcp_context.mutex); return status; } - -bool mod_hdcp_is_link_encryption_enabled(struct mod_hdcp *hdcp) -{ - /* unsupported */ - return true; -} - -void mod_hdcp_save_current_encryption_states(struct mod_hdcp *hdcp) -{ - /* unsupported */ -} diff --git a/drivers/gpu/drm/amd/display/modules/inc/mod_hdcp.h b/drivers/gpu/drm/amd/display/modules/inc/mod_hdcp.h index c590493fd293..f37101f5a777 100644 --- a/drivers/gpu/drm/amd/display/modules/inc/mod_hdcp.h +++ 
b/drivers/gpu/drm/amd/display/modules/inc/mod_hdcp.h @@ -123,7 +123,6 @@ enum mod_hdcp_display_state { struct mod_hdcp_psp_caps { uint8_t dtm_v3_supported; - uint8_t opm_state_query_supported; }; enum mod_hdcp_display_disable_option { @@ -226,6 +225,7 @@ struct mod_hdcp_output { uint8_t watchdog_timer_stop; uint16_t callback_delay; uint16_t watchdog_timer_delay; + uint8_t auth_complete; }; /* used to represent per display info */ @@ -282,15 +282,22 @@ enum mod_hdcp_status mod_hdcp_setup(struct mod_hdcp *hdcp, /* called per link on link destroy */ enum mod_hdcp_status mod_hdcp_teardown(struct mod_hdcp *hdcp); -/* called per display on cp_desired set to true */ +/* called per display after stream is enabled */ enum mod_hdcp_status mod_hdcp_add_display(struct mod_hdcp *hdcp, struct mod_hdcp_link *link, struct mod_hdcp_display *display, struct mod_hdcp_output *output); -/* called per display on cp_desired set to false */ +/* called per display before stream is disabled */ enum mod_hdcp_status mod_hdcp_remove_display(struct mod_hdcp *hdcp, uint8_t index, struct mod_hdcp_output *output); +/* called per display to apply new authentication adjustment */ +enum mod_hdcp_status mod_hdcp_update_authentication(struct mod_hdcp *hdcp, + uint8_t index, + struct mod_hdcp_link_adjustment *link_adjust, + struct mod_hdcp_display_adjustment *display_adjust, + struct mod_hdcp_output *output); + /* called to query hdcp information on a specific index */ enum mod_hdcp_status mod_hdcp_query_display(struct mod_hdcp *hdcp, uint8_t index, struct mod_hdcp_display_query *query); diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h index ff1d3d4a6488..257f280d3d53 100644 --- a/drivers/gpu/drm/amd/include/amd_shared.h +++ b/drivers/gpu/drm/amd/include/amd_shared.h @@ -47,6 +47,7 @@ enum amd_apu_flags { AMD_APU_IS_RENOIR = 0x00000008UL, AMD_APU_IS_GREEN_SARDINE = 0x00000010UL, AMD_APU_IS_VANGOGH = 0x00000020UL, + AMD_APU_IS_CYAN_SKILLFISH2 = 0x00000040UL, }; /** diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_1_2_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_1_2_sh_mask.h index e5fd0121ceff..a9d553ef26c0 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_1_2_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_1_2_sh_mask.h @@ -5971,6 +5971,7 @@ #define DMCUB_INTERRUPT_ENABLE__DMCUB_GPINT1_INT_EN__SHIFT 0xb #define DMCUB_INTERRUPT_ENABLE__DMCUB_GPINT2_INT_EN__SHIFT 0xc #define DMCUB_INTERRUPT_ENABLE__DMCUB_UNDEFINED_ADDRESS_FAULT_INT_EN__SHIFT 0xd +#define DMCUB_INTERRUPT_ENABLE__DMCUB_GPINT_IH_INT_EN__SHIFT 0x11 #define DMCUB_INTERRUPT_ENABLE__DMCUB_TIMER0_INT_EN_MASK 0x00000001L #define DMCUB_INTERRUPT_ENABLE__DMCUB_TIMER1_INT_EN_MASK 0x00000002L #define DMCUB_INTERRUPT_ENABLE__DMCUB_INBOX0_READY_INT_EN_MASK 0x00000004L @@ -5985,6 +5986,7 @@ #define DMCUB_INTERRUPT_ENABLE__DMCUB_GPINT1_INT_EN_MASK 0x00000800L #define DMCUB_INTERRUPT_ENABLE__DMCUB_GPINT2_INT_EN_MASK 0x00001000L #define DMCUB_INTERRUPT_ENABLE__DMCUB_UNDEFINED_ADDRESS_FAULT_INT_EN_MASK 0x00002000L +#define DMCUB_INTERRUPT_ENABLE__DMCUB_GPINT_IH_INT_EN_MASK 0x00020000L //DMCUB_INTERRUPT_ACK #define DMCUB_INTERRUPT_ACK__DMCUB_TIMER0_INT_ACK__SHIFT 0x0 #define DMCUB_INTERRUPT_ACK__DMCUB_TIMER1_INT_ACK__SHIFT 0x1 @@ -6000,6 +6002,7 @@ #define DMCUB_INTERRUPT_ACK__DMCUB_GPINT1_INT_ACK__SHIFT 0xb #define DMCUB_INTERRUPT_ACK__DMCUB_GPINT2_INT_ACK__SHIFT 0xc #define DMCUB_INTERRUPT_ACK__DMCUB_UNDEFINED_ADDRESS_FAULT_ACK__SHIFT 0xd +#define 
DMCUB_INTERRUPT_ACK__DMCUB_GPINT_IH_INT_ACK__SHIFT 0x11 #define DMCUB_INTERRUPT_ACK__DMCUB_TIMER0_INT_ACK_MASK 0x00000001L #define DMCUB_INTERRUPT_ACK__DMCUB_TIMER1_INT_ACK_MASK 0x00000002L #define DMCUB_INTERRUPT_ACK__DMCUB_INBOX0_READY_INT_ACK_MASK 0x00000004L @@ -6014,6 +6017,7 @@ #define DMCUB_INTERRUPT_ACK__DMCUB_GPINT1_INT_ACK_MASK 0x00000800L #define DMCUB_INTERRUPT_ACK__DMCUB_GPINT2_INT_ACK_MASK 0x00001000L #define DMCUB_INTERRUPT_ACK__DMCUB_UNDEFINED_ADDRESS_FAULT_ACK_MASK 0x00002000L +#define DMCUB_INTERRUPT_ACK__DMCUB_GPINT_IH_INT_ACK_MASK 0x00020000L //DMCUB_INTERRUPT_STATUS #define DMCUB_INTERRUPT_STATUS__DMCUB_TIMER0_INT_STAT__SHIFT 0x0 #define DMCUB_INTERRUPT_STATUS__DMCUB_TIMER1_INT_STAT__SHIFT 0x1 diff --git a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_4_2_offset.h b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_4_2_offset.h index a9ad00e017a5..1a8a6a350789 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_4_2_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_4_2_offset.h @@ -508,6 +508,10 @@ #define regCP_ME2_PIPE2_INT_STATUS_BASE_IDX 0 #define regCP_ME2_PIPE3_INT_STATUS 0x1094 #define regCP_ME2_PIPE3_INT_STATUS_BASE_IDX 0 +#define regCP_ME1_INT_STAT_DEBUG 0x1095 +#define regCP_ME1_INT_STAT_DEBUG_BASE_IDX 0 +#define regCP_ME2_INT_STAT_DEBUG 0x1096 +#define regCP_ME2_INT_STAT_DEBUG_BASE_IDX 0 #define regCC_GC_EDC_CONFIG 0x1098 #define regCC_GC_EDC_CONFIG_BASE_IDX 0 #define regCP_ME1_PIPE_PRIORITY_CNTS 0x1099 diff --git a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_4_2_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_4_2_sh_mask.h index bc4d2997cb51..049221262e5c 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_4_2_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_4_2_sh_mask.h @@ -2989,6 +2989,60 @@ #define CP_ME1_PIPE3_INT_STATUS__GENERIC2_INT_STATUS_MASK 0x20000000L #define CP_ME1_PIPE3_INT_STATUS__GENERIC1_INT_STATUS_MASK 0x40000000L #define CP_ME1_PIPE3_INT_STATUS__GENERIC0_INT_STATUS_MASK 0x80000000L +//CP_ME1_INT_STAT_DEBUG +#define CP_ME1_INT_STAT_DEBUG__CMP_QUERY_STATUS_INT_ASSERTED__SHIFT 0xc +#define CP_ME1_INT_STAT_DEBUG__DEQUEUE_REQUEST_INT_ASSERTED__SHIFT 0xd +#define CP_ME1_INT_STAT_DEBUG__CP_ECC_ERROR_INT_ASSERTED__SHIFT 0xe +#define CP_ME1_INT_STAT_DEBUG__SUA_VIOLATION_INT_STATUS__SHIFT 0xf +#define CP_ME1_INT_STAT_DEBUG__GPF_INT_ASSERTED__SHIFT 0x10 +#define CP_ME1_INT_STAT_DEBUG__WRM_POLL_TIMEOUT_INT_ASSERTED__SHIFT 0x11 +#define CP_ME1_INT_STAT_DEBUG__PRIV_REG_INT_ASSERTED__SHIFT 0x17 +#define CP_ME1_INT_STAT_DEBUG__OPCODE_ERROR_INT_ASSERTED__SHIFT 0x18 +#define CP_ME1_INT_STAT_DEBUG__TIME_STAMP_INT_ASSERTED__SHIFT 0x1a +#define CP_ME1_INT_STAT_DEBUG__RESERVED_BIT_ERROR_INT_ASSERTED__SHIFT 0x1b +#define CP_ME1_INT_STAT_DEBUG__GENERIC2_INT_ASSERTED__SHIFT 0x1d +#define CP_ME1_INT_STAT_DEBUG__GENERIC1_INT_ASSERTED__SHIFT 0x1e +#define CP_ME1_INT_STAT_DEBUG__GENERIC0_INT_ASSERTED__SHIFT 0x1f +#define CP_ME1_INT_STAT_DEBUG__CMP_QUERY_STATUS_INT_ASSERTED_MASK 0x00001000L +#define CP_ME1_INT_STAT_DEBUG__DEQUEUE_REQUEST_INT_ASSERTED_MASK 0x00002000L +#define CP_ME1_INT_STAT_DEBUG__CP_ECC_ERROR_INT_ASSERTED_MASK 0x00004000L +#define CP_ME1_INT_STAT_DEBUG__SUA_VIOLATION_INT_STATUS_MASK 0x00008000L +#define CP_ME1_INT_STAT_DEBUG__GPF_INT_ASSERTED_MASK 0x00010000L +#define CP_ME1_INT_STAT_DEBUG__WRM_POLL_TIMEOUT_INT_ASSERTED_MASK 0x00020000L +#define CP_ME1_INT_STAT_DEBUG__PRIV_REG_INT_ASSERTED_MASK 0x00800000L +#define CP_ME1_INT_STAT_DEBUG__OPCODE_ERROR_INT_ASSERTED_MASK 0x01000000L +#define 
CP_ME1_INT_STAT_DEBUG__TIME_STAMP_INT_ASSERTED_MASK 0x04000000L +#define CP_ME1_INT_STAT_DEBUG__RESERVED_BIT_ERROR_INT_ASSERTED_MASK 0x08000000L +#define CP_ME1_INT_STAT_DEBUG__GENERIC2_INT_ASSERTED_MASK 0x20000000L +#define CP_ME1_INT_STAT_DEBUG__GENERIC1_INT_ASSERTED_MASK 0x40000000L +#define CP_ME1_INT_STAT_DEBUG__GENERIC0_INT_ASSERTED_MASK 0x80000000L +//CP_ME2_INT_STAT_DEBUG +#define CP_ME2_INT_STAT_DEBUG__CMP_QUERY_STATUS_INT_ASSERTED__SHIFT 0xc +#define CP_ME2_INT_STAT_DEBUG__DEQUEUE_REQUEST_INT_ASSERTED__SHIFT 0xd +#define CP_ME2_INT_STAT_DEBUG__CP_ECC_ERROR_INT_ASSERTED__SHIFT 0xe +#define CP_ME2_INT_STAT_DEBUG__SUA_VIOLATION_INT_STATUS__SHIFT 0xf +#define CP_ME2_INT_STAT_DEBUG__GPF_INT_ASSERTED__SHIFT 0x10 +#define CP_ME2_INT_STAT_DEBUG__WRM_POLL_TIMEOUT_INT_ASSERTED__SHIFT 0x11 +#define CP_ME2_INT_STAT_DEBUG__PRIV_REG_INT_ASSERTED__SHIFT 0x17 +#define CP_ME2_INT_STAT_DEBUG__OPCODE_ERROR_INT_ASSERTED__SHIFT 0x18 +#define CP_ME2_INT_STAT_DEBUG__TIME_STAMP_INT_ASSERTED__SHIFT 0x1a +#define CP_ME2_INT_STAT_DEBUG__RESERVED_BIT_ERROR_INT_ASSERTED__SHIFT 0x1b +#define CP_ME2_INT_STAT_DEBUG__GENERIC2_INT_ASSERTED__SHIFT 0x1d +#define CP_ME2_INT_STAT_DEBUG__GENERIC1_INT_ASSERTED__SHIFT 0x1e +#define CP_ME2_INT_STAT_DEBUG__GENERIC0_INT_ASSERTED__SHIFT 0x1f +#define CP_ME2_INT_STAT_DEBUG__CMP_QUERY_STATUS_INT_ASSERTED_MASK 0x00001000L +#define CP_ME2_INT_STAT_DEBUG__DEQUEUE_REQUEST_INT_ASSERTED_MASK 0x00002000L +#define CP_ME2_INT_STAT_DEBUG__CP_ECC_ERROR_INT_ASSERTED_MASK 0x00004000L +#define CP_ME2_INT_STAT_DEBUG__SUA_VIOLATION_INT_STATUS_MASK 0x00008000L +#define CP_ME2_INT_STAT_DEBUG__GPF_INT_ASSERTED_MASK 0x00010000L +#define CP_ME2_INT_STAT_DEBUG__WRM_POLL_TIMEOUT_INT_ASSERTED_MASK 0x00020000L +#define CP_ME2_INT_STAT_DEBUG__PRIV_REG_INT_ASSERTED_MASK 0x00800000L +#define CP_ME2_INT_STAT_DEBUG__OPCODE_ERROR_INT_ASSERTED_MASK 0x01000000L +#define CP_ME2_INT_STAT_DEBUG__TIME_STAMP_INT_ASSERTED_MASK 0x04000000L +#define CP_ME2_INT_STAT_DEBUG__RESERVED_BIT_ERROR_INT_ASSERTED_MASK 0x08000000L +#define CP_ME2_INT_STAT_DEBUG__GENERIC2_INT_ASSERTED_MASK 0x20000000L +#define CP_ME2_INT_STAT_DEBUG__GENERIC1_INT_ASSERTED_MASK 0x40000000L +#define CP_ME2_INT_STAT_DEBUG__GENERIC0_INT_ASSERTED_MASK 0x80000000L //CP_ME2_PIPE0_INT_STATUS #define CP_ME2_PIPE0_INT_STATUS__CMP_QUERY_STATUS_INT_STATUS__SHIFT 0xc #define CP_ME2_PIPE0_INT_STATUS__DEQUEUE_REQUEST_INT_STATUS__SHIFT 0xd diff --git a/drivers/gpu/drm/amd/include/asic_reg/mp/mp_11_0_8_offset.h b/drivers/gpu/drm/amd/include/asic_reg/mp/mp_11_0_8_offset.h new file mode 100644 index 000000000000..19293ccaec23 --- /dev/null +++ b/drivers/gpu/drm/amd/include/asic_reg/mp/mp_11_0_8_offset.h @@ -0,0 +1,352 @@ +/* + * Copyright (C) 2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef _mp_11_0_8_OFFSET_HEADER +#define _mp_11_0_8_OFFSET_HEADER + + + +// addressBlock: mp_SmuMp0_SmnDec +// base address: 0x0 +#define mmMP0_SMN_C2PMSG_32 0x0060 +#define mmMP0_SMN_C2PMSG_32_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_33 0x0061 +#define mmMP0_SMN_C2PMSG_33_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_34 0x0062 +#define mmMP0_SMN_C2PMSG_34_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_35 0x0063 +#define mmMP0_SMN_C2PMSG_35_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_36 0x0064 +#define mmMP0_SMN_C2PMSG_36_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_37 0x0065 +#define mmMP0_SMN_C2PMSG_37_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_38 0x0066 +#define mmMP0_SMN_C2PMSG_38_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_39 0x0067 +#define mmMP0_SMN_C2PMSG_39_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_40 0x0068 +#define mmMP0_SMN_C2PMSG_40_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_41 0x0069 +#define mmMP0_SMN_C2PMSG_41_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_42 0x006a +#define mmMP0_SMN_C2PMSG_42_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_43 0x006b +#define mmMP0_SMN_C2PMSG_43_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_44 0x006c +#define mmMP0_SMN_C2PMSG_44_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_45 0x006d +#define mmMP0_SMN_C2PMSG_45_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_46 0x006e +#define mmMP0_SMN_C2PMSG_46_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_47 0x006f +#define mmMP0_SMN_C2PMSG_47_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_48 0x0070 +#define mmMP0_SMN_C2PMSG_48_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_49 0x0071 +#define mmMP0_SMN_C2PMSG_49_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_50 0x0072 +#define mmMP0_SMN_C2PMSG_50_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_51 0x0073 +#define mmMP0_SMN_C2PMSG_51_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_52 0x0074 +#define mmMP0_SMN_C2PMSG_52_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_53 0x0075 +#define mmMP0_SMN_C2PMSG_53_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_54 0x0076 +#define mmMP0_SMN_C2PMSG_54_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_55 0x0077 +#define mmMP0_SMN_C2PMSG_55_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_56 0x0078 +#define mmMP0_SMN_C2PMSG_56_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_57 0x0079 +#define mmMP0_SMN_C2PMSG_57_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_58 0x007a +#define mmMP0_SMN_C2PMSG_58_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_59 0x007b +#define mmMP0_SMN_C2PMSG_59_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_60 0x007c +#define mmMP0_SMN_C2PMSG_60_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_61 0x007d +#define mmMP0_SMN_C2PMSG_61_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_62 0x007e +#define mmMP0_SMN_C2PMSG_62_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_63 0x007f +#define mmMP0_SMN_C2PMSG_63_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_64 0x0080 +#define mmMP0_SMN_C2PMSG_64_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_65 0x0081 +#define mmMP0_SMN_C2PMSG_65_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_66 0x0082 +#define mmMP0_SMN_C2PMSG_66_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_67 0x0083 +#define mmMP0_SMN_C2PMSG_67_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_68 0x0084 +#define mmMP0_SMN_C2PMSG_68_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_69 0x0085 +#define mmMP0_SMN_C2PMSG_69_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_70 0x0086 +#define 
mmMP0_SMN_C2PMSG_70_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_71 0x0087 +#define mmMP0_SMN_C2PMSG_71_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_72 0x0088 +#define mmMP0_SMN_C2PMSG_72_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_73 0x0089 +#define mmMP0_SMN_C2PMSG_73_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_74 0x008a +#define mmMP0_SMN_C2PMSG_74_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_75 0x008b +#define mmMP0_SMN_C2PMSG_75_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_76 0x008c +#define mmMP0_SMN_C2PMSG_76_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_77 0x008d +#define mmMP0_SMN_C2PMSG_77_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_78 0x008e +#define mmMP0_SMN_C2PMSG_78_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_79 0x008f +#define mmMP0_SMN_C2PMSG_79_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_80 0x0090 +#define mmMP0_SMN_C2PMSG_80_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_81 0x0091 +#define mmMP0_SMN_C2PMSG_81_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_82 0x0092 +#define mmMP0_SMN_C2PMSG_82_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_83 0x0093 +#define mmMP0_SMN_C2PMSG_83_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_84 0x0094 +#define mmMP0_SMN_C2PMSG_84_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_85 0x0095 +#define mmMP0_SMN_C2PMSG_85_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_86 0x0096 +#define mmMP0_SMN_C2PMSG_86_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_87 0x0097 +#define mmMP0_SMN_C2PMSG_87_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_88 0x0098 +#define mmMP0_SMN_C2PMSG_88_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_89 0x0099 +#define mmMP0_SMN_C2PMSG_89_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_90 0x009a +#define mmMP0_SMN_C2PMSG_90_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_91 0x009b +#define mmMP0_SMN_C2PMSG_91_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_92 0x009c +#define mmMP0_SMN_C2PMSG_92_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_93 0x009d +#define mmMP0_SMN_C2PMSG_93_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_94 0x009e +#define mmMP0_SMN_C2PMSG_94_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_95 0x009f +#define mmMP0_SMN_C2PMSG_95_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_96 0x00a0 +#define mmMP0_SMN_C2PMSG_96_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_97 0x00a1 +#define mmMP0_SMN_C2PMSG_97_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_98 0x00a2 +#define mmMP0_SMN_C2PMSG_98_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_99 0x00a3 +#define mmMP0_SMN_C2PMSG_99_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_100 0x00a4 +#define mmMP0_SMN_C2PMSG_100_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_101 0x00a5 +#define mmMP0_SMN_C2PMSG_101_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_102 0x00a6 +#define mmMP0_SMN_C2PMSG_102_BASE_IDX 0 +#define mmMP0_SMN_C2PMSG_103 0x00a7 +#define mmMP0_SMN_C2PMSG_103_BASE_IDX 0 +#define mmMP0_SMN_IH_CREDIT 0x00c1 +#define mmMP0_SMN_IH_CREDIT_BASE_IDX 0 +#define mmMP0_SMN_IH_SW_INT 0x00c2 +#define mmMP0_SMN_IH_SW_INT_BASE_IDX 0 +#define mmMP0_SMN_IH_SW_INT_CTRL 0x00c3 +#define mmMP0_SMN_IH_SW_INT_CTRL_BASE_IDX 0 + + +// addressBlock: mp_SmuMp1_SmnDec +// base address: 0x0 +#define mmMP1_SMN_C2PMSG_32 0x0260 +#define mmMP1_SMN_C2PMSG_32_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_33 0x0261 +#define mmMP1_SMN_C2PMSG_33_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_34 0x0262 +#define mmMP1_SMN_C2PMSG_34_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_35 0x0263 +#define mmMP1_SMN_C2PMSG_35_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_36 0x0264 +#define mmMP1_SMN_C2PMSG_36_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_37 0x0265 +#define mmMP1_SMN_C2PMSG_37_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_38 0x0266 +#define mmMP1_SMN_C2PMSG_38_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_39 0x0267 +#define mmMP1_SMN_C2PMSG_39_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_40 0x0268 +#define mmMP1_SMN_C2PMSG_40_BASE_IDX 0 +#define 
mmMP1_SMN_C2PMSG_41 0x0269 +#define mmMP1_SMN_C2PMSG_41_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_42 0x026a +#define mmMP1_SMN_C2PMSG_42_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_43 0x026b +#define mmMP1_SMN_C2PMSG_43_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_44 0x026c +#define mmMP1_SMN_C2PMSG_44_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_45 0x026d +#define mmMP1_SMN_C2PMSG_45_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_46 0x026e +#define mmMP1_SMN_C2PMSG_46_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_47 0x026f +#define mmMP1_SMN_C2PMSG_47_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_48 0x0270 +#define mmMP1_SMN_C2PMSG_48_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_49 0x0271 +#define mmMP1_SMN_C2PMSG_49_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_50 0x0272 +#define mmMP1_SMN_C2PMSG_50_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_51 0x0273 +#define mmMP1_SMN_C2PMSG_51_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_52 0x0274 +#define mmMP1_SMN_C2PMSG_52_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_53 0x0275 +#define mmMP1_SMN_C2PMSG_53_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_54 0x0276 +#define mmMP1_SMN_C2PMSG_54_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_55 0x0277 +#define mmMP1_SMN_C2PMSG_55_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_56 0x0278 +#define mmMP1_SMN_C2PMSG_56_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_57 0x0279 +#define mmMP1_SMN_C2PMSG_57_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_58 0x027a +#define mmMP1_SMN_C2PMSG_58_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_59 0x027b +#define mmMP1_SMN_C2PMSG_59_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_60 0x027c +#define mmMP1_SMN_C2PMSG_60_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_61 0x027d +#define mmMP1_SMN_C2PMSG_61_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_62 0x027e +#define mmMP1_SMN_C2PMSG_62_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_63 0x027f +#define mmMP1_SMN_C2PMSG_63_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_64 0x0280 +#define mmMP1_SMN_C2PMSG_64_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_65 0x0281 +#define mmMP1_SMN_C2PMSG_65_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_66 0x0282 +#define mmMP1_SMN_C2PMSG_66_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_67 0x0283 +#define mmMP1_SMN_C2PMSG_67_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_68 0x0284 +#define mmMP1_SMN_C2PMSG_68_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_69 0x0285 +#define mmMP1_SMN_C2PMSG_69_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_70 0x0286 +#define mmMP1_SMN_C2PMSG_70_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_71 0x0287 +#define mmMP1_SMN_C2PMSG_71_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_72 0x0288 +#define mmMP1_SMN_C2PMSG_72_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_73 0x0289 +#define mmMP1_SMN_C2PMSG_73_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_74 0x028a +#define mmMP1_SMN_C2PMSG_74_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_75 0x028b +#define mmMP1_SMN_C2PMSG_75_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_76 0x028c +#define mmMP1_SMN_C2PMSG_76_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_77 0x028d +#define mmMP1_SMN_C2PMSG_77_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_78 0x028e +#define mmMP1_SMN_C2PMSG_78_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_79 0x028f +#define mmMP1_SMN_C2PMSG_79_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_80 0x0290 +#define mmMP1_SMN_C2PMSG_80_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_81 0x0291 +#define mmMP1_SMN_C2PMSG_81_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_82 0x0292 +#define mmMP1_SMN_C2PMSG_82_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_83 0x0293 +#define mmMP1_SMN_C2PMSG_83_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_84 0x0294 +#define mmMP1_SMN_C2PMSG_84_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_85 0x0295 +#define mmMP1_SMN_C2PMSG_85_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_86 0x0296 +#define mmMP1_SMN_C2PMSG_86_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_87 0x0297 +#define 
mmMP1_SMN_C2PMSG_87_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_88 0x0298 +#define mmMP1_SMN_C2PMSG_88_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_89 0x0299 +#define mmMP1_SMN_C2PMSG_89_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_90 0x029a +#define mmMP1_SMN_C2PMSG_90_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_91 0x029b +#define mmMP1_SMN_C2PMSG_91_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_92 0x029c +#define mmMP1_SMN_C2PMSG_92_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_93 0x029d +#define mmMP1_SMN_C2PMSG_93_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_94 0x029e +#define mmMP1_SMN_C2PMSG_94_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_95 0x029f +#define mmMP1_SMN_C2PMSG_95_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_96 0x02a0 +#define mmMP1_SMN_C2PMSG_96_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_97 0x02a1 +#define mmMP1_SMN_C2PMSG_97_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_98 0x02a2 +#define mmMP1_SMN_C2PMSG_98_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_99 0x02a3 +#define mmMP1_SMN_C2PMSG_99_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_100 0x02a4 +#define mmMP1_SMN_C2PMSG_100_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_101 0x02a5 +#define mmMP1_SMN_C2PMSG_101_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_102 0x02a6 +#define mmMP1_SMN_C2PMSG_102_BASE_IDX 0 +#define mmMP1_SMN_C2PMSG_103 0x02a7 +#define mmMP1_SMN_C2PMSG_103_BASE_IDX 0 +#define mmMP1_SMN_IH_CREDIT 0x02c1 +#define mmMP1_SMN_IH_CREDIT_BASE_IDX 0 +#define mmMP1_SMN_IH_SW_INT 0x02c2 +#define mmMP1_SMN_IH_SW_INT_BASE_IDX 0 +#define mmMP1_SMN_IH_SW_INT_CTRL 0x02c3 +#define mmMP1_SMN_IH_SW_INT_CTRL_BASE_IDX 0 +#define mmMP1_SMN_FPS_CNT 0x02c4 +#define mmMP1_SMN_FPS_CNT_BASE_IDX 0 +#define mmMP1_SMN_EXT_SCRATCH0 0x03c0 +#define mmMP1_SMN_EXT_SCRATCH0_BASE_IDX 0 +#define mmMP1_SMN_EXT_SCRATCH1 0x03c1 +#define mmMP1_SMN_EXT_SCRATCH1_BASE_IDX 0 +#define mmMP1_SMN_EXT_SCRATCH2 0x03c2 +#define mmMP1_SMN_EXT_SCRATCH2_BASE_IDX 0 +#define mmMP1_SMN_EXT_SCRATCH3 0x03c3 +#define mmMP1_SMN_EXT_SCRATCH3_BASE_IDX 0 +#define mmMP1_SMN_EXT_SCRATCH4 0x03c4 +#define mmMP1_SMN_EXT_SCRATCH4_BASE_IDX 0 +#define mmMP1_SMN_EXT_SCRATCH5 0x03c5 +#define mmMP1_SMN_EXT_SCRATCH5_BASE_IDX 0 +#define mmMP1_SMN_EXT_SCRATCH6 0x03c6 +#define mmMP1_SMN_EXT_SCRATCH6_BASE_IDX 0 +#define mmMP1_SMN_EXT_SCRATCH7 0x03c7 +#define mmMP1_SMN_EXT_SCRATCH7_BASE_IDX 0 + + +#endif diff --git a/drivers/gpu/drm/amd/include/asic_reg/thm/thm_11_0_2_offset.h b/drivers/gpu/drm/amd/include/asic_reg/thm/thm_11_0_2_offset.h index a485526f3a51..8474f419caa5 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/thm/thm_11_0_2_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/thm/thm_11_0_2_offset.h @@ -38,6 +38,9 @@ #define mmCG_TACH_CTRL 0x006a #define mmCG_TACH_CTRL_BASE_IDX 0 +#define mmCG_TACH_STATUS 0x006b +#define mmCG_TACH_STATUS_BASE_IDX 0 + #define mmTHM_THERMAL_INT_ENA 0x000a #define mmTHM_THERMAL_INT_ENA_BASE_IDX 0 #define mmTHM_THERMAL_INT_CTRL 0x000b @@ -49,4 +52,7 @@ #define mmTHM_BACO_CNTL 0x0081 #define mmTHM_BACO_CNTL_BASE_IDX 0 +#define mmCG_THERMAL_STATUS 0x006C +#define mmCG_THERMAL_STATUS_BASE_IDX 0 + #endif diff --git a/drivers/gpu/drm/amd/include/asic_reg/thm/thm_11_0_2_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/thm/thm_11_0_2_sh_mask.h index d130d92aee19..f2f9eae9a68f 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/thm/thm_11_0_2_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/thm/thm_11_0_2_sh_mask.h @@ -92,5 +92,8 @@ #define THM_TCON_THERM_TRIP__RSVD3_MASK 0x7FFFC000L #define THM_TCON_THERM_TRIP__SW_THERM_TP_MASK 0x80000000L +#define CG_THERMAL_STATUS__FDO_PWM_DUTY__SHIFT 0x9 +#define CG_THERMAL_STATUS__FDO_PWM_DUTY_MASK 0x0001FE00L + #endif diff 
--git a/drivers/gpu/drm/amd/include/asic_reg/umc/umc_8_7_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/umc/umc_8_7_0_sh_mask.h index 4c5097fa0c09..20329da53b0a 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/umc/umc_8_7_0_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/umc/umc_8_7_0_sh_mask.h @@ -1,79 +1,100 @@ -#ifndef _umc_8_7_0_SH_MASK_HEADER
-#define _umc_8_7_0_SH_MASK_HEADER
-
-//UMCCH0_0_GeccErrCntSel
-#define UMCCH0_0_GeccErrCntSel__GeccErrCntCsSel__SHIFT 0x0
-#define UMCCH0_0_GeccErrCntSel__GeccErrInt__SHIFT 0xc
-#define UMCCH0_0_GeccErrCntSel__GeccErrCntEn__SHIFT 0xf
-#define UMCCH0_0_GeccErrCntSel__PoisonCntEn__SHIFT 0x10
-#define UMCCH0_0_GeccErrCntSel__GeccErrCntCsSel_MASK 0x0000000FL
-#define UMCCH0_0_GeccErrCntSel__GeccErrInt_MASK 0x00003000L
-#define UMCCH0_0_GeccErrCntSel__GeccErrCntEn_MASK 0x00008000L
-#define UMCCH0_0_GeccErrCntSel__PoisonCntEn_MASK 0x00030000L
-//UMCCH0_0_GeccErrCnt
-#define UMCCH0_0_GeccErrCnt__GeccErrCnt__SHIFT 0x0
-#define UMCCH0_0_GeccErrCnt__GeccUnCorrErrCnt__SHIFT 0x10
-#define UMCCH0_0_GeccErrCnt__GeccErrCnt_MASK 0x0000FFFFL
-#define UMCCH0_0_GeccErrCnt__GeccUnCorrErrCnt_MASK 0xFFFF0000L
-//MCA_UMC_UMC0_MCUMC_STATUST0
-#define MCA_UMC_UMC0_MCUMC_STATUST0__ErrorCode__SHIFT 0x0
-#define MCA_UMC_UMC0_MCUMC_STATUST0__ErrorCodeExt__SHIFT 0x10
-#define MCA_UMC_UMC0_MCUMC_STATUST0__RESERV22__SHIFT 0x16
-#define MCA_UMC_UMC0_MCUMC_STATUST0__AddrLsb__SHIFT 0x18
-#define MCA_UMC_UMC0_MCUMC_STATUST0__RESERV30__SHIFT 0x1e
-#define MCA_UMC_UMC0_MCUMC_STATUST0__ErrCoreId__SHIFT 0x20
-#define MCA_UMC_UMC0_MCUMC_STATUST0__RESERV38__SHIFT 0x26
-#define MCA_UMC_UMC0_MCUMC_STATUST0__Scrub__SHIFT 0x28
-#define MCA_UMC_UMC0_MCUMC_STATUST0__RESERV41__SHIFT 0x29
-#define MCA_UMC_UMC0_MCUMC_STATUST0__Poison__SHIFT 0x2b
-#define MCA_UMC_UMC0_MCUMC_STATUST0__Deferred__SHIFT 0x2c
-#define MCA_UMC_UMC0_MCUMC_STATUST0__UECC__SHIFT 0x2d
-#define MCA_UMC_UMC0_MCUMC_STATUST0__CECC__SHIFT 0x2e
-#define MCA_UMC_UMC0_MCUMC_STATUST0__RESERV47__SHIFT 0x2f
-#define MCA_UMC_UMC0_MCUMC_STATUST0__Transparent__SHIFT 0x34
-#define MCA_UMC_UMC0_MCUMC_STATUST0__SyndV__SHIFT 0x35
-#define MCA_UMC_UMC0_MCUMC_STATUST0__RESERV54__SHIFT 0x36
-#define MCA_UMC_UMC0_MCUMC_STATUST0__TCC__SHIFT 0x37
-#define MCA_UMC_UMC0_MCUMC_STATUST0__ErrCoreIdVal__SHIFT 0x38
-#define MCA_UMC_UMC0_MCUMC_STATUST0__PCC__SHIFT 0x39
-#define MCA_UMC_UMC0_MCUMC_STATUST0__AddrV__SHIFT 0x3a
-#define MCA_UMC_UMC0_MCUMC_STATUST0__MiscV__SHIFT 0x3b
-#define MCA_UMC_UMC0_MCUMC_STATUST0__En__SHIFT 0x3c
-#define MCA_UMC_UMC0_MCUMC_STATUST0__UC__SHIFT 0x3d
-#define MCA_UMC_UMC0_MCUMC_STATUST0__Overflow__SHIFT 0x3e
-#define MCA_UMC_UMC0_MCUMC_STATUST0__Val__SHIFT 0x3f
-#define MCA_UMC_UMC0_MCUMC_STATUST0__ErrorCode_MASK 0x000000000000FFFFL
-#define MCA_UMC_UMC0_MCUMC_STATUST0__ErrorCodeExt_MASK 0x00000000003F0000L
-#define MCA_UMC_UMC0_MCUMC_STATUST0__RESERV22_MASK 0x0000000000C00000L
-#define MCA_UMC_UMC0_MCUMC_STATUST0__AddrLsb_MASK 0x000000003F000000L
-#define MCA_UMC_UMC0_MCUMC_STATUST0__RESERV30_MASK 0x00000000C0000000L
-#define MCA_UMC_UMC0_MCUMC_STATUST0__ErrCoreId_MASK 0x0000003F00000000L
-#define MCA_UMC_UMC0_MCUMC_STATUST0__RESERV38_MASK 0x000000C000000000L
-#define MCA_UMC_UMC0_MCUMC_STATUST0__Scrub_MASK 0x0000010000000000L
-#define MCA_UMC_UMC0_MCUMC_STATUST0__RESERV41_MASK 0x0000060000000000L
-#define MCA_UMC_UMC0_MCUMC_STATUST0__Poison_MASK 0x0000080000000000L
-#define MCA_UMC_UMC0_MCUMC_STATUST0__Deferred_MASK 0x0000100000000000L
-#define MCA_UMC_UMC0_MCUMC_STATUST0__UECC_MASK 0x0000200000000000L
-#define MCA_UMC_UMC0_MCUMC_STATUST0__CECC_MASK 0x0000400000000000L
-#define MCA_UMC_UMC0_MCUMC_STATUST0__RESERV47_MASK 0x000F800000000000L
-#define MCA_UMC_UMC0_MCUMC_STATUST0__Transparent_MASK 0x0010000000000000L
-#define MCA_UMC_UMC0_MCUMC_STATUST0__SyndV_MASK 0x0020000000000000L
-#define MCA_UMC_UMC0_MCUMC_STATUST0__RESERV54_MASK 0x0040000000000000L
-#define MCA_UMC_UMC0_MCUMC_STATUST0__TCC_MASK 0x0080000000000000L
-#define MCA_UMC_UMC0_MCUMC_STATUST0__ErrCoreIdVal_MASK 0x0100000000000000L
-#define MCA_UMC_UMC0_MCUMC_STATUST0__PCC_MASK 0x0200000000000000L
-#define MCA_UMC_UMC0_MCUMC_STATUST0__AddrV_MASK 0x0400000000000000L
-#define MCA_UMC_UMC0_MCUMC_STATUST0__MiscV_MASK 0x0800000000000000L
-#define MCA_UMC_UMC0_MCUMC_STATUST0__En_MASK 0x1000000000000000L
-#define MCA_UMC_UMC0_MCUMC_STATUST0__UC_MASK 0x2000000000000000L
-#define MCA_UMC_UMC0_MCUMC_STATUST0__Overflow_MASK 0x4000000000000000L
-#define MCA_UMC_UMC0_MCUMC_STATUST0__Val_MASK 0x8000000000000000L
-//MCA_UMC_UMC0_MCUMC_ADDRT0
-#define MCA_UMC_UMC0_MCUMC_ADDRT0__ErrorAddr__SHIFT 0x0
-#define MCA_UMC_UMC0_MCUMC_ADDRT0__LSB__SHIFT 0x38
-#define MCA_UMC_UMC0_MCUMC_ADDRT0__Reserved__SHIFT 0x3e
-#define MCA_UMC_UMC0_MCUMC_ADDRT0__ErrorAddr_MASK 0x00FFFFFFFFFFFFFFL
-#define MCA_UMC_UMC0_MCUMC_ADDRT0__LSB_MASK 0x3F00000000000000L
-#define MCA_UMC_UMC0_MCUMC_ADDRT0__Reserved_MASK 0xC000000000000000L
-
-#endif
+/* + * Copyright (C) 2020 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef _umc_8_7_0_SH_MASK_HEADER +#define _umc_8_7_0_SH_MASK_HEADER + +//UMCCH0_0_GeccErrCntSel +#define UMCCH0_0_GeccErrCntSel__GeccErrCntCsSel__SHIFT 0x0 +#define UMCCH0_0_GeccErrCntSel__GeccErrInt__SHIFT 0xc +#define UMCCH0_0_GeccErrCntSel__GeccErrCntEn__SHIFT 0xf +#define UMCCH0_0_GeccErrCntSel__PoisonCntEn__SHIFT 0x10 +#define UMCCH0_0_GeccErrCntSel__GeccErrCntCsSel_MASK 0x0000000FL +#define UMCCH0_0_GeccErrCntSel__GeccErrInt_MASK 0x00003000L +#define UMCCH0_0_GeccErrCntSel__GeccErrCntEn_MASK 0x00008000L +#define UMCCH0_0_GeccErrCntSel__PoisonCntEn_MASK 0x00030000L +//UMCCH0_0_GeccErrCnt +#define UMCCH0_0_GeccErrCnt__GeccErrCnt__SHIFT 0x0 +#define UMCCH0_0_GeccErrCnt__GeccUnCorrErrCnt__SHIFT 0x10 +#define UMCCH0_0_GeccErrCnt__GeccErrCnt_MASK 0x0000FFFFL +#define UMCCH0_0_GeccErrCnt__GeccUnCorrErrCnt_MASK 0xFFFF0000L +//MCA_UMC_UMC0_MCUMC_STATUST0 +#define MCA_UMC_UMC0_MCUMC_STATUST0__ErrorCode__SHIFT 0x0 +#define MCA_UMC_UMC0_MCUMC_STATUST0__ErrorCodeExt__SHIFT 0x10 +#define MCA_UMC_UMC0_MCUMC_STATUST0__RESERV22__SHIFT 0x16 +#define MCA_UMC_UMC0_MCUMC_STATUST0__AddrLsb__SHIFT 0x18 +#define MCA_UMC_UMC0_MCUMC_STATUST0__RESERV30__SHIFT 0x1e +#define MCA_UMC_UMC0_MCUMC_STATUST0__ErrCoreId__SHIFT 0x20 +#define MCA_UMC_UMC0_MCUMC_STATUST0__RESERV38__SHIFT 0x26 +#define MCA_UMC_UMC0_MCUMC_STATUST0__Scrub__SHIFT 0x28 +#define MCA_UMC_UMC0_MCUMC_STATUST0__RESERV41__SHIFT 0x29 +#define MCA_UMC_UMC0_MCUMC_STATUST0__Poison__SHIFT 0x2b +#define MCA_UMC_UMC0_MCUMC_STATUST0__Deferred__SHIFT 0x2c +#define MCA_UMC_UMC0_MCUMC_STATUST0__UECC__SHIFT 0x2d +#define MCA_UMC_UMC0_MCUMC_STATUST0__CECC__SHIFT 0x2e +#define MCA_UMC_UMC0_MCUMC_STATUST0__RESERV47__SHIFT 0x2f +#define MCA_UMC_UMC0_MCUMC_STATUST0__Transparent__SHIFT 0x34 +#define MCA_UMC_UMC0_MCUMC_STATUST0__SyndV__SHIFT 0x35 +#define MCA_UMC_UMC0_MCUMC_STATUST0__RESERV54__SHIFT 0x36 +#define MCA_UMC_UMC0_MCUMC_STATUST0__TCC__SHIFT 0x37 +#define MCA_UMC_UMC0_MCUMC_STATUST0__ErrCoreIdVal__SHIFT 0x38 +#define MCA_UMC_UMC0_MCUMC_STATUST0__PCC__SHIFT 0x39 +#define MCA_UMC_UMC0_MCUMC_STATUST0__AddrV__SHIFT 0x3a +#define MCA_UMC_UMC0_MCUMC_STATUST0__MiscV__SHIFT 0x3b +#define MCA_UMC_UMC0_MCUMC_STATUST0__En__SHIFT 0x3c +#define MCA_UMC_UMC0_MCUMC_STATUST0__UC__SHIFT 0x3d +#define MCA_UMC_UMC0_MCUMC_STATUST0__Overflow__SHIFT 0x3e +#define MCA_UMC_UMC0_MCUMC_STATUST0__Val__SHIFT 0x3f +#define 
MCA_UMC_UMC0_MCUMC_STATUST0__ErrorCode_MASK 0x000000000000FFFFL +#define MCA_UMC_UMC0_MCUMC_STATUST0__ErrorCodeExt_MASK 0x00000000003F0000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__RESERV22_MASK 0x0000000000C00000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__AddrLsb_MASK 0x000000003F000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__RESERV30_MASK 0x00000000C0000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__ErrCoreId_MASK 0x0000003F00000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__RESERV38_MASK 0x000000C000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__Scrub_MASK 0x0000010000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__RESERV41_MASK 0x0000060000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__Poison_MASK 0x0000080000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__Deferred_MASK 0x0000100000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__UECC_MASK 0x0000200000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__CECC_MASK 0x0000400000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__RESERV47_MASK 0x000F800000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__Transparent_MASK 0x0010000000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__SyndV_MASK 0x0020000000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__RESERV54_MASK 0x0040000000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__TCC_MASK 0x0080000000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__ErrCoreIdVal_MASK 0x0100000000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__PCC_MASK 0x0200000000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__AddrV_MASK 0x0400000000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__MiscV_MASK 0x0800000000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__En_MASK 0x1000000000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__UC_MASK 0x2000000000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__Overflow_MASK 0x4000000000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__Val_MASK 0x8000000000000000L +//MCA_UMC_UMC0_MCUMC_ADDRT0 +#define MCA_UMC_UMC0_MCUMC_ADDRT0__ErrorAddr__SHIFT 0x0 +#define MCA_UMC_UMC0_MCUMC_ADDRT0__LSB__SHIFT 0x38 +#define MCA_UMC_UMC0_MCUMC_ADDRT0__Reserved__SHIFT 0x3e +#define MCA_UMC_UMC0_MCUMC_ADDRT0__ErrorAddr_MASK 0x00FFFFFFFFFFFFFFL +#define MCA_UMC_UMC0_MCUMC_ADDRT0__LSB_MASK 0x3F00000000000000L +#define MCA_UMC_UMC0_MCUMC_ADDRT0__Reserved_MASK 0xC000000000000000L + +#endif diff --git a/drivers/gpu/drm/amd/include/cyan_skillfish_ip_offset.h b/drivers/gpu/drm/amd/include/cyan_skillfish_ip_offset.h new file mode 100644 index 000000000000..9cb5f3631c60 --- /dev/null +++ b/drivers/gpu/drm/amd/include/cyan_skillfish_ip_offset.h @@ -0,0 +1,714 @@ +/* + * Copyright (C) 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef _cyan_skillfish_ip_offset_HEADER +#define _cyan_skillfish_ip_offset_HEADER + +#define MAX_INSTANCE 6 +#define MAX_SEGMENT 5 + + +struct IP_BASE_INSTANCE +{ + unsigned int segment[MAX_SEGMENT]; +}; + +struct IP_BASE +{ + struct IP_BASE_INSTANCE instance[MAX_INSTANCE]; +}; + + +static const struct IP_BASE ATHUB_BASE ={ { { { 0x00000C00, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } } } }; +static const struct IP_BASE CLK_BASE ={ { { { 0x00016C00, 0, 0, 0, 0 } }, + { { 0x00016E00, 0, 0, 0, 0 } }, + { { 0x00017000, 0, 0, 0, 0 } }, + { { 0x00017200, 0, 0, 0, 0 } }, + { { 0x00017E00, 0, 0, 0, 0 } }, + { { 0x0001B000, 0, 0, 0, 0 } } } }; +static const struct IP_BASE DF_BASE ={ { { { 0x00007000, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } } } }; +static const struct IP_BASE DMU_BASE ={ { { { 0x00000012, 0x000000C0, 0x000034C0, 0x00009000, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } } } }; +static const struct IP_BASE FUSE_BASE ={ { { { 0x00017400, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } } } }; +static const struct IP_BASE GC_BASE ={ { { { 0x00001260, 0x0000A000, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } } } }; +static const struct IP_BASE HDP_BASE ={ { { { 0x00000F20, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } } } }; +static const struct IP_BASE MMHUB_BASE ={ { { { 0x0001A000, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } } } }; +static const struct IP_BASE MP0_BASE ={ { { { 0x00016000, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } } } }; +static const struct IP_BASE MP1_BASE ={ { { { 0x00016000, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } } } }; +static const struct IP_BASE NBIO_BASE ={ { { { 0x00000000, 0x00000014, 0x00000D20, 0x00010400, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } } } }; +static const struct IP_BASE OSSSYS_BASE ={ { { { 0x000010A0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } } } }; +static const struct IP_BASE SMUIO_BASE ={ { { { 0x00016800, 0x00016A00, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } } } }; +static const struct IP_BASE THM_BASE ={ { { { 0x00016600, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } } } }; +static const struct 
IP_BASE UMC0_BASE ={ { { { 0x00014000, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } } } }; +static const struct IP_BASE UVD0_BASE ={ { { { 0x00007800, 0x00007E00, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0 } } } }; + + +#define ATHUB_BASE__INST0_SEG0 0x00000C00 +#define ATHUB_BASE__INST0_SEG1 0 +#define ATHUB_BASE__INST0_SEG2 0 +#define ATHUB_BASE__INST0_SEG3 0 +#define ATHUB_BASE__INST0_SEG4 0 + +#define ATHUB_BASE__INST1_SEG0 0 +#define ATHUB_BASE__INST1_SEG1 0 +#define ATHUB_BASE__INST1_SEG2 0 +#define ATHUB_BASE__INST1_SEG3 0 +#define ATHUB_BASE__INST1_SEG4 0 + +#define ATHUB_BASE__INST2_SEG0 0 +#define ATHUB_BASE__INST2_SEG1 0 +#define ATHUB_BASE__INST2_SEG2 0 +#define ATHUB_BASE__INST2_SEG3 0 +#define ATHUB_BASE__INST2_SEG4 0 + +#define ATHUB_BASE__INST3_SEG0 0 +#define ATHUB_BASE__INST3_SEG1 0 +#define ATHUB_BASE__INST3_SEG2 0 +#define ATHUB_BASE__INST3_SEG3 0 +#define ATHUB_BASE__INST3_SEG4 0 + +#define ATHUB_BASE__INST4_SEG0 0 +#define ATHUB_BASE__INST4_SEG1 0 +#define ATHUB_BASE__INST4_SEG2 0 +#define ATHUB_BASE__INST4_SEG3 0 +#define ATHUB_BASE__INST4_SEG4 0 + +#define ATHUB_BASE__INST5_SEG0 0 +#define ATHUB_BASE__INST5_SEG1 0 +#define ATHUB_BASE__INST5_SEG2 0 +#define ATHUB_BASE__INST5_SEG3 0 +#define ATHUB_BASE__INST5_SEG4 0 + +#define CLK_BASE__INST0_SEG0 0x00016C00 +#define CLK_BASE__INST0_SEG1 0 +#define CLK_BASE__INST0_SEG2 0 +#define CLK_BASE__INST0_SEG3 0 +#define CLK_BASE__INST0_SEG4 0 + +#define CLK_BASE__INST1_SEG0 0x00016E00 +#define CLK_BASE__INST1_SEG1 0 +#define CLK_BASE__INST1_SEG2 0 +#define CLK_BASE__INST1_SEG3 0 +#define CLK_BASE__INST1_SEG4 0 + +#define CLK_BASE__INST2_SEG0 0x00017000 +#define CLK_BASE__INST2_SEG1 0 +#define CLK_BASE__INST2_SEG2 0 +#define CLK_BASE__INST2_SEG3 0 +#define CLK_BASE__INST2_SEG4 0 + +#define CLK_BASE__INST3_SEG0 0x00017200 +#define CLK_BASE__INST3_SEG1 0 +#define CLK_BASE__INST3_SEG2 0 +#define CLK_BASE__INST3_SEG3 0 +#define CLK_BASE__INST3_SEG4 0 + +#define CLK_BASE__INST4_SEG0 0x00017E00 +#define CLK_BASE__INST4_SEG1 0 +#define CLK_BASE__INST4_SEG2 0 +#define CLK_BASE__INST4_SEG3 0 +#define CLK_BASE__INST4_SEG4 0 + +#define CLK_BASE__INST5_SEG0 0x0001B000 +#define CLK_BASE__INST5_SEG1 0 +#define CLK_BASE__INST5_SEG2 0 +#define CLK_BASE__INST5_SEG3 0 +#define CLK_BASE__INST5_SEG4 0 + +#define DF_BASE__INST0_SEG0 0x00007000 +#define DF_BASE__INST0_SEG1 0 +#define DF_BASE__INST0_SEG2 0 +#define DF_BASE__INST0_SEG3 0 +#define DF_BASE__INST0_SEG4 0 + +#define DF_BASE__INST1_SEG0 0 +#define DF_BASE__INST1_SEG1 0 +#define DF_BASE__INST1_SEG2 0 +#define DF_BASE__INST1_SEG3 0 +#define DF_BASE__INST1_SEG4 0 + +#define DF_BASE__INST2_SEG0 0 +#define DF_BASE__INST2_SEG1 0 +#define DF_BASE__INST2_SEG2 0 +#define DF_BASE__INST2_SEG3 0 +#define DF_BASE__INST2_SEG4 0 + +#define DF_BASE__INST3_SEG0 0 +#define DF_BASE__INST3_SEG1 0 +#define DF_BASE__INST3_SEG2 0 +#define DF_BASE__INST3_SEG3 0 +#define DF_BASE__INST3_SEG4 0 + +#define DF_BASE__INST4_SEG0 0 +#define DF_BASE__INST4_SEG1 0 +#define DF_BASE__INST4_SEG2 0 +#define DF_BASE__INST4_SEG3 0 +#define DF_BASE__INST4_SEG4 0 + +#define DF_BASE__INST5_SEG0 0 +#define DF_BASE__INST5_SEG1 0 +#define DF_BASE__INST5_SEG2 0 +#define DF_BASE__INST5_SEG3 0 +#define DF_BASE__INST5_SEG4 0 + +#define DMU_BASE__INST0_SEG0 0x00000012 +#define DMU_BASE__INST0_SEG1 0x000000C0 +#define DMU_BASE__INST0_SEG2 0x000034C0 +#define 
DMU_BASE__INST0_SEG3 0x00009000 +#define DMU_BASE__INST0_SEG4 0 + +#define DMU_BASE__INST1_SEG0 0 +#define DMU_BASE__INST1_SEG1 0 +#define DMU_BASE__INST1_SEG2 0 +#define DMU_BASE__INST1_SEG3 0 +#define DMU_BASE__INST1_SEG4 0 + +#define DMU_BASE__INST2_SEG0 0 +#define DMU_BASE__INST2_SEG1 0 +#define DMU_BASE__INST2_SEG2 0 +#define DMU_BASE__INST2_SEG3 0 +#define DMU_BASE__INST2_SEG4 0 + +#define DMU_BASE__INST3_SEG0 0 +#define DMU_BASE__INST3_SEG1 0 +#define DMU_BASE__INST3_SEG2 0 +#define DMU_BASE__INST3_SEG3 0 +#define DMU_BASE__INST3_SEG4 0 + +#define DMU_BASE__INST4_SEG0 0 +#define DMU_BASE__INST4_SEG1 0 +#define DMU_BASE__INST4_SEG2 0 +#define DMU_BASE__INST4_SEG3 0 +#define DMU_BASE__INST4_SEG4 0 + +#define DMU_BASE__INST5_SEG0 0 +#define DMU_BASE__INST5_SEG1 0 +#define DMU_BASE__INST5_SEG2 0 +#define DMU_BASE__INST5_SEG3 0 +#define DMU_BASE__INST5_SEG4 0 + +#define FUSE_BASE__INST0_SEG0 0x00017400 +#define FUSE_BASE__INST0_SEG1 0 +#define FUSE_BASE__INST0_SEG2 0 +#define FUSE_BASE__INST0_SEG3 0 +#define FUSE_BASE__INST0_SEG4 0 + +#define FUSE_BASE__INST1_SEG0 0 +#define FUSE_BASE__INST1_SEG1 0 +#define FUSE_BASE__INST1_SEG2 0 +#define FUSE_BASE__INST1_SEG3 0 +#define FUSE_BASE__INST1_SEG4 0 + +#define FUSE_BASE__INST2_SEG0 0 +#define FUSE_BASE__INST2_SEG1 0 +#define FUSE_BASE__INST2_SEG2 0 +#define FUSE_BASE__INST2_SEG3 0 +#define FUSE_BASE__INST2_SEG4 0 + +#define FUSE_BASE__INST3_SEG0 0 +#define FUSE_BASE__INST3_SEG1 0 +#define FUSE_BASE__INST3_SEG2 0 +#define FUSE_BASE__INST3_SEG3 0 +#define FUSE_BASE__INST3_SEG4 0 + +#define FUSE_BASE__INST4_SEG0 0 +#define FUSE_BASE__INST4_SEG1 0 +#define FUSE_BASE__INST4_SEG2 0 +#define FUSE_BASE__INST4_SEG3 0 +#define FUSE_BASE__INST4_SEG4 0 + +#define FUSE_BASE__INST5_SEG0 0 +#define FUSE_BASE__INST5_SEG1 0 +#define FUSE_BASE__INST5_SEG2 0 +#define FUSE_BASE__INST5_SEG3 0 +#define FUSE_BASE__INST5_SEG4 0 + +#define GC_BASE__INST0_SEG0 0x00001260 +#define GC_BASE__INST0_SEG1 0x0000A000 +#define GC_BASE__INST0_SEG2 0 +#define GC_BASE__INST0_SEG3 0 +#define GC_BASE__INST0_SEG4 0 + +#define GC_BASE__INST1_SEG0 0 +#define GC_BASE__INST1_SEG1 0 +#define GC_BASE__INST1_SEG2 0 +#define GC_BASE__INST1_SEG3 0 +#define GC_BASE__INST1_SEG4 0 + +#define GC_BASE__INST2_SEG0 0 +#define GC_BASE__INST2_SEG1 0 +#define GC_BASE__INST2_SEG2 0 +#define GC_BASE__INST2_SEG3 0 +#define GC_BASE__INST2_SEG4 0 + +#define GC_BASE__INST3_SEG0 0 +#define GC_BASE__INST3_SEG1 0 +#define GC_BASE__INST3_SEG2 0 +#define GC_BASE__INST3_SEG3 0 +#define GC_BASE__INST3_SEG4 0 + +#define GC_BASE__INST4_SEG0 0 +#define GC_BASE__INST4_SEG1 0 +#define GC_BASE__INST4_SEG2 0 +#define GC_BASE__INST4_SEG3 0 +#define GC_BASE__INST4_SEG4 0 + +#define GC_BASE__INST5_SEG0 0 +#define GC_BASE__INST5_SEG1 0 +#define GC_BASE__INST5_SEG2 0 +#define GC_BASE__INST5_SEG3 0 +#define GC_BASE__INST5_SEG4 0 + +#define HDP_BASE__INST0_SEG0 0x00000F20 +#define HDP_BASE__INST0_SEG1 0 +#define HDP_BASE__INST0_SEG2 0 +#define HDP_BASE__INST0_SEG3 0 +#define HDP_BASE__INST0_SEG4 0 + +#define HDP_BASE__INST1_SEG0 0 +#define HDP_BASE__INST1_SEG1 0 +#define HDP_BASE__INST1_SEG2 0 +#define HDP_BASE__INST1_SEG3 0 +#define HDP_BASE__INST1_SEG4 0 + +#define HDP_BASE__INST2_SEG0 0 +#define HDP_BASE__INST2_SEG1 0 +#define HDP_BASE__INST2_SEG2 0 +#define HDP_BASE__INST2_SEG3 0 +#define HDP_BASE__INST2_SEG4 0 + +#define HDP_BASE__INST3_SEG0 0 +#define HDP_BASE__INST3_SEG1 0 +#define HDP_BASE__INST3_SEG2 0 +#define HDP_BASE__INST3_SEG3 0 +#define HDP_BASE__INST3_SEG4 0 + +#define HDP_BASE__INST4_SEG0 0 +#define 
HDP_BASE__INST4_SEG1 0 +#define HDP_BASE__INST4_SEG2 0 +#define HDP_BASE__INST4_SEG3 0 +#define HDP_BASE__INST4_SEG4 0 + +#define HDP_BASE__INST5_SEG0 0 +#define HDP_BASE__INST5_SEG1 0 +#define HDP_BASE__INST5_SEG2 0 +#define HDP_BASE__INST5_SEG3 0 +#define HDP_BASE__INST5_SEG4 0 + +#define MMHUB_BASE__INST0_SEG0 0x0001A000 +#define MMHUB_BASE__INST0_SEG1 0 +#define MMHUB_BASE__INST0_SEG2 0 +#define MMHUB_BASE__INST0_SEG3 0 +#define MMHUB_BASE__INST0_SEG4 0 + +#define MMHUB_BASE__INST1_SEG0 0 +#define MMHUB_BASE__INST1_SEG1 0 +#define MMHUB_BASE__INST1_SEG2 0 +#define MMHUB_BASE__INST1_SEG3 0 +#define MMHUB_BASE__INST1_SEG4 0 + +#define MMHUB_BASE__INST2_SEG0 0 +#define MMHUB_BASE__INST2_SEG1 0 +#define MMHUB_BASE__INST2_SEG2 0 +#define MMHUB_BASE__INST2_SEG3 0 +#define MMHUB_BASE__INST2_SEG4 0 + +#define MMHUB_BASE__INST3_SEG0 0 +#define MMHUB_BASE__INST3_SEG1 0 +#define MMHUB_BASE__INST3_SEG2 0 +#define MMHUB_BASE__INST3_SEG3 0 +#define MMHUB_BASE__INST3_SEG4 0 + +#define MMHUB_BASE__INST4_SEG0 0 +#define MMHUB_BASE__INST4_SEG1 0 +#define MMHUB_BASE__INST4_SEG2 0 +#define MMHUB_BASE__INST4_SEG3 0 +#define MMHUB_BASE__INST4_SEG4 0 + +#define MMHUB_BASE__INST5_SEG0 0 +#define MMHUB_BASE__INST5_SEG1 0 +#define MMHUB_BASE__INST5_SEG2 0 +#define MMHUB_BASE__INST5_SEG3 0 +#define MMHUB_BASE__INST5_SEG4 0 + +#define MP0_BASE__INST0_SEG0 0x00016000 +#define MP0_BASE__INST0_SEG1 0 +#define MP0_BASE__INST0_SEG2 0 +#define MP0_BASE__INST0_SEG3 0 +#define MP0_BASE__INST0_SEG4 0 + +#define MP0_BASE__INST1_SEG0 0 +#define MP0_BASE__INST1_SEG1 0 +#define MP0_BASE__INST1_SEG2 0 +#define MP0_BASE__INST1_SEG3 0 +#define MP0_BASE__INST1_SEG4 0 + +#define MP0_BASE__INST2_SEG0 0 +#define MP0_BASE__INST2_SEG1 0 +#define MP0_BASE__INST2_SEG2 0 +#define MP0_BASE__INST2_SEG3 0 +#define MP0_BASE__INST2_SEG4 0 + +#define MP0_BASE__INST3_SEG0 0 +#define MP0_BASE__INST3_SEG1 0 +#define MP0_BASE__INST3_SEG2 0 +#define MP0_BASE__INST3_SEG3 0 +#define MP0_BASE__INST3_SEG4 0 + +#define MP0_BASE__INST4_SEG0 0 +#define MP0_BASE__INST4_SEG1 0 +#define MP0_BASE__INST4_SEG2 0 +#define MP0_BASE__INST4_SEG3 0 +#define MP0_BASE__INST4_SEG4 0 + +#define MP0_BASE__INST5_SEG0 0 +#define MP0_BASE__INST5_SEG1 0 +#define MP0_BASE__INST5_SEG2 0 +#define MP0_BASE__INST5_SEG3 0 +#define MP0_BASE__INST5_SEG4 0 + +#define MP1_BASE__INST0_SEG0 0x00016000 +#define MP1_BASE__INST0_SEG1 0 +#define MP1_BASE__INST0_SEG2 0 +#define MP1_BASE__INST0_SEG3 0 +#define MP1_BASE__INST0_SEG4 0 + +#define MP1_BASE__INST1_SEG0 0 +#define MP1_BASE__INST1_SEG1 0 +#define MP1_BASE__INST1_SEG2 0 +#define MP1_BASE__INST1_SEG3 0 +#define MP1_BASE__INST1_SEG4 0 + +#define MP1_BASE__INST2_SEG0 0 +#define MP1_BASE__INST2_SEG1 0 +#define MP1_BASE__INST2_SEG2 0 +#define MP1_BASE__INST2_SEG3 0 +#define MP1_BASE__INST2_SEG4 0 + +#define MP1_BASE__INST3_SEG0 0 +#define MP1_BASE__INST3_SEG1 0 +#define MP1_BASE__INST3_SEG2 0 +#define MP1_BASE__INST3_SEG3 0 +#define MP1_BASE__INST3_SEG4 0 + +#define MP1_BASE__INST4_SEG0 0 +#define MP1_BASE__INST4_SEG1 0 +#define MP1_BASE__INST4_SEG2 0 +#define MP1_BASE__INST4_SEG3 0 +#define MP1_BASE__INST4_SEG4 0 + +#define MP1_BASE__INST5_SEG0 0 +#define MP1_BASE__INST5_SEG1 0 +#define MP1_BASE__INST5_SEG2 0 +#define MP1_BASE__INST5_SEG3 0 +#define MP1_BASE__INST5_SEG4 0 + +#define NBIO_BASE__INST0_SEG0 0x00000000 +#define NBIO_BASE__INST0_SEG1 0x00000014 +#define NBIO_BASE__INST0_SEG2 0x00000D20 +#define NBIO_BASE__INST0_SEG3 0x00010400 +#define NBIO_BASE__INST0_SEG4 0 + +#define NBIO_BASE__INST1_SEG0 0 +#define NBIO_BASE__INST1_SEG1 0 
+#define NBIO_BASE__INST1_SEG2 0 +#define NBIO_BASE__INST1_SEG3 0 +#define NBIO_BASE__INST1_SEG4 0 + +#define NBIO_BASE__INST2_SEG0 0 +#define NBIO_BASE__INST2_SEG1 0 +#define NBIO_BASE__INST2_SEG2 0 +#define NBIO_BASE__INST2_SEG3 0 +#define NBIO_BASE__INST2_SEG4 0 + +#define NBIO_BASE__INST3_SEG0 0 +#define NBIO_BASE__INST3_SEG1 0 +#define NBIO_BASE__INST3_SEG2 0 +#define NBIO_BASE__INST3_SEG3 0 +#define NBIO_BASE__INST3_SEG4 0 + +#define NBIO_BASE__INST4_SEG0 0 +#define NBIO_BASE__INST4_SEG1 0 +#define NBIO_BASE__INST4_SEG2 0 +#define NBIO_BASE__INST4_SEG3 0 +#define NBIO_BASE__INST4_SEG4 0 + +#define NBIO_BASE__INST5_SEG0 0 +#define NBIO_BASE__INST5_SEG1 0 +#define NBIO_BASE__INST5_SEG2 0 +#define NBIO_BASE__INST5_SEG3 0 +#define NBIO_BASE__INST5_SEG4 0 + +#define OSSSYS_BASE__INST0_SEG0 0x000010A0 +#define OSSSYS_BASE__INST0_SEG1 0 +#define OSSSYS_BASE__INST0_SEG2 0 +#define OSSSYS_BASE__INST0_SEG3 0 +#define OSSSYS_BASE__INST0_SEG4 0 + +#define OSSSYS_BASE__INST1_SEG0 0 +#define OSSSYS_BASE__INST1_SEG1 0 +#define OSSSYS_BASE__INST1_SEG2 0 +#define OSSSYS_BASE__INST1_SEG3 0 +#define OSSSYS_BASE__INST1_SEG4 0 + +#define OSSSYS_BASE__INST2_SEG0 0 +#define OSSSYS_BASE__INST2_SEG1 0 +#define OSSSYS_BASE__INST2_SEG2 0 +#define OSSSYS_BASE__INST2_SEG3 0 +#define OSSSYS_BASE__INST2_SEG4 0 + +#define OSSSYS_BASE__INST3_SEG0 0 +#define OSSSYS_BASE__INST3_SEG1 0 +#define OSSSYS_BASE__INST3_SEG2 0 +#define OSSSYS_BASE__INST3_SEG3 0 +#define OSSSYS_BASE__INST3_SEG4 0 + +#define OSSSYS_BASE__INST4_SEG0 0 +#define OSSSYS_BASE__INST4_SEG1 0 +#define OSSSYS_BASE__INST4_SEG2 0 +#define OSSSYS_BASE__INST4_SEG3 0 +#define OSSSYS_BASE__INST4_SEG4 0 + +#define OSSSYS_BASE__INST5_SEG0 0 +#define OSSSYS_BASE__INST5_SEG1 0 +#define OSSSYS_BASE__INST5_SEG2 0 +#define OSSSYS_BASE__INST5_SEG3 0 +#define OSSSYS_BASE__INST5_SEG4 0 + +#define SMUIO_BASE__INST0_SEG0 0x00016800 +#define SMUIO_BASE__INST0_SEG1 0x00016A00 +#define SMUIO_BASE__INST0_SEG2 0 +#define SMUIO_BASE__INST0_SEG3 0 +#define SMUIO_BASE__INST0_SEG4 0 + +#define SMUIO_BASE__INST1_SEG0 0 +#define SMUIO_BASE__INST1_SEG1 0 +#define SMUIO_BASE__INST1_SEG2 0 +#define SMUIO_BASE__INST1_SEG3 0 +#define SMUIO_BASE__INST1_SEG4 0 + +#define SMUIO_BASE__INST2_SEG0 0 +#define SMUIO_BASE__INST2_SEG1 0 +#define SMUIO_BASE__INST2_SEG2 0 +#define SMUIO_BASE__INST2_SEG3 0 +#define SMUIO_BASE__INST2_SEG4 0 + +#define SMUIO_BASE__INST3_SEG0 0 +#define SMUIO_BASE__INST3_SEG1 0 +#define SMUIO_BASE__INST3_SEG2 0 +#define SMUIO_BASE__INST3_SEG3 0 +#define SMUIO_BASE__INST3_SEG4 0 + +#define SMUIO_BASE__INST4_SEG0 0 +#define SMUIO_BASE__INST4_SEG1 0 +#define SMUIO_BASE__INST4_SEG2 0 +#define SMUIO_BASE__INST4_SEG3 0 +#define SMUIO_BASE__INST4_SEG4 0 + +#define SMUIO_BASE__INST5_SEG0 0 +#define SMUIO_BASE__INST5_SEG1 0 +#define SMUIO_BASE__INST5_SEG2 0 +#define SMUIO_BASE__INST5_SEG3 0 +#define SMUIO_BASE__INST5_SEG4 0 + +#define THM_BASE__INST0_SEG0 0x00016600 +#define THM_BASE__INST0_SEG1 0 +#define THM_BASE__INST0_SEG2 0 +#define THM_BASE__INST0_SEG3 0 +#define THM_BASE__INST0_SEG4 0 + +#define THM_BASE__INST1_SEG0 0 +#define THM_BASE__INST1_SEG1 0 +#define THM_BASE__INST1_SEG2 0 +#define THM_BASE__INST1_SEG3 0 +#define THM_BASE__INST1_SEG4 0 + +#define THM_BASE__INST2_SEG0 0 +#define THM_BASE__INST2_SEG1 0 +#define THM_BASE__INST2_SEG2 0 +#define THM_BASE__INST2_SEG3 0 +#define THM_BASE__INST2_SEG4 0 + +#define THM_BASE__INST3_SEG0 0 +#define THM_BASE__INST3_SEG1 0 +#define THM_BASE__INST3_SEG2 0 +#define THM_BASE__INST3_SEG3 0 +#define THM_BASE__INST3_SEG4 0 + 
+#define THM_BASE__INST4_SEG0 0 +#define THM_BASE__INST4_SEG1 0 +#define THM_BASE__INST4_SEG2 0 +#define THM_BASE__INST4_SEG3 0 +#define THM_BASE__INST4_SEG4 0 + +#define THM_BASE__INST5_SEG0 0 +#define THM_BASE__INST5_SEG1 0 +#define THM_BASE__INST5_SEG2 0 +#define THM_BASE__INST5_SEG3 0 +#define THM_BASE__INST5_SEG4 0 + +#define UMC0_BASE__INST0_SEG0 0x00014000 +#define UMC0_BASE__INST0_SEG1 0 +#define UMC0_BASE__INST0_SEG2 0 +#define UMC0_BASE__INST0_SEG3 0 +#define UMC0_BASE__INST0_SEG4 0 + +#define UMC0_BASE__INST1_SEG0 0 +#define UMC0_BASE__INST1_SEG1 0 +#define UMC0_BASE__INST1_SEG2 0 +#define UMC0_BASE__INST1_SEG3 0 +#define UMC0_BASE__INST1_SEG4 0 + +#define UMC0_BASE__INST2_SEG0 0 +#define UMC0_BASE__INST2_SEG1 0 +#define UMC0_BASE__INST2_SEG2 0 +#define UMC0_BASE__INST2_SEG3 0 +#define UMC0_BASE__INST2_SEG4 0 + +#define UMC0_BASE__INST3_SEG0 0 +#define UMC0_BASE__INST3_SEG1 0 +#define UMC0_BASE__INST3_SEG2 0 +#define UMC0_BASE__INST3_SEG3 0 +#define UMC0_BASE__INST3_SEG4 0 + +#define UMC0_BASE__INST4_SEG0 0 +#define UMC0_BASE__INST4_SEG1 0 +#define UMC0_BASE__INST4_SEG2 0 +#define UMC0_BASE__INST4_SEG3 0 +#define UMC0_BASE__INST4_SEG4 0 + +#define UMC0_BASE__INST5_SEG0 0 +#define UMC0_BASE__INST5_SEG1 0 +#define UMC0_BASE__INST5_SEG2 0 +#define UMC0_BASE__INST5_SEG3 0 +#define UMC0_BASE__INST5_SEG4 0 + +#define UVD0_BASE__INST0_SEG0 0x00007800 +#define UVD0_BASE__INST0_SEG1 0x00007E00 +#define UVD0_BASE__INST0_SEG2 0 +#define UVD0_BASE__INST0_SEG3 0 +#define UVD0_BASE__INST0_SEG4 0 + +#define UVD0_BASE__INST1_SEG0 0 +#define UVD0_BASE__INST1_SEG1 0 +#define UVD0_BASE__INST1_SEG2 0 +#define UVD0_BASE__INST1_SEG3 0 +#define UVD0_BASE__INST1_SEG4 0 + +#define UVD0_BASE__INST2_SEG0 0 +#define UVD0_BASE__INST2_SEG1 0 +#define UVD0_BASE__INST2_SEG2 0 +#define UVD0_BASE__INST2_SEG3 0 +#define UVD0_BASE__INST2_SEG4 0 + +#define UVD0_BASE__INST3_SEG0 0 +#define UVD0_BASE__INST3_SEG1 0 +#define UVD0_BASE__INST3_SEG2 0 +#define UVD0_BASE__INST3_SEG3 0 +#define UVD0_BASE__INST3_SEG4 0 + +#define UVD0_BASE__INST4_SEG0 0 +#define UVD0_BASE__INST4_SEG1 0 +#define UVD0_BASE__INST4_SEG2 0 +#define UVD0_BASE__INST4_SEG3 0 +#define UVD0_BASE__INST4_SEG4 0 + +#define UVD0_BASE__INST5_SEG0 0 +#define UVD0_BASE__INST5_SEG1 0 +#define UVD0_BASE__INST5_SEG2 0 +#define UVD0_BASE__INST5_SEG3 0 +#define UVD0_BASE__INST5_SEG4 0 + +#endif + diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index 95c656d205ed..c84bd7b2cf59 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -44,6 +44,7 @@ struct kgd_mem; enum kfd_preempt_type { KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN = 0, KFD_PREEMPT_TYPE_WAVEFRONT_RESET, + KFD_PREEMPT_TYPE_WAVEFRONT_SAVE }; struct kfd_vm_fault_info { @@ -298,6 +299,8 @@ struct kfd2kgd_calls { void (*get_cu_occupancy)(struct kgd_dev *kgd, int pasid, int *wave_cnt, int *max_waves_per_cu); + void (*program_trap_handler_settings)(struct kgd_dev *kgd, + uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr); }; #endif /* KGD_KFD_INTERFACE_H_INCLUDED */ diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h index e38b191c7b7c..bac15c466733 100644 --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h @@ -306,8 +306,8 @@ struct amd_pm_funcs { /* export for sysfs */ void (*set_fan_control_mode)(void *handle, u32 mode); u32 (*get_fan_control_mode)(void *handle); - int 
(*set_fan_speed_percent)(void *handle, u32 speed); - int (*get_fan_speed_percent)(void *handle, u32 *speed); + int (*set_fan_speed_pwm)(void *handle, u32 speed); + int (*get_fan_speed_pwm)(void *handle, u32 *speed); int (*force_clock_level)(void *handle, enum pp_clock_type type, uint32_t mask); int (*print_clock_levels)(void *handle, enum pp_clock_type type, char *buf); int (*force_performance_level)(void *handle, enum amd_dpm_forced_level level); diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index 769f58d5ae1a..2d55627b05b1 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -2094,14 +2094,19 @@ static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_ *states = ATTR_STATE_UNSUPPORTED; } - if (asic_type == CHIP_ARCTURUS) { - /* Arcturus does not support standalone mclk/socclk/fclk level setting */ + switch (asic_type) { + case CHIP_ARCTURUS: + case CHIP_ALDEBARAN: + /* the Mi series card does not support standalone mclk/socclk/fclk level setting */ if (DEVICE_ATTR_IS(pp_dpm_mclk) || DEVICE_ATTR_IS(pp_dpm_socclk) || DEVICE_ATTR_IS(pp_dpm_fclk)) { dev_attr->attr.mode &= ~S_IWUGO; dev_attr->store = NULL; } + break; + default: + break; } if (DEVICE_ATTR_IS(pp_dpm_dcefclk)) { @@ -2379,7 +2384,7 @@ static ssize_t amdgpu_hwmon_get_pwm1_enable(struct device *dev, pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); - return sprintf(buf, "%u\n", pwm_mode); + return sysfs_emit(buf, "%u\n", pwm_mode); } static ssize_t amdgpu_hwmon_set_pwm1_enable(struct device *dev, @@ -2424,14 +2429,14 @@ static ssize_t amdgpu_hwmon_get_pwm1_min(struct device *dev, struct device_attribute *attr, char *buf) { - return sprintf(buf, "%i\n", 0); + return sysfs_emit(buf, "%i\n", 0); } static ssize_t amdgpu_hwmon_get_pwm1_max(struct device *dev, struct device_attribute *attr, char *buf) { - return sprintf(buf, "%i\n", 255); + return sysfs_emit(buf, "%i\n", 255); } static ssize_t amdgpu_hwmon_set_pwm1(struct device *dev, @@ -2469,10 +2474,8 @@ static ssize_t amdgpu_hwmon_set_pwm1(struct device *dev, return err; } - value = (value * 100) / 255; - - if (adev->powerplay.pp_funcs->set_fan_speed_percent) - err = amdgpu_dpm_set_fan_speed_percent(adev, value); + if (adev->powerplay.pp_funcs->set_fan_speed_pwm) + err = amdgpu_dpm_set_fan_speed_pwm(adev, value); else err = -EINVAL; @@ -2504,8 +2507,8 @@ static ssize_t amdgpu_hwmon_get_pwm1(struct device *dev, return err; } - if (adev->powerplay.pp_funcs->get_fan_speed_percent) - err = amdgpu_dpm_get_fan_speed_percent(adev, &speed); + if (adev->powerplay.pp_funcs->get_fan_speed_pwm) + err = amdgpu_dpm_get_fan_speed_pwm(adev, &speed); else err = -EINVAL; @@ -2515,9 +2518,7 @@ static ssize_t amdgpu_hwmon_get_pwm1(struct device *dev, if (err) return err; - speed = (speed * 255) / 100; - - return sprintf(buf, "%i\n", speed); + return sysfs_emit(buf, "%i\n", speed); } static ssize_t amdgpu_hwmon_get_fan1_input(struct device *dev, @@ -2550,7 +2551,7 @@ static ssize_t amdgpu_hwmon_get_fan1_input(struct device *dev, if (err) return err; - return sprintf(buf, "%i\n", speed); + return sysfs_emit(buf, "%i\n", speed); } static ssize_t amdgpu_hwmon_get_fan1_min(struct device *dev, @@ -2647,7 +2648,7 @@ static ssize_t amdgpu_hwmon_get_fan1_target(struct device *dev, if (err) return err; - return sprintf(buf, "%i\n", rpm); + return sysfs_emit(buf, "%i\n", rpm); } static ssize_t amdgpu_hwmon_set_fan1_target(struct device *dev, @@ -2729,7 +2730,7 @@ 
static ssize_t amdgpu_hwmon_get_fan1_enable(struct device *dev, pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); - return sprintf(buf, "%i\n", pwm_mode == AMD_FAN_CTRL_AUTO ? 0 : 1); + return sysfs_emit(buf, "%i\n", pwm_mode == AMD_FAN_CTRL_AUTO ? 0 : 1); } static ssize_t amdgpu_hwmon_set_fan1_enable(struct device *dev, @@ -2899,7 +2900,7 @@ static ssize_t amdgpu_hwmon_show_power_cap_min(struct device *dev, struct device_attribute *attr, char *buf) { - return sprintf(buf, "%i\n", 0); + return sysfs_emit(buf, "%i\n", 0); } @@ -3174,6 +3175,9 @@ static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev, * * - fan[1-\*]_enable: Enable or disable the sensors.1: Enable 0: Disable * + * NOTE: DO NOT set the fan speed via "pwm1" and "fan[1-\*]_target" interfaces at the same time. + * That will get the former one overridden. + * * hwmon interfaces for GPU clocks: * * - freq1_input: the gfx/compute clock in hertz @@ -3349,13 +3353,13 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj, if (!is_support_sw_smu(adev)) { /* mask fan attributes if we have no bindings for this asic to expose */ - if ((!adev->powerplay.pp_funcs->get_fan_speed_percent && + if ((!adev->powerplay.pp_funcs->get_fan_speed_pwm && attr == &sensor_dev_attr_pwm1.dev_attr.attr) || /* can't query fan */ (!adev->powerplay.pp_funcs->get_fan_control_mode && attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr)) /* can't query state */ effective_mode &= ~S_IRUGO; - if ((!adev->powerplay.pp_funcs->set_fan_speed_percent && + if ((!adev->powerplay.pp_funcs->set_fan_speed_pwm && attr == &sensor_dev_attr_pwm1.dev_attr.attr) || /* can't manage fan */ (!adev->powerplay.pp_funcs->set_fan_control_mode && attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr)) /* can't manage state */ @@ -3379,8 +3383,8 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj, if (!is_support_sw_smu(adev)) { /* hide max/min values if we can't both query and manage the fan */ - if ((!adev->powerplay.pp_funcs->set_fan_speed_percent && - !adev->powerplay.pp_funcs->get_fan_speed_percent) && + if ((!adev->powerplay.pp_funcs->set_fan_speed_pwm && + !adev->powerplay.pp_funcs->get_fan_speed_pwm) && (!adev->powerplay.pp_funcs->set_fan_speed_rpm && !adev->powerplay.pp_funcs->get_fan_speed_rpm) && (attr == &sensor_dev_attr_pwm1_max.dev_attr.attr || diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h index f6e0e7d8a007..98f1b3d8c1d5 100644 --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h @@ -280,11 +280,11 @@ enum amdgpu_pcie_gen { #define amdgpu_dpm_get_fan_control_mode(adev) \ ((adev)->powerplay.pp_funcs->get_fan_control_mode((adev)->powerplay.pp_handle)) -#define amdgpu_dpm_set_fan_speed_percent(adev, s) \ - ((adev)->powerplay.pp_funcs->set_fan_speed_percent((adev)->powerplay.pp_handle, (s))) +#define amdgpu_dpm_set_fan_speed_pwm(adev, s) \ + ((adev)->powerplay.pp_funcs->set_fan_speed_pwm((adev)->powerplay.pp_handle, (s))) -#define amdgpu_dpm_get_fan_speed_percent(adev, s) \ - ((adev)->powerplay.pp_funcs->get_fan_speed_percent((adev)->powerplay.pp_handle, (s))) +#define amdgpu_dpm_get_fan_speed_pwm(adev, s) \ + ((adev)->powerplay.pp_funcs->get_fan_speed_pwm((adev)->powerplay.pp_handle, (s))) #define amdgpu_dpm_get_fan_speed_rpm(adev, s) \ ((adev)->powerplay.pp_funcs->get_fan_speed_rpm)((adev)->powerplay.pp_handle, (s)) @@ -450,6 +450,7 @@ struct amdgpu_pm { /* Used for I2C access to various EEPROMs on relevant ASICs 
*/ struct i2c_adapter smu_i2c; + struct mutex smu_i2c_mutex; struct list_head pm_attr_list; }; diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h index 3e89852e4820..715b4225f5ee 100644 --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h @@ -34,6 +34,8 @@ #define SMU_FW_NAME_LEN 0x24 #define SMU_DPM_USER_PROFILE_RESTORE (1 << 0) +#define SMU_CUSTOM_FAN_SPEED_RPM (1 << 1) +#define SMU_CUSTOM_FAN_SPEED_PWM (1 << 2) // Power Throttlers #define SMU_THROTTLER_PPT0_BIT 0 @@ -229,8 +231,10 @@ enum smu_memory_pool_size struct smu_user_dpm_profile { uint32_t fan_mode; uint32_t power_limit; - uint32_t fan_speed_percent; + uint32_t fan_speed_pwm; + uint32_t fan_speed_rpm; uint32_t flags; + uint32_t user_od; /* user clock state information */ uint32_t clk_mask[SMU_CLK_COUNT]; @@ -352,6 +356,7 @@ struct smu_table_context void *overdrive_table; void *boot_overdrive_table; + void *user_overdrive_table; uint32_t gpu_metrics_table_size; void *gpu_metrics_table; @@ -538,7 +543,7 @@ struct smu_context struct work_struct interrupt_work; unsigned fan_max_rpm; - unsigned manual_fan_speed_percent; + unsigned manual_fan_speed_pwm; uint32_t gfx_default_hard_min_freq; uint32_t gfx_default_soft_max_freq; @@ -624,6 +629,12 @@ struct pptable_funcs { long *input, uint32_t size); /** + * @restore_user_od_settings: Restore the user customized + * OD settings on S3/S4/Runpm resume. + */ + int (*restore_user_od_settings)(struct smu_context *smu); + + /** * @get_clock_by_type_with_latency: Get the speed and latency of a clock * domain. */ @@ -714,9 +725,14 @@ struct pptable_funcs { bool (*is_dpm_running)(struct smu_context *smu); /** - * @get_fan_speed_percent: Get the current fan speed in percent. + * @get_fan_speed_pwm: Get the current fan speed in PWM. */ - int (*get_fan_speed_percent)(struct smu_context *smu, uint32_t *speed); + int (*get_fan_speed_pwm)(struct smu_context *smu, uint32_t *speed); + + /** + * @get_fan_speed_rpm: Get the current fan speed in rpm. + */ + int (*get_fan_speed_rpm)(struct smu_context *smu, uint32_t *speed); /** * @set_watermarks_table: Configure and upload the watermarks tables to @@ -1035,9 +1051,14 @@ struct pptable_funcs { int (*set_fan_control_mode)(struct smu_context *smu, uint32_t mode); /** - * @set_fan_speed_percent: Set a static fan speed in percent. + * @set_fan_speed_pwm: Set a static fan speed in PWM. + */ + int (*set_fan_speed_pwm)(struct smu_context *smu, uint32_t speed); + + /** + * @set_fan_speed_rpm: Set a static fan speed in rpm. */ - int (*set_fan_speed_percent)(struct smu_context *smu, uint32_t speed); + int (*set_fan_speed_rpm)(struct smu_context *smu, uint32_t speed); /** * @set_xgmi_pstate: Set inter-chip global memory interconnect pstate. 
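The fan-control hunks above move the amdgpu power-management callbacks from a 0-100 percent scale to the hwmon-native 0-255 PWM scale, which is why amdgpu_hwmon_set_pwm1()/amdgpu_hwmon_get_pwm1() no longer rescale the sysfs value before handing it to the set_fan_speed_pwm/get_fan_speed_pwm hooks. As a rough illustration of the resolution loss the old percent round-trip implied, here is a minimal standalone sketch; the helper names are illustrative only and are not part of the patch:

#include <stdint.h>
#include <stdio.h>

/* Old flow: the sysfs pwm1 value (0-255) was rescaled to percent and back,
 * truncating on each division; the patch passes the 0-255 value through. */
static uint32_t pwm_to_percent(uint32_t pwm)     { return (pwm * 100) / 255; }
static uint32_t percent_to_pwm(uint32_t percent) { return (percent * 255) / 100; }

int main(void)
{
	for (uint32_t pwm = 0; pwm <= 255; pwm += 51) {
		uint32_t round_trip = percent_to_pwm(pwm_to_percent(pwm));

		printf("pwm1=%3u -> %3u%% -> pwm1=%3u\n",
		       pwm, pwm_to_percent(pwm), round_trip);
	}
	return 0;
}

With the percent step removed, whatever value userspace writes to pwm1 is the value the SMU/hwmgr backend receives, subject only to clamping at 255.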
diff --git a/drivers/gpu/drm/amd/pm/inc/hwmgr.h b/drivers/gpu/drm/amd/pm/inc/hwmgr.h index 490371bd2520..8ed01071fe5a 100644 --- a/drivers/gpu/drm/amd/pm/inc/hwmgr.h +++ b/drivers/gpu/drm/amd/pm/inc/hwmgr.h @@ -278,9 +278,9 @@ struct pp_hwmgr_func { int (*get_fan_speed_info)(struct pp_hwmgr *hwmgr, struct phm_fan_speed_info *fan_speed_info); void (*set_fan_control_mode)(struct pp_hwmgr *hwmgr, uint32_t mode); uint32_t (*get_fan_control_mode)(struct pp_hwmgr *hwmgr); - int (*set_fan_speed_percent)(struct pp_hwmgr *hwmgr, uint32_t percent); - int (*get_fan_speed_percent)(struct pp_hwmgr *hwmgr, uint32_t *speed); - int (*set_fan_speed_rpm)(struct pp_hwmgr *hwmgr, uint32_t percent); + int (*set_fan_speed_pwm)(struct pp_hwmgr *hwmgr, uint32_t speed); + int (*get_fan_speed_pwm)(struct pp_hwmgr *hwmgr, uint32_t *speed); + int (*set_fan_speed_rpm)(struct pp_hwmgr *hwmgr, uint32_t speed); int (*get_fan_speed_rpm)(struct pp_hwmgr *hwmgr, uint32_t *speed); int (*reset_fan_speed_to_default)(struct pp_hwmgr *hwmgr); int (*uninitialize_thermal_controller)(struct pp_hwmgr *hwmgr); diff --git a/drivers/gpu/drm/amd/pm/inc/smu11_driver_if_cyan_skillfish.h b/drivers/gpu/drm/amd/pm/inc/smu11_driver_if_cyan_skillfish.h new file mode 100644 index 000000000000..8a08ecc34c69 --- /dev/null +++ b/drivers/gpu/drm/amd/pm/inc/smu11_driver_if_cyan_skillfish.h @@ -0,0 +1,95 @@ +/* + * Copyright 2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ +#ifndef __SMU11_DRIVER_IF_CYAN_SKILLFISH_H__ +#define __SMU11_DRIVER_IF_CYAN_SKILLFISH_H__ + +// *** IMPORTANT *** +// Always increment the interface version if +// any structure is changed in this file +#define MP1_DRIVER_IF_VERSION 0x8 + +#define TABLE_BIOS_IF 0 // Called by BIOS +#define TABLE_WATERMARKS 1 // Called by Driver; defined here, but not used, for backward compatible +#define TABLE_PMSTATUSLOG 3 // Called by Tools for Agm logging +#define TABLE_DPMCLOCKS 4 // Called by Driver; defined here, but not used, for backward compatible +#define TABLE_MOMENTARY_PM 5 // Called by Tools; defined here, but not used, for backward compatible +#define TABLE_COUNT 6 + +#define NUM_DSPCLK_LEVELS 8 +#define NUM_SOCCLK_DPM_LEVELS 8 +#define NUM_DCEFCLK_DPM_LEVELS 4 +#define NUM_FCLK_DPM_LEVELS 4 +#define NUM_MEMCLK_DPM_LEVELS 4 + +#define NUMBER_OF_PSTATES 8 +#define NUMBER_OF_CORES 8 + +typedef enum { + S3_TYPE_ENTRY, + S5_TYPE_ENTRY, +} Sleep_Type_e; + +typedef enum { + GFX_OFF = 0, + GFX_ON = 1, +} GFX_Mode_e; + +typedef enum { + CPU_P0 = 0, + CPU_P1, + CPU_P2, + CPU_P3, + CPU_P4, + CPU_P5, + CPU_P6, + CPU_P7 +} CPU_PState_e; + +typedef enum { + CPU_CORE0 = 0, + CPU_CORE1, + CPU_CORE2, + CPU_CORE3, + CPU_CORE4, + CPU_CORE5, + CPU_CORE6, + CPU_CORE7 +} CORE_ID_e; + +typedef enum { + DF_DPM0 = 0, + DF_DPM1, + DF_DPM2, + DF_DPM3, + DF_PState_Count +} DF_PState_e; + +typedef enum { + GFX_DPM0 = 0, + GFX_DPM1, + GFX_DPM2, + GFX_DPM3, + GFX_PState_Count +} GFX_PState_e; + +#endif diff --git a/drivers/gpu/drm/amd/pm/inc/smu11_driver_if_sienna_cichlid.h b/drivers/gpu/drm/amd/pm/inc/smu11_driver_if_sienna_cichlid.h index 61c87c39be80..63b8701fd466 100644 --- a/drivers/gpu/drm/amd/pm/inc/smu11_driver_if_sienna_cichlid.h +++ b/drivers/gpu/drm/amd/pm/inc/smu11_driver_if_sienna_cichlid.h @@ -131,7 +131,7 @@ #define FEATURE_GFX_EDC_BIT 49 #define FEATURE_GFX_PER_PART_VMIN_BIT 50 #define FEATURE_SMART_SHIFT_BIT 51 -#define FEATURE_SPARE_52_BIT 52 +#define FEATURE_APT_BIT 52 #define FEATURE_SPARE_53_BIT 53 #define FEATURE_SPARE_54_BIT 54 #define FEATURE_SPARE_55_BIT 55 @@ -211,6 +211,7 @@ typedef enum { #define THROTTLER_FIT_BIT 17 #define THROTTLER_PPM_BIT 18 #define THROTTLER_APCC_BIT 19 +#define THROTTLER_COUNT 20 // FW DState Features Control Bits // FW DState Features Control Bits @@ -1406,7 +1407,67 @@ typedef struct { } SmuMetrics_t; typedef struct { - SmuMetrics_t SmuMetrics; + uint32_t CurrClock[PPCLK_COUNT]; + + uint16_t AverageGfxclkFrequencyPreDs; + uint16_t AverageGfxclkFrequencyPostDs; + uint16_t AverageFclkFrequencyPreDs; + uint16_t AverageFclkFrequencyPostDs; + uint16_t AverageUclkFrequencyPreDs ; + uint16_t AverageUclkFrequencyPostDs ; + + + uint16_t AverageGfxActivity ; + uint16_t AverageUclkActivity ; + uint8_t CurrSocVoltageOffset ; + uint8_t CurrGfxVoltageOffset ; + uint8_t CurrMemVidOffset ; + uint8_t Padding8 ; + uint16_t AverageSocketPower ; + uint16_t TemperatureEdge ; + uint16_t TemperatureHotspot ; + uint16_t TemperatureMem ; + uint16_t TemperatureVrGfx ; + uint16_t TemperatureVrMem0 ; + uint16_t TemperatureVrMem1 ; + uint16_t TemperatureVrSoc ; + uint16_t TemperatureLiquid0 ; + uint16_t TemperatureLiquid1 ; + uint16_t TemperaturePlx ; + uint16_t Padding16 ; + uint32_t AccCnt ; + uint8_t ThrottlingPercentage[THROTTLER_COUNT]; + + + uint8_t LinkDpmLevel; + uint8_t CurrFanPwm; + uint16_t CurrFanSpeed; + + //BACO metrics, PMFW-1721 + //metrics for D3hot entry/exit and driver ARM msgs + uint8_t D3HotEntryCountPerMode[D3HOT_SEQUENCE_COUNT]; + uint8_t 
D3HotExitCountPerMode[D3HOT_SEQUENCE_COUNT]; + uint8_t ArmMsgReceivedCountPerMode[D3HOT_SEQUENCE_COUNT]; + + //PMFW-4362 + uint32_t EnergyAccumulator; + uint16_t AverageVclk0Frequency ; + uint16_t AverageDclk0Frequency ; + uint16_t AverageVclk1Frequency ; + uint16_t AverageDclk1Frequency ; + uint16_t VcnActivityPercentage ; //place holder, David N. to provide full sequence + uint8_t PcieRate ; + uint8_t PcieWidth ; + uint16_t AverageGfxclkFrequencyTarget; + uint16_t Padding16_2; + +} SmuMetrics_V2_t; + +typedef struct { + union { + SmuMetrics_t SmuMetrics; + SmuMetrics_V2_t SmuMetrics_V2; + }; uint32_t Spare[1]; // Padding - ignore diff --git a/drivers/gpu/drm/amd/pm/inc/smu_types.h b/drivers/gpu/drm/amd/pm/inc/smu_types.h index 1d3765b873df..6f1b1b50d527 100644 --- a/drivers/gpu/drm/amd/pm/inc/smu_types.h +++ b/drivers/gpu/drm/amd/pm/inc/smu_types.h @@ -282,6 +282,7 @@ enum smu_clk_type { __SMU_DUMMY_MAP(TDC), \ __SMU_DUMMY_MAP(THERMAL), \ __SMU_DUMMY_MAP(GFX_PER_CU_CG), \ + __SMU_DUMMY_MAP(DATA_CALCULATIONS), \ __SMU_DUMMY_MAP(RM), \ __SMU_DUMMY_MAP(DS_DCEFCLK), \ __SMU_DUMMY_MAP(ACDC), \ @@ -297,7 +298,6 @@ enum smu_clk_type { __SMU_DUMMY_MAP(DS_FCLK), \ __SMU_DUMMY_MAP(DS_MP1CLK), \ __SMU_DUMMY_MAP(DS_MP0CLK), \ - __SMU_DUMMY_MAP(XGMI), \ __SMU_DUMMY_MAP(XGMI_PER_LINK_PWR_DWN), \ __SMU_DUMMY_MAP(DPM_GFX_PACE), \ __SMU_DUMMY_MAP(MEM_VDDCI_SCALING), \ diff --git a/drivers/gpu/drm/amd/pm/inc/smu_v11_0.h b/drivers/gpu/drm/amd/pm/inc/smu_v11_0.h index f61b5c914a3d..cbdae8a2c698 100644 --- a/drivers/gpu/drm/amd/pm/inc/smu_v11_0.h +++ b/drivers/gpu/drm/amd/pm/inc/smu_v11_0.h @@ -30,11 +30,12 @@ #define SMU11_DRIVER_IF_VERSION_NV10 0x37 #define SMU11_DRIVER_IF_VERSION_NV12 0x38 #define SMU11_DRIVER_IF_VERSION_NV14 0x38 -#define SMU11_DRIVER_IF_VERSION_Sienna_Cichlid 0x3D +#define SMU11_DRIVER_IF_VERSION_Sienna_Cichlid 0x40 #define SMU11_DRIVER_IF_VERSION_Navy_Flounder 0xE #define SMU11_DRIVER_IF_VERSION_VANGOGH 0x03 #define SMU11_DRIVER_IF_VERSION_Dimgrey_Cavefish 0xF #define SMU11_DRIVER_IF_VERSION_Beige_Goby 0xD +#define SMU11_DRIVER_IF_VERSION_Cyan_Skillfish 0x8 /* MP Apertures */ #define MP0_Public 0x03800000 @@ -220,9 +221,18 @@ int smu_v11_0_set_fan_control_mode(struct smu_context *smu, uint32_t mode); -int smu_v11_0_set_fan_speed_percent(struct smu_context *smu, +int smu_v11_0_set_fan_speed_pwm(struct smu_context *smu, uint32_t speed); +int smu_v11_0_set_fan_speed_rpm(struct smu_context *smu, + uint32_t speed); + +int smu_v11_0_get_fan_speed_pwm(struct smu_context *smu, + uint32_t *speed); + +int smu_v11_0_get_fan_speed_rpm(struct smu_context *smu, + uint32_t *speed); + int smu_v11_0_set_xgmi_pstate(struct smu_context *smu, uint32_t pstate); @@ -302,5 +312,7 @@ void smu_v11_0_interrupt_work(struct smu_context *smu); int smu_v11_0_set_light_sbr(struct smu_context *smu, bool enable); +int smu_v11_0_restore_user_od_settings(struct smu_context *smu); + #endif #endif diff --git a/drivers/gpu/drm/amd/pm/inc/smu_v11_8_pmfw.h b/drivers/gpu/drm/amd/pm/inc/smu_v11_8_pmfw.h new file mode 100644 index 000000000000..bd4fcb6b9610 --- /dev/null +++ b/drivers/gpu/drm/amd/pm/inc/smu_v11_8_pmfw.h @@ -0,0 +1,152 @@ +/* + * Copyright 2021 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __SMU_V11_8_0_PMFW_H__ +#define __SMU_V11_8_0_PMFW_H__ + +#pragma pack(push, 1) + +#define ENABLE_DEBUG_FEATURES + +// Feature Control Defines +#define FEATURE_CCLK_CONTROLLER_BIT 0 +#define FEATURE_GFXCLK_EFFT_FREQ_BIT 1 +#define FEATURE_DATA_CALCULATION_BIT 2 +#define FEATURE_THERMAL_BIT 3 +#define FEATURE_PLL_POWER_DOWN_BIT 4 +#define FEATURE_FCLK_DPM_BIT 5 +#define FEATURE_GFX_DPM_BIT 6 +#define FEATURE_DS_GFXCLK_BIT 7 +#define FEATURE_DS_SOCCLK_BIT 8 +#define FEATURE_DS_LCLK_BIT 9 +#define FEATURE_CORE_CSTATES_BIT 10 +#define FEATURE_G6_SSC_BIT 11 //G6 memory UCLK and UCLK_DIV SS +#define FEATURE_RM_BIT 12 +#define FEATURE_SOC_DPM_BIT 13 +#define FEATURE_DS_SMNCLK_BIT 14 +#define FEATURE_DS_MP1CLK_BIT 15 +#define FEATURE_DS_MP0CLK_BIT 16 +#define FEATURE_MGCG_BIT 17 +#define FEATURE_DS_FUSE_SRAM_BIT 18 +#define FEATURE_GFX_CKS_BIT 19 +#define FEATURE_FP_THROTTLING_BIT 20 +#define FEATURE_PROCHOT_BIT 21 +#define FEATURE_CPUOFF_BIT 22 +#define FEATURE_UMC_THROTTLE_BIT 23 +#define FEATURE_DF_THROTTLE_BIT 24 +#define FEATURE_DS_MP3CLK_BIT 25 +#define FEATURE_DS_SHUBCLK_BIT 26 +#define FEATURE_TDC_BIT 27 //Legacy APM_BIT +#define FEATURE_UMC_CAL_SHARING_BIT 28 +#define FEATURE_DFLL_BTC_CALIBRATION_BIT 29 +#define FEATURE_EDC_BIT 30 +#define FEATURE_DLDO_BIT 31 +#define FEATURE_MEAS_DRAM_BLACKOUT_BIT 32 +#define FEATURE_CC1_BIT 33 +#define FEATURE_PPT_BIT 34 +#define FEATURE_STAPM_BIT 35 +#define FEATURE_CSTATE_BOOST_BIT 36 +#define FEATURE_SPARE_37_BIT 37 +#define FEATURE_SPARE_38_BIT 38 +#define FEATURE_SPARE_39_BIT 39 +#define FEATURE_SPARE_40_BIT 40 +#define FEATURE_SPARE_41_BIT 41 +#define FEATURE_SPARE_42_BIT 42 +#define FEATURE_SPARE_43_BIT 43 +#define FEATURE_SPARE_44_BIT 44 +#define FEATURE_SPARE_45_BIT 45 +#define FEATURE_SPARE_46_BIT 46 +#define FEATURE_SPARE_47_BIT 47 +#define FEATURE_SPARE_48_BIT 48 +#define FEATURE_SPARE_49_BIT 49 +#define FEATURE_SPARE_50_BIT 50 +#define FEATURE_SPARE_51_BIT 51 +#define FEATURE_SPARE_52_BIT 52 +#define FEATURE_SPARE_53_BIT 53 +#define FEATURE_SPARE_54_BIT 54 +#define FEATURE_SPARE_55_BIT 55 +#define FEATURE_SPARE_56_BIT 56 +#define FEATURE_SPARE_57_BIT 57 +#define FEATURE_SPARE_58_BIT 58 +#define FEATURE_SPARE_59_BIT 59 +#define FEATURE_SPARE_60_BIT 60 +#define FEATURE_SPARE_61_BIT 61 +#define FEATURE_SPARE_62_BIT 62 +#define FEATURE_SPARE_63_BIT 63 + +#define NUM_FEATURES 64 + +#define FEATURE_CCLK_CONTROLLER_MASK (1 << 
FEATURE_CCLK_CONTROLLER_BIT) +#define FEATURE_DATA_CALCULATION_MASK (1 << FEATURE_DATA_CALCULATION_BIT) +#define FEATURE_THERMAL_MASK (1 << FEATURE_THERMAL_BIT) +#define FEATURE_PLL_POWER_DOWN_MASK (1 << FEATURE_PLL_POWER_DOWN_BIT) +#define FEATURE_FCLK_DPM_MASK (1 << FEATURE_FCLK_DPM_BIT) +#define FEATURE_GFX_DPM_MASK (1 << FEATURE_GFX_DPM_BIT) +#define FEATURE_DS_GFXCLK_MASK (1 << FEATURE_DS_GFXCLK_BIT) +#define FEATURE_DS_SOCCLK_MASK (1 << FEATURE_DS_SOCCLK_BIT) +#define FEATURE_DS_LCLK_MASK (1 << FEATURE_DS_LCLK_BIT) +#define FEATURE_RM_MASK (1 << FEATURE_RM_BIT) +#define FEATURE_DS_SMNCLK_MASK (1 << FEATURE_DS_SMNCLK_BIT) +#define FEATURE_DS_MP1CLK_MASK (1 << FEATURE_DS_MP1CLK_BIT) +#define FEATURE_DS_MP0CLK_MASK (1 << FEATURE_DS_MP0CLK_BIT) +#define FEATURE_MGCG_MASK (1 << FEATURE_MGCG_BIT) +#define FEATURE_DS_FUSE_SRAM_MASK (1 << FEATURE_DS_FUSE_SRAM_BIT) +#define FEATURE_PROCHOT_MASK (1 << FEATURE_PROCHOT_BIT) +#define FEATURE_CPUOFF_MASK (1 << FEATURE_CPUOFF_BIT) +#define FEATURE_GFX_CKS_MASK (1 << FEATURE_GFX_CKS_BIT) +#define FEATURE_UMC_THROTTLE_MASK (1 << FEATURE_UMC_THROTTLE_BIT) +#define FEATURE_DF_THROTTLE_MASK (1 << FEATURE_DF_THROTTLE_BIT) +#define FEATURE_SOC_DPM_MASK (1 << FEATURE_SOC_DPM_BIT) + +typedef struct { + // MP1_EXT_SCRATCH0 + uint32_t SPARE1 : 4; + uint32_t SPARE2 : 4; + uint32_t SPARE3 : 4; + uint32_t CurrLevel_LCLK : 4; + uint32_t CurrLevel_MP0CLK : 4; + uint32_t CurrLevel_FCLK : 4; + uint32_t CurrLevel_SOCCLK : 4; + uint32_t CurrLevel_DCEFCLK : 4; + // MP1_EXT_SCRATCH1 + uint32_t SPARE4 : 4; + uint32_t SPARE5 : 4; + uint32_t SPARE6 : 4; + uint32_t TargLevel_LCLK : 4; + uint32_t TargLevel_MP0CLK : 4; + uint32_t TargLevel_FCLK : 4; + uint32_t TargLevel_SOCCLK : 4; + uint32_t TargLevel_DCEFCLK : 4; + // MP1_EXT_SCRATCH2 + uint32_t CurrLevel_SHUBCLK : 4; + uint32_t TargLevel_SHUBCLK : 4; + uint32_t Reserved : 24; + // MP1_EXT_SCRATCH3-4 + uint32_t Reserved2[2]; + // MP1_EXT_SCRATCH5 + uint32_t FeatureStatus[NUM_FEATURES / 32]; +} FwStatus_t; + +#pragma pack(pop) + +#endif diff --git a/drivers/gpu/drm/amd/pm/inc/smu_v11_8_ppsmc.h b/drivers/gpu/drm/amd/pm/inc/smu_v11_8_ppsmc.h new file mode 100644 index 000000000000..6e6088760b18 --- /dev/null +++ b/drivers/gpu/drm/amd/pm/inc/smu_v11_8_ppsmc.h @@ -0,0 +1,70 @@ +/* + * Copyright 2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#ifndef SMU_11_8_0_PPSMC_H +#define SMU_11_8_0_PPSMC_H + +// SMU Response Codes: +#define PPSMC_Result_OK 0x1 +#define PPSMC_Result_Failed 0xFF +#define PPSMC_Result_UnknownCmd 0xFE +#define PPSMC_Result_CmdRejectedPrereq 0xFD +#define PPSMC_Result_CmdRejectedBusy 0xFC + +// Message Definitions: +#define PPSMC_MSG_TestMessage 0x1 +#define PPSMC_MSG_GetSmuVersion 0x2 +#define PPSMC_MSG_GetDriverIfVersion 0x3 +#define PPSMC_MSG_SetDriverTableDramAddrHigh 0x4 +#define PPSMC_MSG_SetDriverTableDramAddrLow 0x5 +#define PPSMC_MSG_TransferTableSmu2Dram 0x6 +#define PPSMC_MSG_TransferTableDram2Smu 0x7 +#define PPSMC_MSG_Rsvd1 0xA +#define PPSMC_MSG_RequestCorePstate 0xB +#define PPSMC_MSG_QueryCorePstate 0xC +#define PPSMC_MSG_Rsvd2 0xD +#define PPSMC_MSG_RequestGfxclk 0xE +#define PPSMC_MSG_QueryGfxclk 0xF +#define PPSMC_MSG_QueryVddcrSocClock 0x11 +#define PPSMC_MSG_QueryDfPstate 0x13 +#define PPSMC_MSG_Rsvd3 0x14 +#define PPSMC_MSG_ConfigureS3PwrOffRegisterAddressHigh 0x16 +#define PPSMC_MSG_ConfigureS3PwrOffRegisterAddressLow 0x17 +#define PPSMC_MSG_RequestActiveWgp 0x18 +#define PPSMC_MSG_SetMinDeepSleepGfxclkFreq 0x19 +#define PPSMC_MSG_SetMaxDeepSleepDfllGfxDiv 0x1A +#define PPSMC_MSG_StartTelemetryReporting 0x1B +#define PPSMC_MSG_StopTelemetryReporting 0x1C +#define PPSMC_MSG_ClearTelemetryMax 0x1D +#define PPSMC_MSG_QueryActiveWgp 0x1E +#define PPSMC_MSG_SetCoreEnableMask 0x2C +#define PPSMC_MSG_InitiateGcRsmuSoftReset 0x2E +#define PPSMC_MSG_GfxCacWeightOperation 0x2F +#define PPSMC_MSG_L3CacWeightOperation 0x30 +#define PPSMC_MSG_PackCoreCacWeight 0x31 +#define PPSMC_MSG_SetDriverTableVMID 0x34 +#define PPSMC_MSG_SetSoftMinCclk 0x35 +#define PPSMC_MSG_SetSoftMaxCclk 0x36 +#define PPSMC_Message_Count 0x37 + +#endif diff --git a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c index d2a38246a78a..321215003643 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c +++ b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c @@ -533,7 +533,7 @@ static uint32_t pp_dpm_get_fan_control_mode(void *handle) return mode; } -static int pp_dpm_set_fan_speed_percent(void *handle, uint32_t percent) +static int pp_dpm_set_fan_speed_pwm(void *handle, uint32_t speed) { struct pp_hwmgr *hwmgr = handle; int ret = 0; @@ -541,17 +541,17 @@ static int pp_dpm_set_fan_speed_percent(void *handle, uint32_t percent) if (!hwmgr || !hwmgr->pm_en) return -EINVAL; - if (hwmgr->hwmgr_func->set_fan_speed_percent == NULL) { + if (hwmgr->hwmgr_func->set_fan_speed_pwm == NULL) { pr_info_ratelimited("%s was not implemented.\n", __func__); return 0; } mutex_lock(&hwmgr->smu_lock); - ret = hwmgr->hwmgr_func->set_fan_speed_percent(hwmgr, percent); + ret = hwmgr->hwmgr_func->set_fan_speed_pwm(hwmgr, speed); mutex_unlock(&hwmgr->smu_lock); return ret; } -static int pp_dpm_get_fan_speed_percent(void *handle, uint32_t *speed) +static int pp_dpm_get_fan_speed_pwm(void *handle, uint32_t *speed) { struct pp_hwmgr *hwmgr = handle; int ret = 0; @@ -559,13 +559,13 @@ static int pp_dpm_get_fan_speed_percent(void *handle, uint32_t *speed) if (!hwmgr || !hwmgr->pm_en) return -EINVAL; - if (hwmgr->hwmgr_func->get_fan_speed_percent == NULL) { + if (hwmgr->hwmgr_func->get_fan_speed_pwm == NULL) { pr_info_ratelimited("%s was not implemented.\n", __func__); return 0; } mutex_lock(&hwmgr->smu_lock); - ret = hwmgr->hwmgr_func->get_fan_speed_percent(hwmgr, speed); + ret = hwmgr->hwmgr_func->get_fan_speed_pwm(hwmgr, speed); mutex_unlock(&hwmgr->smu_lock); return ret; } @@ -1691,8 
+1691,8 @@ static const struct amd_pm_funcs pp_dpm_funcs = { .dispatch_tasks = pp_dpm_dispatch_tasks, .set_fan_control_mode = pp_dpm_set_fan_control_mode, .get_fan_control_mode = pp_dpm_get_fan_control_mode, - .set_fan_speed_percent = pp_dpm_set_fan_speed_percent, - .get_fan_speed_percent = pp_dpm_get_fan_speed_percent, + .set_fan_speed_pwm = pp_dpm_set_fan_speed_pwm, + .get_fan_speed_pwm = pp_dpm_get_fan_speed_pwm, .get_fan_speed_rpm = pp_dpm_get_fan_speed_rpm, .set_fan_speed_rpm = pp_dpm_set_fan_speed_rpm, .get_pp_num_states = pp_dpm_get_pp_num_states, diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c index 43c3f6e755e7..1de3ae77e03e 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c @@ -1036,13 +1036,13 @@ static int smu10_print_clock_levels(struct pp_hwmgr *hwmgr, else i = 1; - size += sprintf(buf + size, "0: %uMhz %s\n", + size += sysfs_emit_at(buf, size, "0: %uMhz %s\n", data->gfx_min_freq_limit/100, i == 0 ? "*" : ""); - size += sprintf(buf + size, "1: %uMhz %s\n", + size += sysfs_emit_at(buf, size, "1: %uMhz %s\n", i == 1 ? now : SMU10_UMD_PSTATE_GFXCLK, i == 1 ? "*" : ""); - size += sprintf(buf + size, "2: %uMhz %s\n", + size += sysfs_emit_at(buf, size, "2: %uMhz %s\n", data->gfx_max_freq_limit/100, i == 2 ? "*" : ""); break; @@ -1050,7 +1050,7 @@ static int smu10_print_clock_levels(struct pp_hwmgr *hwmgr, smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetFclkFrequency, &now); for (i = 0; i < mclk_table->count; i++) - size += sprintf(buf + size, "%d: %uMhz %s\n", + size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n", i, mclk_table->entries[i].clk / 100, ((mclk_table->entries[i].clk / 100) @@ -1065,10 +1065,10 @@ static int smu10_print_clock_levels(struct pp_hwmgr *hwmgr, if (ret) return ret; - size = sprintf(buf, "%s:\n", "OD_SCLK"); - size += sprintf(buf + size, "0: %10uMhz\n", + size = sysfs_emit(buf, "%s:\n", "OD_SCLK"); + size += sysfs_emit_at(buf, size, "0: %10uMhz\n", (data->gfx_actual_soft_min_freq > 0) ? data->gfx_actual_soft_min_freq : min_freq); - size += sprintf(buf + size, "1: %10uMhz\n", + size += sysfs_emit_at(buf, size, "1: %10uMhz\n", (data->gfx_actual_soft_max_freq > 0) ? data->gfx_actual_soft_max_freq : max_freq); } break; @@ -1081,8 +1081,8 @@ static int smu10_print_clock_levels(struct pp_hwmgr *hwmgr, if (ret) return ret; - size = sprintf(buf, "%s:\n", "OD_RANGE"); - size += sprintf(buf + size, "SCLK: %7uMHz %10uMHz\n", + size = sysfs_emit(buf, "%s:\n", "OD_RANGE"); + size += sysfs_emit_at(buf, size, "SCLK: %7uMHz %10uMHz\n", min_freq, max_freq); } break; @@ -1456,11 +1456,11 @@ static int smu10_get_power_profile_mode(struct pp_hwmgr *hwmgr, char *buf) if (!buf) return -EINVAL; - size += sprintf(buf + size, "%s %16s %s %s %s %s\n",title[0], + size += sysfs_emit_at(buf, size, "%s %16s %s %s %s %s\n",title[0], title[1], title[2], title[3], title[4], title[5]); for (i = 0; i <= PP_SMC_POWER_PROFILE_COMPUTE; i++) - size += sprintf(buf + size, "%3d %14s%s: %14d %3d %10d %14d\n", + size += sysfs_emit_at(buf, size, "%3d %14s%s: %14d %3d %10d %14d\n", i, profile_name[i], (i == hwmgr->power_profile_mode) ? 
"*" : " ", profile_mode_setting[i][0], profile_mode_setting[i][1], profile_mode_setting[i][2], profile_mode_setting[i][3]); @@ -1580,7 +1580,7 @@ static int smu10_set_fine_grain_clk_vol(struct pp_hwmgr *hwmgr, } if (smu10_data->gfx_actual_soft_min_freq > smu10_data->gfx_actual_soft_max_freq) { - pr_err("The setting minimun sclk (%d) MHz is greater than the setting maximum sclk (%d) MHz\n", + pr_err("The setting minimum sclk (%d) MHz is greater than the setting maximum sclk (%d) MHz\n", smu10_data->gfx_actual_soft_min_freq, smu10_data->gfx_actual_soft_max_freq); return -EINVAL; } diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c index 0541bfc81c1b..465ff8d2a01a 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c @@ -3212,7 +3212,7 @@ static int smu7_force_dpm_level(struct pp_hwmgr *hwmgr, if (!ret) { if (level == AMD_DPM_FORCED_LEVEL_PROFILE_PEAK && hwmgr->dpm_level != AMD_DPM_FORCED_LEVEL_PROFILE_PEAK) - smu7_fan_ctrl_set_fan_speed_percent(hwmgr, 100); + smu7_fan_ctrl_set_fan_speed_pwm(hwmgr, 255); else if (level != AMD_DPM_FORCED_LEVEL_PROFILE_PEAK && hwmgr->dpm_level == AMD_DPM_FORCED_LEVEL_PROFILE_PEAK) smu7_fan_ctrl_reset_fan_speed_to_default(hwmgr); } @@ -4896,8 +4896,8 @@ static int smu7_print_clock_levels(struct pp_hwmgr *hwmgr, struct smu7_odn_dpm_table *odn_table = &(data->odn_dpm_table); struct phm_odn_clock_levels *odn_sclk_table = &(odn_table->odn_core_clock_dpm_levels); struct phm_odn_clock_levels *odn_mclk_table = &(odn_table->odn_memory_clock_dpm_levels); - int i, now, size = 0; - uint32_t clock, pcie_speed; + int size = 0; + uint32_t i, now, clock, pcie_speed; switch (type) { case PP_SCLK: @@ -4911,7 +4911,7 @@ static int smu7_print_clock_levels(struct pp_hwmgr *hwmgr, now = i; for (i = 0; i < sclk_table->count; i++) - size += sprintf(buf + size, "%d: %uMhz %s\n", + size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n", i, sclk_table->dpm_levels[i].value / 100, (i == now) ? "*" : ""); break; @@ -4926,7 +4926,7 @@ static int smu7_print_clock_levels(struct pp_hwmgr *hwmgr, now = i; for (i = 0; i < mclk_table->count; i++) - size += sprintf(buf + size, "%d: %uMhz %s\n", + size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n", i, mclk_table->dpm_levels[i].value / 100, (i == now) ? "*" : ""); break; @@ -4940,7 +4940,7 @@ static int smu7_print_clock_levels(struct pp_hwmgr *hwmgr, now = i; for (i = 0; i < pcie_table->count; i++) - size += sprintf(buf + size, "%d: %s %s\n", i, + size += sysfs_emit_at(buf, size, "%d: %s %s\n", i, (pcie_table->dpm_levels[i].value == 0) ? "2.5GT/s, x8" : (pcie_table->dpm_levels[i].value == 1) ? "5.0GT/s, x16" : (pcie_table->dpm_levels[i].value == 2) ? 
"8.0GT/s, x16" : "", @@ -4948,32 +4948,32 @@ static int smu7_print_clock_levels(struct pp_hwmgr *hwmgr, break; case OD_SCLK: if (hwmgr->od_enabled) { - size = sprintf(buf, "%s:\n", "OD_SCLK"); + size = sysfs_emit(buf, "%s:\n", "OD_SCLK"); for (i = 0; i < odn_sclk_table->num_of_pl; i++) - size += sprintf(buf + size, "%d: %10uMHz %10umV\n", + size += sysfs_emit_at(buf, size, "%d: %10uMHz %10umV\n", i, odn_sclk_table->entries[i].clock/100, odn_sclk_table->entries[i].vddc); } break; case OD_MCLK: if (hwmgr->od_enabled) { - size = sprintf(buf, "%s:\n", "OD_MCLK"); + size = sysfs_emit(buf, "%s:\n", "OD_MCLK"); for (i = 0; i < odn_mclk_table->num_of_pl; i++) - size += sprintf(buf + size, "%d: %10uMHz %10umV\n", + size += sysfs_emit_at(buf, size, "%d: %10uMHz %10umV\n", i, odn_mclk_table->entries[i].clock/100, odn_mclk_table->entries[i].vddc); } break; case OD_RANGE: if (hwmgr->od_enabled) { - size = sprintf(buf, "%s:\n", "OD_RANGE"); - size += sprintf(buf + size, "SCLK: %7uMHz %10uMHz\n", + size = sysfs_emit(buf, "%s:\n", "OD_RANGE"); + size += sysfs_emit_at(buf, size, "SCLK: %7uMHz %10uMHz\n", data->golden_dpm_table.sclk_table.dpm_levels[0].value/100, hwmgr->platform_descriptor.overdriveLimit.engineClock/100); - size += sprintf(buf + size, "MCLK: %7uMHz %10uMHz\n", + size += sysfs_emit_at(buf, size, "MCLK: %7uMHz %10uMHz\n", data->golden_dpm_table.mclk_table.dpm_levels[0].value/100, hwmgr->platform_descriptor.overdriveLimit.memoryClock/100); - size += sprintf(buf + size, "VDDC: %7umV %11umV\n", + size += sysfs_emit_at(buf, size, "VDDC: %7umV %11umV\n", data->odn_dpm_table.min_vddc, data->odn_dpm_table.max_vddc); } @@ -4988,7 +4988,7 @@ static void smu7_set_fan_control_mode(struct pp_hwmgr *hwmgr, uint32_t mode) { switch (mode) { case AMD_FAN_CTRL_NONE: - smu7_fan_ctrl_set_fan_speed_percent(hwmgr, 100); + smu7_fan_ctrl_set_fan_speed_pwm(hwmgr, 255); break; case AMD_FAN_CTRL_MANUAL: if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, @@ -5503,7 +5503,7 @@ static int smu7_get_power_profile_mode(struct pp_hwmgr *hwmgr, char *buf) if (!buf) return -EINVAL; - size += sprintf(buf + size, "%s %16s %16s %16s %16s %16s %16s %16s\n", + size += sysfs_emit_at(buf, size, "%s %16s %16s %16s %16s %16s %16s %16s\n", title[0], title[1], title[2], title[3], title[4], title[5], title[6], title[7]); @@ -5511,7 +5511,7 @@ static int smu7_get_power_profile_mode(struct pp_hwmgr *hwmgr, char *buf) for (i = 0; i < len; i++) { if (i == hwmgr->power_profile_mode) { - size += sprintf(buf + size, "%3d %14s %s: %8d %16d %16d %16d %16d %16d\n", + size += sysfs_emit_at(buf, size, "%3d %14s %s: %8d %16d %16d %16d %16d %16d\n", i, profile_name[i], "*", data->current_profile_setting.sclk_up_hyst, data->current_profile_setting.sclk_down_hyst, @@ -5522,21 +5522,21 @@ static int smu7_get_power_profile_mode(struct pp_hwmgr *hwmgr, char *buf) continue; } if (smu7_profiling[i].bupdate_sclk) - size += sprintf(buf + size, "%3d %16s: %8d %16d %16d ", + size += sysfs_emit_at(buf, size, "%3d %16s: %8d %16d %16d ", i, profile_name[i], smu7_profiling[i].sclk_up_hyst, smu7_profiling[i].sclk_down_hyst, smu7_profiling[i].sclk_activity); else - size += sprintf(buf + size, "%3d %16s: %8s %16s %16s ", + size += sysfs_emit_at(buf, size, "%3d %16s: %8s %16s %16s ", i, profile_name[i], "-", "-", "-"); if (smu7_profiling[i].bupdate_mclk) - size += sprintf(buf + size, "%16d %16d %16d\n", + size += sysfs_emit_at(buf, size, "%16d %16d %16d\n", smu7_profiling[i].mclk_up_hyst, smu7_profiling[i].mclk_down_hyst, smu7_profiling[i].mclk_activity); else 
- size += sprintf(buf + size, "%16s %16s %16s\n", + size += sysfs_emit_at(buf, size, "%16s %16s %16s\n", "-", "-", "-"); } @@ -5692,8 +5692,8 @@ static const struct pp_hwmgr_func smu7_hwmgr_funcs = { .set_max_fan_rpm_output = smu7_set_max_fan_rpm_output, .stop_thermal_controller = smu7_thermal_stop_thermal_controller, .get_fan_speed_info = smu7_fan_ctrl_get_fan_speed_info, - .get_fan_speed_percent = smu7_fan_ctrl_get_fan_speed_percent, - .set_fan_speed_percent = smu7_fan_ctrl_set_fan_speed_percent, + .get_fan_speed_pwm = smu7_fan_ctrl_get_fan_speed_pwm, + .set_fan_speed_pwm = smu7_fan_ctrl_set_fan_speed_pwm, .reset_fan_speed_to_default = smu7_fan_ctrl_reset_fan_speed_to_default, .get_fan_speed_rpm = smu7_fan_ctrl_get_fan_speed_rpm, .set_fan_speed_rpm = smu7_fan_ctrl_set_fan_speed_rpm, diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_thermal.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_thermal.c index 6cfe148ed45b..a6c3610db23e 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_thermal.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_thermal.c @@ -51,7 +51,7 @@ int smu7_fan_ctrl_get_fan_speed_info(struct pp_hwmgr *hwmgr, return 0; } -int smu7_fan_ctrl_get_fan_speed_percent(struct pp_hwmgr *hwmgr, +int smu7_fan_ctrl_get_fan_speed_pwm(struct pp_hwmgr *hwmgr, uint32_t *speed) { uint32_t duty100; @@ -70,12 +70,9 @@ int smu7_fan_ctrl_get_fan_speed_percent(struct pp_hwmgr *hwmgr, return -EINVAL; - tmp64 = (uint64_t)duty * 100; + tmp64 = (uint64_t)duty * 255; do_div(tmp64, duty100); - *speed = (uint32_t)tmp64; - - if (*speed > 100) - *speed = 100; + *speed = MIN((uint32_t)tmp64, 255); return 0; } @@ -199,12 +196,11 @@ int smu7_fan_ctrl_stop_smc_fan_control(struct pp_hwmgr *hwmgr) } /** - * smu7_fan_ctrl_set_fan_speed_percent - Set Fan Speed in percent. + * smu7_fan_ctrl_set_fan_speed_pwm - Set Fan Speed in PWM. * @hwmgr: the address of the powerplay hardware manager. - * @speed: is the percentage value (0% - 100%) to be set. - * Exception: Fails is the 100% setting appears to be 0. + * @speed: is the pwm value (0 - 255) to be set. 
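For reference, the new 0 - 255 fan interface maps the FDO duty readback straight onto a PWM value; a minimal standalone sketch of that scaling (hypothetical helper name, fmax_duty100 standing in for the FMAX_DUTY100 register field, do_div() replaced by plain division):

static unsigned int duty_to_pwm(unsigned int duty, unsigned int fmax_duty100)
{
	unsigned long long tmp;

	if (!fmax_duty100)
		return 0;	/* the driver returns -EINVAL in this case */

	tmp = (unsigned long long)duty * 255;
	tmp /= fmax_duty100;

	return tmp > 255 ? 255 : (unsigned int)tmp;	/* MIN((uint32_t)tmp64, 255) */
}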
*/ -int smu7_fan_ctrl_set_fan_speed_percent(struct pp_hwmgr *hwmgr, +int smu7_fan_ctrl_set_fan_speed_pwm(struct pp_hwmgr *hwmgr, uint32_t speed) { uint32_t duty100; @@ -214,8 +210,7 @@ int smu7_fan_ctrl_set_fan_speed_percent(struct pp_hwmgr *hwmgr, if (hwmgr->thermal_controller.fanInfo.bNoFan) return 0; - if (speed > 100) - speed = 100; + speed = MIN(speed, 255); if (PP_CAP(PHM_PlatformCaps_MicrocodeFanControl)) smu7_fan_ctrl_stop_smc_fan_control(hwmgr); @@ -227,7 +222,7 @@ int smu7_fan_ctrl_set_fan_speed_percent(struct pp_hwmgr *hwmgr, return -EINVAL; tmp64 = (uint64_t)speed * duty100; - do_div(tmp64, 100); + do_div(tmp64, 255); duty = (uint32_t)tmp64; PHM_WRITE_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_thermal.h b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_thermal.h index 42c1ba0fad78..a386a437e1f0 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_thermal.h +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_thermal.h @@ -41,10 +41,10 @@ extern int smu7_thermal_get_temperature(struct pp_hwmgr *hwmgr); extern int smu7_thermal_stop_thermal_controller(struct pp_hwmgr *hwmgr); extern int smu7_fan_ctrl_get_fan_speed_info(struct pp_hwmgr *hwmgr, struct phm_fan_speed_info *fan_speed_info); -extern int smu7_fan_ctrl_get_fan_speed_percent(struct pp_hwmgr *hwmgr, uint32_t *speed); +extern int smu7_fan_ctrl_get_fan_speed_pwm(struct pp_hwmgr *hwmgr, uint32_t *speed); extern int smu7_fan_ctrl_set_default_mode(struct pp_hwmgr *hwmgr); extern int smu7_fan_ctrl_set_static_mode(struct pp_hwmgr *hwmgr, uint32_t mode); -extern int smu7_fan_ctrl_set_fan_speed_percent(struct pp_hwmgr *hwmgr, uint32_t speed); +extern int smu7_fan_ctrl_set_fan_speed_pwm(struct pp_hwmgr *hwmgr, uint32_t speed); extern int smu7_fan_ctrl_reset_fan_speed_to_default(struct pp_hwmgr *hwmgr); extern int smu7_thermal_ctrl_uninitialize_thermal_controller(struct pp_hwmgr *hwmgr); extern int smu7_fan_ctrl_set_fan_speed_rpm(struct pp_hwmgr *hwmgr, uint32_t speed); diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu8_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu8_hwmgr.c index d425b02b1418..b94a77e4e714 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu8_hwmgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu8_hwmgr.c @@ -1547,7 +1547,8 @@ static int smu8_print_clock_levels(struct pp_hwmgr *hwmgr, struct smu8_hwmgr *data = hwmgr->backend; struct phm_clock_voltage_dependency_table *sclk_table = hwmgr->dyn_state.vddc_dependency_on_sclk; - int i, now, size = 0; + uint32_t i, now; + int size = 0; switch (type) { case PP_SCLK: @@ -1558,7 +1559,7 @@ static int smu8_print_clock_levels(struct pp_hwmgr *hwmgr, CURR_SCLK_INDEX); for (i = 0; i < sclk_table->count; i++) - size += sprintf(buf + size, "%d: %uMhz %s\n", + size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n", i, sclk_table->entries[i].clk / 100, (i == now) ? "*" : ""); break; @@ -1570,7 +1571,7 @@ static int smu8_print_clock_levels(struct pp_hwmgr *hwmgr, CURR_MCLK_INDEX); for (i = SMU8_NUM_NBPMEMORYCLOCK; i > 0; i--) - size += sprintf(buf + size, "%d: %uMhz %s\n", + size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n", SMU8_NUM_NBPMEMORYCLOCK-i, data->sys_info.nbp_memory_clock[i-1] / 100, (SMU8_NUM_NBPMEMORYCLOCK-i == now) ? 
"*" : ""); break; diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c index 02e8c6e5448d..c152a61ddd2c 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c @@ -4199,7 +4199,7 @@ static void vega10_set_fan_control_mode(struct pp_hwmgr *hwmgr, uint32_t mode) switch (mode) { case AMD_FAN_CTRL_NONE: - vega10_fan_ctrl_set_fan_speed_percent(hwmgr, 100); + vega10_fan_ctrl_set_fan_speed_pwm(hwmgr, 255); break; case AMD_FAN_CTRL_MANUAL: if (PP_CAP(PHM_PlatformCaps_MicrocodeFanControl)) @@ -4553,13 +4553,13 @@ static int vega10_get_ppfeature_status(struct pp_hwmgr *hwmgr, char *buf) "[EnableAllSmuFeatures] Failed to get enabled smc features!", return ret); - size += sprintf(buf + size, "Current ppfeatures: 0x%016llx\n", features_enabled); - size += sprintf(buf + size, "%-19s %-22s %s\n", + size += sysfs_emit_at(buf, size, "Current ppfeatures: 0x%016llx\n", features_enabled); + size += sysfs_emit_at(buf, size, "%-19s %-22s %s\n", output_title[0], output_title[1], output_title[2]); for (i = 0; i < GNLD_FEATURES_MAX; i++) { - size += sprintf(buf + size, "%-19s 0x%016llx %6s\n", + size += sysfs_emit_at(buf, size, "%-19s 0x%016llx %6s\n", ppfeature_name[i], 1ULL << i, (features_enabled & (1ULL << i)) ? "Y" : "N"); @@ -4650,7 +4650,7 @@ static int vega10_print_clock_levels(struct pp_hwmgr *hwmgr, else count = sclk_table->count; for (i = 0; i < count; i++) - size += sprintf(buf + size, "%d: %uMhz %s\n", + size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n", i, sclk_table->dpm_levels[i].value / 100, (i == now) ? "*" : ""); break; @@ -4661,7 +4661,7 @@ static int vega10_print_clock_levels(struct pp_hwmgr *hwmgr, smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetCurrentUclkIndex, &now); for (i = 0; i < mclk_table->count; i++) - size += sprintf(buf + size, "%d: %uMhz %s\n", + size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n", i, mclk_table->dpm_levels[i].value / 100, (i == now) ? "*" : ""); break; @@ -4672,7 +4672,7 @@ static int vega10_print_clock_levels(struct pp_hwmgr *hwmgr, smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetCurrentSocclkIndex, &now); for (i = 0; i < soc_table->count; i++) - size += sprintf(buf + size, "%d: %uMhz %s\n", + size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n", i, soc_table->dpm_levels[i].value / 100, (i == now) ? "*" : ""); break; @@ -4684,7 +4684,7 @@ static int vega10_print_clock_levels(struct pp_hwmgr *hwmgr, PPSMC_MSG_GetClockFreqMHz, CLK_DCEFCLK, &now); for (i = 0; i < dcef_table->count; i++) - size += sprintf(buf + size, "%d: %uMhz %s\n", + size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n", i, dcef_table->dpm_levels[i].value / 100, (dcef_table->dpm_levels[i].value / 100 == now) ? "*" : ""); @@ -4698,7 +4698,7 @@ static int vega10_print_clock_levels(struct pp_hwmgr *hwmgr, gen_speed = pptable->PcieGenSpeed[i]; lane_width = pptable->PcieLaneCount[i]; - size += sprintf(buf + size, "%d: %s %s %s\n", i, + size += sysfs_emit_at(buf, size, "%d: %s %s %s\n", i, (gen_speed == 0) ? "2.5GT/s," : (gen_speed == 1) ? "5.0GT/s," : (gen_speed == 2) ? 
"8.0GT/s," : @@ -4717,34 +4717,34 @@ static int vega10_print_clock_levels(struct pp_hwmgr *hwmgr, case OD_SCLK: if (hwmgr->od_enabled) { - size = sprintf(buf, "%s:\n", "OD_SCLK"); + size = sysfs_emit(buf, "%s:\n", "OD_SCLK"); podn_vdd_dep = &data->odn_dpm_table.vdd_dep_on_sclk; for (i = 0; i < podn_vdd_dep->count; i++) - size += sprintf(buf + size, "%d: %10uMhz %10umV\n", + size += sysfs_emit_at(buf, size, "%d: %10uMhz %10umV\n", i, podn_vdd_dep->entries[i].clk / 100, podn_vdd_dep->entries[i].vddc); } break; case OD_MCLK: if (hwmgr->od_enabled) { - size = sprintf(buf, "%s:\n", "OD_MCLK"); + size = sysfs_emit(buf, "%s:\n", "OD_MCLK"); podn_vdd_dep = &data->odn_dpm_table.vdd_dep_on_mclk; for (i = 0; i < podn_vdd_dep->count; i++) - size += sprintf(buf + size, "%d: %10uMhz %10umV\n", + size += sysfs_emit_at(buf, size, "%d: %10uMhz %10umV\n", i, podn_vdd_dep->entries[i].clk/100, podn_vdd_dep->entries[i].vddc); } break; case OD_RANGE: if (hwmgr->od_enabled) { - size = sprintf(buf, "%s:\n", "OD_RANGE"); - size += sprintf(buf + size, "SCLK: %7uMHz %10uMHz\n", + size = sysfs_emit(buf, "%s:\n", "OD_RANGE"); + size += sysfs_emit_at(buf, size, "SCLK: %7uMHz %10uMHz\n", data->golden_dpm_table.gfx_table.dpm_levels[0].value/100, hwmgr->platform_descriptor.overdriveLimit.engineClock/100); - size += sprintf(buf + size, "MCLK: %7uMHz %10uMHz\n", + size += sysfs_emit_at(buf, size, "MCLK: %7uMHz %10uMHz\n", data->golden_dpm_table.mem_table.dpm_levels[0].value/100, hwmgr->platform_descriptor.overdriveLimit.memoryClock/100); - size += sprintf(buf + size, "VDDC: %7umV %11umV\n", + size += sysfs_emit_at(buf, size, "VDDC: %7umV %11umV\n", data->odn_dpm_table.min_vddc, data->odn_dpm_table.max_vddc); } @@ -5112,15 +5112,15 @@ static int vega10_get_power_profile_mode(struct pp_hwmgr *hwmgr, char *buf) if (!buf) return -EINVAL; - size += sprintf(buf + size, "%s %16s %s %s %s %s\n",title[0], + size += sysfs_emit_at(buf, size, "%s %16s %s %s %s %s\n",title[0], title[1], title[2], title[3], title[4], title[5]); for (i = 0; i < PP_SMC_POWER_PROFILE_CUSTOM; i++) - size += sprintf(buf + size, "%3d %14s%s: %14d %3d %10d %14d\n", + size += sysfs_emit_at(buf, size, "%3d %14s%s: %14d %3d %10d %14d\n", i, profile_name[i], (i == hwmgr->power_profile_mode) ? "*" : " ", profile_mode_setting[i][0], profile_mode_setting[i][1], profile_mode_setting[i][2], profile_mode_setting[i][3]); - size += sprintf(buf + size, "%3d %14s%s: %14d %3d %10d %14d\n", i, + size += sysfs_emit_at(buf, size, "%3d %14s%s: %14d %3d %10d %14d\n", i, profile_name[i], (i == hwmgr->power_profile_mode) ? 
"*" : " ", data->custom_profile_mode[0], data->custom_profile_mode[1], data->custom_profile_mode[2], data->custom_profile_mode[3]); @@ -5536,8 +5536,8 @@ static const struct pp_hwmgr_func vega10_hwmgr_funcs = { .force_dpm_level = vega10_dpm_force_dpm_level, .stop_thermal_controller = vega10_thermal_stop_thermal_controller, .get_fan_speed_info = vega10_fan_ctrl_get_fan_speed_info, - .get_fan_speed_percent = vega10_fan_ctrl_get_fan_speed_percent, - .set_fan_speed_percent = vega10_fan_ctrl_set_fan_speed_percent, + .get_fan_speed_pwm = vega10_fan_ctrl_get_fan_speed_pwm, + .set_fan_speed_pwm = vega10_fan_ctrl_set_fan_speed_pwm, .reset_fan_speed_to_default = vega10_fan_ctrl_reset_fan_speed_to_default, .get_fan_speed_rpm = vega10_fan_ctrl_get_fan_speed_rpm, diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_thermal.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_thermal.c index 9b46b27bd30c..dad3e3741a4e 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_thermal.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_thermal.c @@ -64,7 +64,7 @@ int vega10_fan_ctrl_get_fan_speed_info(struct pp_hwmgr *hwmgr, return 0; } -int vega10_fan_ctrl_get_fan_speed_percent(struct pp_hwmgr *hwmgr, +int vega10_fan_ctrl_get_fan_speed_pwm(struct pp_hwmgr *hwmgr, uint32_t *speed) { uint32_t current_rpm; @@ -78,11 +78,11 @@ int vega10_fan_ctrl_get_fan_speed_percent(struct pp_hwmgr *hwmgr, if (hwmgr->thermal_controller. advanceFanControlParameters.usMaxFanRPM != 0) - percent = current_rpm * 100 / + percent = current_rpm * 255 / hwmgr->thermal_controller. advanceFanControlParameters.usMaxFanRPM; - *speed = percent > 100 ? 100 : percent; + *speed = MIN(percent, 255); return 0; } @@ -241,12 +241,11 @@ int vega10_fan_ctrl_stop_smc_fan_control(struct pp_hwmgr *hwmgr) } /** - * vega10_fan_ctrl_set_fan_speed_percent - Set Fan Speed in percent. + * vega10_fan_ctrl_set_fan_speed_pwm - Set Fan Speed in PWM. * @hwmgr: the address of the powerplay hardware manager. - * @speed: is the percentage value (0% - 100%) to be set. - * Exception: Fails is the 100% setting appears to be 0. + * @speed: is the percentage value (0 - 255) to be set. 
*/ -int vega10_fan_ctrl_set_fan_speed_percent(struct pp_hwmgr *hwmgr, +int vega10_fan_ctrl_set_fan_speed_pwm(struct pp_hwmgr *hwmgr, uint32_t speed) { struct amdgpu_device *adev = hwmgr->adev; @@ -257,8 +256,7 @@ int vega10_fan_ctrl_set_fan_speed_percent(struct pp_hwmgr *hwmgr, if (hwmgr->thermal_controller.fanInfo.bNoFan) return 0; - if (speed > 100) - speed = 100; + speed = MIN(speed, 255); if (PP_CAP(PHM_PlatformCaps_MicrocodeFanControl)) vega10_fan_ctrl_stop_smc_fan_control(hwmgr); @@ -270,7 +268,7 @@ int vega10_fan_ctrl_set_fan_speed_percent(struct pp_hwmgr *hwmgr, return -EINVAL; tmp64 = (uint64_t)speed * duty100; - do_div(tmp64, 100); + do_div(tmp64, 255); duty = (uint32_t)tmp64; WREG32_SOC15(THM, 0, mmCG_FDO_CTRL0, diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_thermal.h b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_thermal.h index 4a0ede7c1f07..6850a21a2991 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_thermal.h +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_thermal.h @@ -54,12 +54,12 @@ extern int vega10_thermal_get_temperature(struct pp_hwmgr *hwmgr); extern int vega10_thermal_stop_thermal_controller(struct pp_hwmgr *hwmgr); extern int vega10_fan_ctrl_get_fan_speed_info(struct pp_hwmgr *hwmgr, struct phm_fan_speed_info *fan_speed_info); -extern int vega10_fan_ctrl_get_fan_speed_percent(struct pp_hwmgr *hwmgr, +extern int vega10_fan_ctrl_get_fan_speed_pwm(struct pp_hwmgr *hwmgr, uint32_t *speed); extern int vega10_fan_ctrl_set_default_mode(struct pp_hwmgr *hwmgr); extern int vega10_fan_ctrl_set_static_mode(struct pp_hwmgr *hwmgr, uint32_t mode); -extern int vega10_fan_ctrl_set_fan_speed_percent(struct pp_hwmgr *hwmgr, +extern int vega10_fan_ctrl_set_fan_speed_pwm(struct pp_hwmgr *hwmgr, uint32_t speed); extern int vega10_fan_ctrl_reset_fan_speed_to_default(struct pp_hwmgr *hwmgr); extern int vega10_thermal_ctrl_uninitialize_thermal_controller( diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega12_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega12_hwmgr.c index 29e0d1d4035a..8558718e15a8 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega12_hwmgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega12_hwmgr.c @@ -2146,13 +2146,13 @@ static int vega12_get_ppfeature_status(struct pp_hwmgr *hwmgr, char *buf) "[EnableAllSmuFeatures] Failed to get enabled smc features!", return ret); - size += sprintf(buf + size, "Current ppfeatures: 0x%016llx\n", features_enabled); - size += sprintf(buf + size, "%-19s %-22s %s\n", + size += sysfs_emit_at(buf, size, "Current ppfeatures: 0x%016llx\n", features_enabled); + size += sysfs_emit_at(buf, size, "%-19s %-22s %s\n", output_title[0], output_title[1], output_title[2]); for (i = 0; i < GNLD_FEATURES_MAX; i++) { - size += sprintf(buf + size, "%-19s 0x%016llx %6s\n", + size += sysfs_emit_at(buf, size, "%-19s 0x%016llx %6s\n", ppfeature_name[i], 1ULL << i, (features_enabled & (1ULL << i)) ? "Y" : "N"); @@ -2256,7 +2256,7 @@ static int vega12_print_clock_levels(struct pp_hwmgr *hwmgr, "Attempt to get gfx clk levels Failed!", return -1); for (i = 0; i < clocks.num_levels; i++) - size += sprintf(buf + size, "%d: %uMhz %s\n", + size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n", i, clocks.data[i].clocks_in_khz / 1000, (clocks.data[i].clocks_in_khz / 1000 == now / 100) ? 
"*" : ""); break; @@ -2272,7 +2272,7 @@ static int vega12_print_clock_levels(struct pp_hwmgr *hwmgr, "Attempt to get memory clk levels Failed!", return -1); for (i = 0; i < clocks.num_levels; i++) - size += sprintf(buf + size, "%d: %uMhz %s\n", + size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n", i, clocks.data[i].clocks_in_khz / 1000, (clocks.data[i].clocks_in_khz / 1000 == now / 100) ? "*" : ""); break; @@ -2290,7 +2290,7 @@ static int vega12_print_clock_levels(struct pp_hwmgr *hwmgr, "Attempt to get soc clk levels Failed!", return -1); for (i = 0; i < clocks.num_levels; i++) - size += sprintf(buf + size, "%d: %uMhz %s\n", + size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n", i, clocks.data[i].clocks_in_khz / 1000, (clocks.data[i].clocks_in_khz / 1000 == now) ? "*" : ""); break; @@ -2308,7 +2308,7 @@ static int vega12_print_clock_levels(struct pp_hwmgr *hwmgr, "Attempt to get dcef clk levels Failed!", return -1); for (i = 0; i < clocks.num_levels; i++) - size += sprintf(buf + size, "%d: %uMhz %s\n", + size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n", i, clocks.data[i].clocks_in_khz / 1000, (clocks.data[i].clocks_in_khz / 1000 == now) ? "*" : ""); break; diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c index 0791309586c5..0cf39c1244b1 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c @@ -2769,7 +2769,7 @@ static void vega20_set_fan_control_mode(struct pp_hwmgr *hwmgr, uint32_t mode) { switch (mode) { case AMD_FAN_CTRL_NONE: - vega20_fan_ctrl_set_fan_speed_percent(hwmgr, 100); + vega20_fan_ctrl_set_fan_speed_pwm(hwmgr, 255); break; case AMD_FAN_CTRL_MANUAL: if (PP_CAP(PHM_PlatformCaps_MicrocodeFanControl)) @@ -3243,13 +3243,13 @@ static int vega20_get_ppfeature_status(struct pp_hwmgr *hwmgr, char *buf) "[EnableAllSmuFeatures] Failed to get enabled smc features!", return ret); - size += sprintf(buf + size, "Current ppfeatures: 0x%016llx\n", features_enabled); - size += sprintf(buf + size, "%-19s %-22s %s\n", + size += sysfs_emit_at(buf, size, "Current ppfeatures: 0x%016llx\n", features_enabled); + size += sysfs_emit_at(buf, size, "%-19s %-22s %s\n", output_title[0], output_title[1], output_title[2]); for (i = 0; i < GNLD_FEATURES_MAX; i++) { - size += sprintf(buf + size, "%-19s 0x%016llx %6s\n", + size += sysfs_emit_at(buf, size, "%-19s 0x%016llx %6s\n", ppfeature_name[i], 1ULL << i, (features_enabled & (1ULL << i)) ? "Y" : "N"); @@ -3372,13 +3372,13 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr, return ret); if (vega20_get_sclks(hwmgr, &clocks)) { - size += sprintf(buf + size, "0: %uMhz * (DPM disabled)\n", + size += sysfs_emit_at(buf, size, "0: %uMhz * (DPM disabled)\n", now / 100); break; } for (i = 0; i < clocks.num_levels; i++) - size += sprintf(buf + size, "%d: %uMhz %s\n", + size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n", i, clocks.data[i].clocks_in_khz / 1000, (clocks.data[i].clocks_in_khz == now * 10) ? 
"*" : ""); break; @@ -3390,13 +3390,13 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr, return ret); if (vega20_get_memclocks(hwmgr, &clocks)) { - size += sprintf(buf + size, "0: %uMhz * (DPM disabled)\n", + size += sysfs_emit_at(buf, size, "0: %uMhz * (DPM disabled)\n", now / 100); break; } for (i = 0; i < clocks.num_levels; i++) - size += sprintf(buf + size, "%d: %uMhz %s\n", + size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n", i, clocks.data[i].clocks_in_khz / 1000, (clocks.data[i].clocks_in_khz == now * 10) ? "*" : ""); break; @@ -3408,13 +3408,13 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr, return ret); if (vega20_get_socclocks(hwmgr, &clocks)) { - size += sprintf(buf + size, "0: %uMhz * (DPM disabled)\n", + size += sysfs_emit_at(buf, size, "0: %uMhz * (DPM disabled)\n", now / 100); break; } for (i = 0; i < clocks.num_levels; i++) - size += sprintf(buf + size, "%d: %uMhz %s\n", + size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n", i, clocks.data[i].clocks_in_khz / 1000, (clocks.data[i].clocks_in_khz == now * 10) ? "*" : ""); break; @@ -3426,7 +3426,7 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr, return ret); for (i = 0; i < fclk_dpm_table->count; i++) - size += sprintf(buf + size, "%d: %uMhz %s\n", + size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n", i, fclk_dpm_table->dpm_levels[i].value, fclk_dpm_table->dpm_levels[i].value == (now / 100) ? "*" : ""); break; @@ -3438,13 +3438,13 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr, return ret); if (vega20_get_dcefclocks(hwmgr, &clocks)) { - size += sprintf(buf + size, "0: %uMhz * (DPM disabled)\n", + size += sysfs_emit_at(buf, size, "0: %uMhz * (DPM disabled)\n", now / 100); break; } for (i = 0; i < clocks.num_levels; i++) - size += sprintf(buf + size, "%d: %uMhz %s\n", + size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n", i, clocks.data[i].clocks_in_khz / 1000, (clocks.data[i].clocks_in_khz == now * 10) ? "*" : ""); break; @@ -3458,7 +3458,7 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr, gen_speed = pptable->PcieGenSpeed[i]; lane_width = pptable->PcieLaneCount[i]; - size += sprintf(buf + size, "%d: %s %s %dMhz %s\n", i, + size += sysfs_emit_at(buf, size, "%d: %s %s %dMhz %s\n", i, (gen_speed == 0) ? "2.5GT/s," : (gen_speed == 1) ? "5.0GT/s," : (gen_speed == 2) ? 
"8.0GT/s," : @@ -3479,18 +3479,18 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr, case OD_SCLK: if (od8_settings[OD8_SETTING_GFXCLK_FMIN].feature_id && od8_settings[OD8_SETTING_GFXCLK_FMAX].feature_id) { - size = sprintf(buf, "%s:\n", "OD_SCLK"); - size += sprintf(buf + size, "0: %10uMhz\n", + size = sysfs_emit(buf, "%s:\n", "OD_SCLK"); + size += sysfs_emit_at(buf, size, "0: %10uMhz\n", od_table->GfxclkFmin); - size += sprintf(buf + size, "1: %10uMhz\n", + size += sysfs_emit_at(buf, size, "1: %10uMhz\n", od_table->GfxclkFmax); } break; case OD_MCLK: if (od8_settings[OD8_SETTING_UCLK_FMAX].feature_id) { - size = sprintf(buf, "%s:\n", "OD_MCLK"); - size += sprintf(buf + size, "1: %10uMhz\n", + size = sysfs_emit(buf, "%s:\n", "OD_MCLK"); + size += sysfs_emit_at(buf, size, "1: %10uMhz\n", od_table->UclkFmax); } @@ -3503,14 +3503,14 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr, od8_settings[OD8_SETTING_GFXCLK_VOLTAGE1].feature_id && od8_settings[OD8_SETTING_GFXCLK_VOLTAGE2].feature_id && od8_settings[OD8_SETTING_GFXCLK_VOLTAGE3].feature_id) { - size = sprintf(buf, "%s:\n", "OD_VDDC_CURVE"); - size += sprintf(buf + size, "0: %10uMhz %10dmV\n", + size = sysfs_emit(buf, "%s:\n", "OD_VDDC_CURVE"); + size += sysfs_emit_at(buf, size, "0: %10uMhz %10dmV\n", od_table->GfxclkFreq1, od_table->GfxclkVolt1 / VOLTAGE_SCALE); - size += sprintf(buf + size, "1: %10uMhz %10dmV\n", + size += sysfs_emit_at(buf, size, "1: %10uMhz %10dmV\n", od_table->GfxclkFreq2, od_table->GfxclkVolt2 / VOLTAGE_SCALE); - size += sprintf(buf + size, "2: %10uMhz %10dmV\n", + size += sysfs_emit_at(buf, size, "2: %10uMhz %10dmV\n", od_table->GfxclkFreq3, od_table->GfxclkVolt3 / VOLTAGE_SCALE); } @@ -3518,17 +3518,17 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr, break; case OD_RANGE: - size = sprintf(buf, "%s:\n", "OD_RANGE"); + size = sysfs_emit(buf, "%s:\n", "OD_RANGE"); if (od8_settings[OD8_SETTING_GFXCLK_FMIN].feature_id && od8_settings[OD8_SETTING_GFXCLK_FMAX].feature_id) { - size += sprintf(buf + size, "SCLK: %7uMhz %10uMhz\n", + size += sysfs_emit_at(buf, size, "SCLK: %7uMhz %10uMhz\n", od8_settings[OD8_SETTING_GFXCLK_FMIN].min_value, od8_settings[OD8_SETTING_GFXCLK_FMAX].max_value); } if (od8_settings[OD8_SETTING_UCLK_FMAX].feature_id) { - size += sprintf(buf + size, "MCLK: %7uMhz %10uMhz\n", + size += sysfs_emit_at(buf, size, "MCLK: %7uMhz %10uMhz\n", od8_settings[OD8_SETTING_UCLK_FMAX].min_value, od8_settings[OD8_SETTING_UCLK_FMAX].max_value); } @@ -3539,22 +3539,22 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr, od8_settings[OD8_SETTING_GFXCLK_VOLTAGE1].feature_id && od8_settings[OD8_SETTING_GFXCLK_VOLTAGE2].feature_id && od8_settings[OD8_SETTING_GFXCLK_VOLTAGE3].feature_id) { - size += sprintf(buf + size, "VDDC_CURVE_SCLK[0]: %7uMhz %10uMhz\n", + size += sysfs_emit_at(buf, size, "VDDC_CURVE_SCLK[0]: %7uMhz %10uMhz\n", od8_settings[OD8_SETTING_GFXCLK_FREQ1].min_value, od8_settings[OD8_SETTING_GFXCLK_FREQ1].max_value); - size += sprintf(buf + size, "VDDC_CURVE_VOLT[0]: %7dmV %11dmV\n", + size += sysfs_emit_at(buf, size, "VDDC_CURVE_VOLT[0]: %7dmV %11dmV\n", od8_settings[OD8_SETTING_GFXCLK_VOLTAGE1].min_value, od8_settings[OD8_SETTING_GFXCLK_VOLTAGE1].max_value); - size += sprintf(buf + size, "VDDC_CURVE_SCLK[1]: %7uMhz %10uMhz\n", + size += sysfs_emit_at(buf, size, "VDDC_CURVE_SCLK[1]: %7uMhz %10uMhz\n", od8_settings[OD8_SETTING_GFXCLK_FREQ2].min_value, od8_settings[OD8_SETTING_GFXCLK_FREQ2].max_value); - size += sprintf(buf + size, "VDDC_CURVE_VOLT[1]: %7dmV 
%11dmV\n", + size += sysfs_emit_at(buf, size, "VDDC_CURVE_VOLT[1]: %7dmV %11dmV\n", od8_settings[OD8_SETTING_GFXCLK_VOLTAGE2].min_value, od8_settings[OD8_SETTING_GFXCLK_VOLTAGE2].max_value); - size += sprintf(buf + size, "VDDC_CURVE_SCLK[2]: %7uMhz %10uMhz\n", + size += sysfs_emit_at(buf, size, "VDDC_CURVE_SCLK[2]: %7uMhz %10uMhz\n", od8_settings[OD8_SETTING_GFXCLK_FREQ3].min_value, od8_settings[OD8_SETTING_GFXCLK_FREQ3].max_value); - size += sprintf(buf + size, "VDDC_CURVE_VOLT[2]: %7dmV %11dmV\n", + size += sysfs_emit_at(buf, size, "VDDC_CURVE_VOLT[2]: %7dmV %11dmV\n", od8_settings[OD8_SETTING_GFXCLK_VOLTAGE3].min_value, od8_settings[OD8_SETTING_GFXCLK_VOLTAGE3].max_value); } @@ -4003,7 +4003,7 @@ static int vega20_get_power_profile_mode(struct pp_hwmgr *hwmgr, char *buf) if (!buf) return -EINVAL; - size += sprintf(buf + size, "%16s %s %s %s %s %s %s %s %s %s %s\n", + size += sysfs_emit_at(buf, size, "%16s %s %s %s %s %s %s %s %s %s %s\n", title[0], title[1], title[2], title[3], title[4], title[5], title[6], title[7], title[8], title[9], title[10]); @@ -4016,10 +4016,10 @@ static int vega20_get_power_profile_mode(struct pp_hwmgr *hwmgr, char *buf) "[GetPowerProfile] Failed to get activity monitor!", return result); - size += sprintf(buf + size, "%2d %14s%s:\n", + size += sysfs_emit_at(buf, size, "%2d %14s%s:\n", i, profile_name[i], (i == hwmgr->power_profile_mode) ? "*" : " "); - size += sprintf(buf + size, "%19s %d(%13s) %7d %7d %7d %7d %7d %7d %7d %7d %7d\n", + size += sysfs_emit_at(buf, size, "%19s %d(%13s) %7d %7d %7d %7d %7d %7d %7d %7d %7d\n", " ", 0, "GFXCLK", @@ -4033,7 +4033,7 @@ static int vega20_get_power_profile_mode(struct pp_hwmgr *hwmgr, char *buf) activity_monitor.Gfx_PD_Data_error_coeff, activity_monitor.Gfx_PD_Data_error_rate_coeff); - size += sprintf(buf + size, "%19s %d(%13s) %7d %7d %7d %7d %7d %7d %7d %7d %7d\n", + size += sysfs_emit_at(buf, size, "%19s %d(%13s) %7d %7d %7d %7d %7d %7d %7d %7d %7d\n", " ", 1, "SOCCLK", @@ -4047,7 +4047,7 @@ static int vega20_get_power_profile_mode(struct pp_hwmgr *hwmgr, char *buf) activity_monitor.Soc_PD_Data_error_coeff, activity_monitor.Soc_PD_Data_error_rate_coeff); - size += sprintf(buf + size, "%19s %d(%13s) %7d %7d %7d %7d %7d %7d %7d %7d %7d\n", + size += sysfs_emit_at(buf, size, "%19s %d(%13s) %7d %7d %7d %7d %7d %7d %7d %7d %7d\n", " ", 2, "UCLK", @@ -4061,7 +4061,7 @@ static int vega20_get_power_profile_mode(struct pp_hwmgr *hwmgr, char *buf) activity_monitor.Mem_PD_Data_error_coeff, activity_monitor.Mem_PD_Data_error_rate_coeff); - size += sprintf(buf + size, "%19s %d(%13s) %7d %7d %7d %7d %7d %7d %7d %7d %7d\n", + size += sysfs_emit_at(buf, size, "%19s %d(%13s) %7d %7d %7d %7d %7d %7d %7d %7d %7d\n", " ", 3, "FCLK", @@ -4409,8 +4409,8 @@ static const struct pp_hwmgr_func vega20_hwmgr_funcs = { .register_irq_handlers = smu9_register_irq_handlers, .disable_smc_firmware_ctf = vega20_thermal_disable_alert, /* fan control related */ - .get_fan_speed_percent = vega20_fan_ctrl_get_fan_speed_percent, - .set_fan_speed_percent = vega20_fan_ctrl_set_fan_speed_percent, + .get_fan_speed_pwm = vega20_fan_ctrl_get_fan_speed_pwm, + .set_fan_speed_pwm = vega20_fan_ctrl_set_fan_speed_pwm, .get_fan_speed_info = vega20_fan_ctrl_get_fan_speed_info, .get_fan_speed_rpm = vega20_fan_ctrl_get_fan_speed_rpm, .set_fan_speed_rpm = vega20_fan_ctrl_set_fan_speed_rpm, diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_thermal.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_thermal.c index 269dd7e95a44..f4f4efdbda79 100644 --- 
a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_thermal.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_thermal.c @@ -114,26 +114,29 @@ static int vega20_get_current_rpm(struct pp_hwmgr *hwmgr, uint32_t *current_rpm) return 0; } -int vega20_fan_ctrl_get_fan_speed_percent(struct pp_hwmgr *hwmgr, +int vega20_fan_ctrl_get_fan_speed_pwm(struct pp_hwmgr *hwmgr, uint32_t *speed) { - struct vega20_hwmgr *data = (struct vega20_hwmgr *)(hwmgr->backend); - PPTable_t *pp_table = &(data->smc_state_table.pp_table); - uint32_t current_rpm, percent = 0; - int ret = 0; + struct amdgpu_device *adev = hwmgr->adev; + uint32_t duty100, duty; + uint64_t tmp64; - ret = vega20_get_current_rpm(hwmgr, &current_rpm); - if (ret) - return ret; + duty100 = REG_GET_FIELD(RREG32_SOC15(THM, 0, mmCG_FDO_CTRL1), + CG_FDO_CTRL1, FMAX_DUTY100); + duty = REG_GET_FIELD(RREG32_SOC15(THM, 0, mmCG_THERMAL_STATUS), + CG_THERMAL_STATUS, FDO_PWM_DUTY); - percent = current_rpm * 100 / pp_table->FanMaximumRpm; + if (!duty100) + return -EINVAL; - *speed = percent > 100 ? 100 : percent; + tmp64 = (uint64_t)duty * 255; + do_div(tmp64, duty100); + *speed = MIN((uint32_t)tmp64, 255); return 0; } -int vega20_fan_ctrl_set_fan_speed_percent(struct pp_hwmgr *hwmgr, +int vega20_fan_ctrl_set_fan_speed_pwm(struct pp_hwmgr *hwmgr, uint32_t speed) { struct amdgpu_device *adev = hwmgr->adev; @@ -141,8 +144,7 @@ int vega20_fan_ctrl_set_fan_speed_percent(struct pp_hwmgr *hwmgr, uint32_t duty; uint64_t tmp64; - if (speed > 100) - speed = 100; + speed = MIN(speed, 255); if (PP_CAP(PHM_PlatformCaps_MicrocodeFanControl)) vega20_fan_ctrl_stop_smc_fan_control(hwmgr); @@ -154,7 +156,7 @@ int vega20_fan_ctrl_set_fan_speed_percent(struct pp_hwmgr *hwmgr, return -EINVAL; tmp64 = (uint64_t)speed * duty100; - do_div(tmp64, 100); + do_div(tmp64, 255); duty = (uint32_t)tmp64; WREG32_SOC15(THM, 0, mmCG_FDO_CTRL0, diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_thermal.h b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_thermal.h index 2d1769bbd24e..b18d09cf761e 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_thermal.h +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_thermal.h @@ -56,9 +56,9 @@ extern int vega20_fan_ctrl_get_fan_speed_rpm(struct pp_hwmgr *hwmgr, uint32_t *speed); extern int vega20_fan_ctrl_set_fan_speed_rpm(struct pp_hwmgr *hwmgr, uint32_t speed); -extern int vega20_fan_ctrl_get_fan_speed_percent(struct pp_hwmgr *hwmgr, +extern int vega20_fan_ctrl_get_fan_speed_pwm(struct pp_hwmgr *hwmgr, uint32_t *speed); -extern int vega20_fan_ctrl_set_fan_speed_percent(struct pp_hwmgr *hwmgr, +extern int vega20_fan_ctrl_set_fan_speed_pwm(struct pp_hwmgr *hwmgr, uint32_t speed); extern int vega20_fan_ctrl_stop_smc_fan_control(struct pp_hwmgr *hwmgr); extern int vega20_fan_ctrl_start_smc_fan_control(struct pp_hwmgr *hwmgr); diff --git a/drivers/gpu/drm/amd/pm/powerplay/si_dpm.c b/drivers/gpu/drm/amd/pm/powerplay/si_dpm.c index 15c0b8af376f..bdbbeb959c68 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/si_dpm.c +++ b/drivers/gpu/drm/amd/pm/powerplay/si_dpm.c @@ -6539,7 +6539,7 @@ static int si_fan_ctrl_stop_smc_fan_control(struct amdgpu_device *adev) } } -static int si_dpm_get_fan_speed_percent(void *handle, +static int si_dpm_get_fan_speed_pwm(void *handle, u32 *speed) { u32 duty, duty100; @@ -6555,17 +6555,14 @@ static int si_dpm_get_fan_speed_percent(void *handle, if (duty100 == 0) return -EINVAL; - tmp64 = (u64)duty * 100; + tmp64 = (u64)duty * 255; do_div(tmp64, duty100); - *speed = (u32)tmp64; - - if (*speed > 100) - *speed = 100; + *speed = 
MIN((u32)tmp64, 255); return 0; } -static int si_dpm_set_fan_speed_percent(void *handle, +static int si_dpm_set_fan_speed_pwm(void *handle, u32 speed) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -6580,7 +6577,7 @@ static int si_dpm_set_fan_speed_percent(void *handle, if (si_pi->fan_is_controlled_by_smc) return -EINVAL; - if (speed > 100) + if (speed > 255) return -EINVAL; duty100 = (RREG32(CG_FDO_CTRL1) & FMAX_DUTY100_MASK) >> FMAX_DUTY100_SHIFT; @@ -6589,7 +6586,7 @@ static int si_dpm_set_fan_speed_percent(void *handle, return -EINVAL; tmp64 = (u64)speed * duty100; - do_div(tmp64, 100); + do_div(tmp64, 255); duty = (u32)tmp64; tmp = RREG32(CG_FDO_CTRL0) & ~FDO_STATIC_DUTY_MASK; @@ -8059,8 +8056,8 @@ static const struct amd_pm_funcs si_dpm_funcs = { .vblank_too_short = &si_dpm_vblank_too_short, .set_fan_control_mode = &si_dpm_set_fan_control_mode, .get_fan_control_mode = &si_dpm_get_fan_control_mode, - .set_fan_speed_percent = &si_dpm_set_fan_speed_percent, - .get_fan_speed_percent = &si_dpm_get_fan_speed_percent, + .set_fan_speed_pwm = &si_dpm_set_fan_speed_pwm, + .get_fan_speed_pwm = &si_dpm_get_fan_speed_pwm, .check_state_equal = &si_check_state_equal, .get_vce_clock_state = amdgpu_get_vce_clock_state, .read_sensor = &si_dpm_read_sensor, diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index ebe672142808..3ab1ce4d3419 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -36,6 +36,7 @@ #include "vangogh_ppt.h" #include "aldebaran_ppt.h" #include "yellow_carp_ppt.h" +#include "cyan_skillfish_ppt.h" #include "amd_pcie.h" /* @@ -57,7 +58,7 @@ static int smu_handle_task(struct smu_context *smu, enum amd_pp_task task_id, bool lock_needed); static int smu_reset(struct smu_context *smu); -static int smu_set_fan_speed_percent(void *handle, u32 speed); +static int smu_set_fan_speed_pwm(void *handle, u32 speed); static int smu_set_fan_control_mode(struct smu_context *smu, int value); static int smu_set_power_limit(void *handle, uint32_t limit); static int smu_set_fan_speed_rpm(void *handle, uint32_t speed); @@ -402,17 +403,35 @@ static void smu_restore_dpm_user_profile(struct smu_context *smu) } /* set the user dpm fan configurations */ - if (smu->user_dpm_profile.fan_mode == AMD_FAN_CTRL_MANUAL) { + if (smu->user_dpm_profile.fan_mode == AMD_FAN_CTRL_MANUAL || + smu->user_dpm_profile.fan_mode == AMD_FAN_CTRL_NONE) { ret = smu_set_fan_control_mode(smu, smu->user_dpm_profile.fan_mode); if (ret) { + smu->user_dpm_profile.fan_speed_pwm = 0; + smu->user_dpm_profile.fan_speed_rpm = 0; + smu->user_dpm_profile.fan_mode = AMD_FAN_CTRL_AUTO; dev_err(smu->adev->dev, "Failed to set manual fan control mode\n"); - return; } - if (!ret && smu->user_dpm_profile.fan_speed_percent) { - ret = smu_set_fan_speed_percent(smu, smu->user_dpm_profile.fan_speed_percent); + if (smu->user_dpm_profile.fan_speed_pwm) { + ret = smu_set_fan_speed_pwm(smu, smu->user_dpm_profile.fan_speed_pwm); + if (ret) + dev_err(smu->adev->dev, "Failed to set manual fan speed in pwm\n"); + } + + if (smu->user_dpm_profile.fan_speed_rpm) { + ret = smu_set_fan_speed_rpm(smu, smu->user_dpm_profile.fan_speed_rpm); if (ret) - dev_err(smu->adev->dev, "Failed to set manual fan speed\n"); + dev_err(smu->adev->dev, "Failed to set manual fan speed in rpm\n"); + } + } + + /* Restore user customized OD settings */ + if (smu->user_dpm_profile.user_od) { + if (smu->ppt_funcs->restore_user_od_settings) { + ret = 
smu->ppt_funcs->restore_user_od_settings(smu); + if (ret) + dev_err(smu->adev->dev, "Failed to upload customized OD settings\n"); } } @@ -588,6 +607,9 @@ static int smu_set_funcs(struct amdgpu_device *adev) case CHIP_YELLOW_CARP: yellow_carp_set_ppt_funcs(smu); break; + case CHIP_CYAN_SKILLFISH: + cyan_skillfish_set_ppt_funcs(smu); + break; default: return -EINVAL; } @@ -607,6 +629,7 @@ static int smu_early_init(void *handle) mutex_init(&smu->smu_baco.mutex); smu->smu_baco.state = SMU_BACO_STATE_EXIT; smu->smu_baco.platform_support = false; + smu->user_dpm_profile.fan_mode = -1; adev->powerplay.pp_handle = smu; adev->powerplay.pp_funcs = &swsmu_pm_funcs; @@ -2166,7 +2189,6 @@ static int smu_set_gfx_cgpg(struct smu_context *smu, bool enabled) static int smu_set_fan_speed_rpm(void *handle, uint32_t speed) { struct smu_context *smu = handle; - u32 percent; int ret = 0; if (!smu->pm_enabled || !smu->adev->pm.dpm_enabled) @@ -2174,11 +2196,16 @@ static int smu_set_fan_speed_rpm(void *handle, uint32_t speed) mutex_lock(&smu->mutex); - if (smu->ppt_funcs->set_fan_speed_percent) { - percent = speed * 100 / smu->fan_max_rpm; - ret = smu->ppt_funcs->set_fan_speed_percent(smu, percent); - if (!ret && !(smu->user_dpm_profile.flags & SMU_DPM_USER_PROFILE_RESTORE)) - smu->user_dpm_profile.fan_speed_percent = percent; + if (smu->ppt_funcs->set_fan_speed_rpm) { + ret = smu->ppt_funcs->set_fan_speed_rpm(smu, speed); + if (!ret && !(smu->user_dpm_profile.flags & SMU_DPM_USER_PROFILE_RESTORE)) { + smu->user_dpm_profile.flags |= SMU_CUSTOM_FAN_SPEED_RPM; + smu->user_dpm_profile.fan_speed_rpm = speed; + + /* Override custom PWM setting as they cannot co-exist */ + smu->user_dpm_profile.flags &= ~SMU_CUSTOM_FAN_SPEED_PWM; + smu->user_dpm_profile.fan_speed_pwm = 0; + } } mutex_unlock(&smu->mutex); @@ -2538,8 +2565,11 @@ static int smu_set_fan_control_mode(struct smu_context *smu, int value) /* reset user dpm fan speed */ if (!ret && value != AMD_FAN_CTRL_MANUAL && - !(smu->user_dpm_profile.flags & SMU_DPM_USER_PROFILE_RESTORE)) - smu->user_dpm_profile.fan_speed_percent = 0; + !(smu->user_dpm_profile.flags & SMU_DPM_USER_PROFILE_RESTORE)) { + smu->user_dpm_profile.fan_speed_pwm = 0; + smu->user_dpm_profile.fan_speed_rpm = 0; + smu->user_dpm_profile.flags &= ~(SMU_CUSTOM_FAN_SPEED_RPM | SMU_CUSTOM_FAN_SPEED_PWM); + } return ret; } @@ -2552,31 +2582,25 @@ static void smu_pp_set_fan_control_mode(void *handle, u32 value) } -static int smu_get_fan_speed_percent(void *handle, u32 *speed) +static int smu_get_fan_speed_pwm(void *handle, u32 *speed) { struct smu_context *smu = handle; int ret = 0; - uint32_t percent; if (!smu->pm_enabled || !smu->adev->pm.dpm_enabled) return -EOPNOTSUPP; mutex_lock(&smu->mutex); - if (smu->ppt_funcs->get_fan_speed_percent) { - ret = smu->ppt_funcs->get_fan_speed_percent(smu, &percent); - if (!ret) { - *speed = percent > 100 ? 
100 : percent; - } - } + if (smu->ppt_funcs->get_fan_speed_pwm) + ret = smu->ppt_funcs->get_fan_speed_pwm(smu, speed); mutex_unlock(&smu->mutex); - return ret; } -static int smu_set_fan_speed_percent(void *handle, u32 speed) +static int smu_set_fan_speed_pwm(void *handle, u32 speed) { struct smu_context *smu = handle; int ret = 0; @@ -2586,12 +2610,16 @@ static int smu_set_fan_speed_percent(void *handle, u32 speed) mutex_lock(&smu->mutex); - if (smu->ppt_funcs->set_fan_speed_percent) { - if (speed > 100) - speed = 100; - ret = smu->ppt_funcs->set_fan_speed_percent(smu, speed); - if (!ret && !(smu->user_dpm_profile.flags & SMU_DPM_USER_PROFILE_RESTORE)) - smu->user_dpm_profile.fan_speed_percent = speed; + if (smu->ppt_funcs->set_fan_speed_pwm) { + ret = smu->ppt_funcs->set_fan_speed_pwm(smu, speed); + if (!ret && !(smu->user_dpm_profile.flags & SMU_DPM_USER_PROFILE_RESTORE)) { + smu->user_dpm_profile.flags |= SMU_CUSTOM_FAN_SPEED_PWM; + smu->user_dpm_profile.fan_speed_pwm = speed; + + /* Override custom RPM setting as they cannot co-exist */ + smu->user_dpm_profile.flags &= ~SMU_CUSTOM_FAN_SPEED_RPM; + smu->user_dpm_profile.fan_speed_rpm = 0; + } } mutex_unlock(&smu->mutex); @@ -2603,17 +2631,14 @@ static int smu_get_fan_speed_rpm(void *handle, uint32_t *speed) { struct smu_context *smu = handle; int ret = 0; - u32 percent; if (!smu->pm_enabled || !smu->adev->pm.dpm_enabled) return -EOPNOTSUPP; mutex_lock(&smu->mutex); - if (smu->ppt_funcs->get_fan_speed_percent) { - ret = smu->ppt_funcs->get_fan_speed_percent(smu, &percent); - *speed = percent * smu->fan_max_rpm / 100; - } + if (smu->ppt_funcs->get_fan_speed_rpm) + ret = smu->ppt_funcs->get_fan_speed_rpm(smu, speed); mutex_unlock(&smu->mutex); @@ -3030,8 +3055,8 @@ static const struct amd_pm_funcs swsmu_pm_funcs = { /* export for sysfs */ .set_fan_control_mode = smu_pp_set_fan_control_mode, .get_fan_control_mode = smu_get_fan_control_mode, - .set_fan_speed_percent = smu_set_fan_speed_percent, - .get_fan_speed_percent = smu_get_fan_speed_percent, + .set_fan_speed_pwm = smu_set_fan_speed_pwm, + .get_fan_speed_pwm = smu_get_fan_speed_pwm, .force_clock_level = smu_force_ppclk_levels, .print_clock_levels = smu_print_ppclk_levels, .force_performance_level = smu_force_performance_level, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/Makefile b/drivers/gpu/drm/amd/pm/swsmu/smu11/Makefile index 0138c982dfd3..f9b2e16f6431 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/Makefile +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/Makefile @@ -27,6 +27,7 @@ SMU11_MGR = arcturus_ppt.o \ navi10_ppt.o \ sienna_cichlid_ppt.o \ vangogh_ppt.o \ + cyan_skillfish_ppt.o \ smu_v11_0.o AMD_SWSMU_SMU11MGR = $(addprefix $(AMD_SWSMU_PATH)/smu11/,$(SMU11_MGR)) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c index 094df6f87cfc..273df66cac14 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c @@ -81,6 +81,24 @@ #define smnPCIE_ESM_CTRL 0x111003D0 +#define mmCG_FDO_CTRL0_ARCT 0x8B +#define mmCG_FDO_CTRL0_ARCT_BASE_IDX 0 + +#define mmCG_FDO_CTRL1_ARCT 0x8C +#define mmCG_FDO_CTRL1_ARCT_BASE_IDX 0 + +#define mmCG_FDO_CTRL2_ARCT 0x8D +#define mmCG_FDO_CTRL2_ARCT_BASE_IDX 0 + +#define mmCG_TACH_CTRL_ARCT 0x8E +#define mmCG_TACH_CTRL_ARCT_BASE_IDX 0 + +#define mmCG_TACH_STATUS_ARCT 0x8F +#define mmCG_TACH_STATUS_ARCT_BASE_IDX 0 + +#define mmCG_THERMAL_STATUS_ARCT 0x90 +#define mmCG_THERMAL_STATUS_ARCT_BASE_IDX 0 + static const struct 
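The user-profile bookkeeping above keeps the custom PWM and custom RPM settings mutually exclusive; a condensed sketch of that pattern (hypothetical wrapper name, fields and flags as used in the patch):

static void record_fan_pwm_request(struct smu_context *smu, u32 pwm)
{
	smu->user_dpm_profile.flags |= SMU_CUSTOM_FAN_SPEED_PWM;
	smu->user_dpm_profile.fan_speed_pwm = pwm;

	/* a custom PWM and a custom RPM setting cannot co-exist */
	smu->user_dpm_profile.flags &= ~SMU_CUSTOM_FAN_SPEED_RPM;
	smu->user_dpm_profile.fan_speed_rpm = 0;
}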
cmn2asic_msg_mapping arcturus_message_map[SMU_MSG_MAX_COUNT] = { MSG_MAP(TestMessage, PPSMC_MSG_TestMessage, 0), MSG_MAP(GetSmuVersion, PPSMC_MSG_GetSmuVersion, 1), @@ -163,14 +181,14 @@ static const struct cmn2asic_mapping arcturus_feature_mask_map[SMU_FEATURE_COUNT FEA_MAP(DPM_SOCCLK), FEA_MAP(DPM_FCLK), FEA_MAP(DPM_MP0CLK), - ARCTURUS_FEA_MAP(SMU_FEATURE_XGMI_BIT, FEATURE_DPM_XGMI_BIT), + FEA_MAP(DPM_XGMI), FEA_MAP(DS_GFXCLK), FEA_MAP(DS_SOCCLK), FEA_MAP(DS_LCLK), FEA_MAP(DS_FCLK), FEA_MAP(DS_UCLK), FEA_MAP(GFX_ULV), - ARCTURUS_FEA_MAP(SMU_FEATURE_VCN_PG_BIT, FEATURE_DPM_VCN_BIT), + ARCTURUS_FEA_MAP(SMU_FEATURE_VCN_DPM_BIT, FEATURE_DPM_VCN_BIT), FEA_MAP(RSMU_SMN_CG), FEA_MAP(WAFL_CG), FEA_MAP(PPT), @@ -721,13 +739,13 @@ static int arcturus_get_current_clk_freq_by_table(struct smu_context *smu, member_type = METRICS_AVERAGE_SOCCLK; break; case PPCLK_VCLK: - if (smu_cmn_feature_is_enabled(smu, SMU_FEATURE_VCN_PG_BIT)) + if (smu_cmn_feature_is_enabled(smu, SMU_FEATURE_VCN_DPM_BIT)) member_type = METRICS_CURR_VCLK; else member_type = METRICS_AVERAGE_VCLK; break; case PPCLK_DCLK: - if (smu_cmn_feature_is_enabled(smu, SMU_FEATURE_VCN_PG_BIT)) + if (smu_cmn_feature_is_enabled(smu, SMU_FEATURE_VCN_DPM_BIT)) member_type = METRICS_CURR_DCLK; else member_type = METRICS_AVERAGE_DCLK; @@ -756,7 +774,7 @@ static int arcturus_print_clk_levels(struct smu_context *smu, uint32_t gen_speed, lane_width; if (amdgpu_ras_intr_triggered()) - return snprintf(buf, PAGE_SIZE, "unavailable\n"); + return sysfs_emit(buf, "unavailable\n"); dpm_context = smu_dpm->dpm_context; @@ -780,7 +798,7 @@ static int arcturus_print_clk_levels(struct smu_context *smu, * And it's safe to assume that is always the current clock. */ for (i = 0; i < clocks.num_levels; i++) - size += sprintf(buf + size, "%d: %uMhz %s\n", i, + size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n", i, clocks.data[i].clocks_in_khz / 1000, (clocks.num_levels == 1) ? "*" : (arcturus_freqs_in_same_level( @@ -803,7 +821,7 @@ static int arcturus_print_clk_levels(struct smu_context *smu, } for (i = 0; i < clocks.num_levels; i++) - size += sprintf(buf + size, "%d: %uMhz %s\n", + size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n", i, clocks.data[i].clocks_in_khz / 1000, (clocks.num_levels == 1) ? "*" : (arcturus_freqs_in_same_level( @@ -826,7 +844,7 @@ static int arcturus_print_clk_levels(struct smu_context *smu, } for (i = 0; i < clocks.num_levels; i++) - size += sprintf(buf + size, "%d: %uMhz %s\n", + size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n", i, clocks.data[i].clocks_in_khz / 1000, (clocks.num_levels == 1) ? "*" : (arcturus_freqs_in_same_level( @@ -849,7 +867,7 @@ static int arcturus_print_clk_levels(struct smu_context *smu, } for (i = 0; i < single_dpm_table->count; i++) - size += sprintf(buf + size, "%d: %uMhz %s\n", + size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n", i, single_dpm_table->dpm_levels[i].value, (clocks.num_levels == 1) ? "*" : (arcturus_freqs_in_same_level( @@ -872,7 +890,7 @@ static int arcturus_print_clk_levels(struct smu_context *smu, } for (i = 0; i < single_dpm_table->count; i++) - size += sprintf(buf + size, "%d: %uMhz %s\n", + size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n", i, single_dpm_table->dpm_levels[i].value, (clocks.num_levels == 1) ? 
"*" : (arcturus_freqs_in_same_level( @@ -895,7 +913,7 @@ static int arcturus_print_clk_levels(struct smu_context *smu, } for (i = 0; i < single_dpm_table->count; i++) - size += sprintf(buf + size, "%d: %uMhz %s\n", + size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n", i, single_dpm_table->dpm_levels[i].value, (clocks.num_levels == 1) ? "*" : (arcturus_freqs_in_same_level( @@ -906,7 +924,7 @@ static int arcturus_print_clk_levels(struct smu_context *smu, case SMU_PCIE: gen_speed = smu_v11_0_get_current_pcie_link_speed_level(smu); lane_width = smu_v11_0_get_current_pcie_link_width_level(smu); - size += sprintf(buf + size, "0: %s %s %dMhz *\n", + size += sysfs_emit_at(buf, size, "0: %s %s %dMhz *\n", (gen_speed == 0) ? "2.5GT/s," : (gen_speed == 1) ? "5.0GT/s," : (gen_speed == 2) ? "8.0GT/s," : @@ -1162,11 +1180,29 @@ static int arcturus_read_sensor(struct smu_context *smu, return ret; } -static int arcturus_get_fan_speed_percent(struct smu_context *smu, - uint32_t *speed) +static int arcturus_set_fan_static_mode(struct smu_context *smu, + uint32_t mode) { - int ret; - u32 rpm; + struct amdgpu_device *adev = smu->adev; + + WREG32_SOC15(THM, 0, mmCG_FDO_CTRL2_ARCT, + REG_SET_FIELD(RREG32_SOC15(THM, 0, mmCG_FDO_CTRL2_ARCT), + CG_FDO_CTRL2, TMIN, 0)); + WREG32_SOC15(THM, 0, mmCG_FDO_CTRL2_ARCT, + REG_SET_FIELD(RREG32_SOC15(THM, 0, mmCG_FDO_CTRL2_ARCT), + CG_FDO_CTRL2, FDO_PWM_MODE, mode)); + + return 0; +} + +static int arcturus_get_fan_speed_rpm(struct smu_context *smu, + uint32_t *speed) +{ + struct amdgpu_device *adev = smu->adev; + uint32_t crystal_clock_freq = 2500; + uint32_t tach_status; + uint64_t tmp64; + int ret = 0; if (!speed) return -EINVAL; @@ -1175,14 +1211,112 @@ static int arcturus_get_fan_speed_percent(struct smu_context *smu, case AMD_FAN_CTRL_AUTO: ret = arcturus_get_smu_metrics_data(smu, METRICS_CURR_FANSPEED, - &rpm); - if (!ret && smu->fan_max_rpm) - *speed = rpm * 100 / smu->fan_max_rpm; - return ret; + speed); + break; default: - *speed = smu->user_dpm_profile.fan_speed_percent; + /* + * For pre Sienna Cichlid ASICs, the 0 RPM may be not correctly + * detected via register retrieving. To workaround this, we will + * report the fan speed as 0 RPM if user just requested such. + */ + if ((smu->user_dpm_profile.flags & SMU_CUSTOM_FAN_SPEED_RPM) + && !smu->user_dpm_profile.fan_speed_rpm) { + *speed = 0; + return 0; + } + + tmp64 = (uint64_t)crystal_clock_freq * 60 * 10000; + tach_status = RREG32_SOC15(THM, 0, mmCG_TACH_STATUS_ARCT); + if (tach_status) { + do_div(tmp64, tach_status); + *speed = (uint32_t)tmp64; + } else { + *speed = 0; + } + + break; + } + + return ret; +} + +static int arcturus_set_fan_speed_pwm(struct smu_context *smu, + uint32_t speed) +{ + struct amdgpu_device *adev = smu->adev; + uint32_t duty100, duty; + uint64_t tmp64; + + speed = MIN(speed, 255); + + duty100 = REG_GET_FIELD(RREG32_SOC15(THM, 0, mmCG_FDO_CTRL1_ARCT), + CG_FDO_CTRL1, FMAX_DUTY100); + if (!duty100) + return -EINVAL; + + tmp64 = (uint64_t)speed * duty100; + do_div(tmp64, 255); + duty = (uint32_t)tmp64; + + WREG32_SOC15(THM, 0, mmCG_FDO_CTRL0_ARCT, + REG_SET_FIELD(RREG32_SOC15(THM, 0, mmCG_FDO_CTRL0_ARCT), + CG_FDO_CTRL0, FDO_STATIC_DUTY, duty)); + + return arcturus_set_fan_static_mode(smu, FDO_PWM_MODE_STATIC); +} + +static int arcturus_set_fan_speed_rpm(struct smu_context *smu, + uint32_t speed) +{ + struct amdgpu_device *adev = smu->adev; + /* + * crystal_clock_freq used for fan speed rpm calculation is + * always 25Mhz. So, hardcode it as 2500(in 10K unit). 
+ */ + uint32_t crystal_clock_freq = 2500; + uint32_t tach_period; + + tach_period = 60 * crystal_clock_freq * 10000 / (8 * speed); + WREG32_SOC15(THM, 0, mmCG_TACH_CTRL_ARCT, + REG_SET_FIELD(RREG32_SOC15(THM, 0, mmCG_TACH_CTRL_ARCT), + CG_TACH_CTRL, TARGET_PERIOD, + tach_period)); + + return arcturus_set_fan_static_mode(smu, FDO_PWM_MODE_STATIC_RPM); +} + +static int arcturus_get_fan_speed_pwm(struct smu_context *smu, + uint32_t *speed) +{ + struct amdgpu_device *adev = smu->adev; + uint32_t duty100, duty; + uint64_t tmp64; + + /* + * For pre Sienna Cichlid ASICs, the 0 RPM may be not correctly + * detected via register retrieving. To workaround this, we will + * report the fan speed as 0 PWM if user just requested such. + */ + if ((smu->user_dpm_profile.flags & SMU_CUSTOM_FAN_SPEED_PWM) + && !smu->user_dpm_profile.fan_speed_pwm) { + *speed = 0; return 0; } + + duty100 = REG_GET_FIELD(RREG32_SOC15(THM, 0, mmCG_FDO_CTRL1_ARCT), + CG_FDO_CTRL1, FMAX_DUTY100); + duty = REG_GET_FIELD(RREG32_SOC15(THM, 0, mmCG_THERMAL_STATUS_ARCT), + CG_THERMAL_STATUS, FDO_PWM_DUTY); + + if (duty100) { + tmp64 = (uint64_t)duty * 255; + do_div(tmp64, duty100); + *speed = MIN((uint32_t)tmp64, 255); + } else { + *speed = 0; + } + + return 0; } static int arcturus_get_fan_parameters(struct smu_context *smu) @@ -1272,11 +1406,11 @@ static int arcturus_get_power_profile_mode(struct smu_context *smu, return result; if (smu_version >= 0x360d00) - size += sprintf(buf + size, "%16s %s %s %s %s %s %s %s %s %s %s\n", + size += sysfs_emit_at(buf, size, "%16s %s %s %s %s %s %s %s %s %s %s\n", title[0], title[1], title[2], title[3], title[4], title[5], title[6], title[7], title[8], title[9], title[10]); else - size += sprintf(buf + size, "%16s\n", + size += sysfs_emit_at(buf, size, "%16s\n", title[0]); for (i = 0; i <= PP_SMC_POWER_PROFILE_CUSTOM; i++) { @@ -1302,11 +1436,11 @@ static int arcturus_get_power_profile_mode(struct smu_context *smu, } } - size += sprintf(buf + size, "%2d %14s%s\n", + size += sysfs_emit_at(buf, size, "%2d %14s%s\n", i, profile_name[i], (i == smu->power_profile_mode) ? 
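The tach-period arithmetic in the RPM path above is easy to sanity-check: with the crystal clock fixed at 25 MHz (2500 in 10 kHz units), a 3000 RPM request programs 60 * 2500 * 10000 / (8 * 3000) = 62500. A minimal sketch (hypothetical helper name, zero guard added only for the sketch):

static unsigned int rpm_to_tach_period(unsigned int rpm)
{
	const unsigned int crystal_clock_freq = 2500;	/* 25MHz, in 10KHz units */

	if (!rpm)
		return 0;	/* the driver assumes a non-zero request */

	return 60 * crystal_clock_freq * 10000 / (8 * rpm);	/* 3000 RPM -> 62500 */
}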
"*" : " "); if (smu_version >= 0x360d00) { - size += sprintf(buf + size, "%19s %d(%13s) %7d %7d %7d %7d %7d %7d %7d %7d %7d\n", + size += sysfs_emit_at(buf, size, "%19s %d(%13s) %7d %7d %7d %7d %7d %7d %7d %7d %7d\n", " ", 0, "GFXCLK", @@ -1320,7 +1454,7 @@ static int arcturus_get_power_profile_mode(struct smu_context *smu, activity_monitor.Gfx_PD_Data_error_coeff, activity_monitor.Gfx_PD_Data_error_rate_coeff); - size += sprintf(buf + size, "%19s %d(%13s) %7d %7d %7d %7d %7d %7d %7d %7d %7d\n", + size += sysfs_emit_at(buf, size, "%19s %d(%13s) %7d %7d %7d %7d %7d %7d %7d %7d %7d\n", " ", 1, "UCLK", @@ -1916,16 +2050,16 @@ static int arcturus_dpm_set_vcn_enable(struct smu_context *smu, bool enable) int ret = 0; if (enable) { - if (!smu_cmn_feature_is_enabled(smu, SMU_FEATURE_VCN_PG_BIT)) { - ret = smu_cmn_feature_set_enabled(smu, SMU_FEATURE_VCN_PG_BIT, 1); + if (!smu_cmn_feature_is_enabled(smu, SMU_FEATURE_VCN_DPM_BIT)) { + ret = smu_cmn_feature_set_enabled(smu, SMU_FEATURE_VCN_DPM_BIT, 1); if (ret) { dev_err(smu->adev->dev, "[EnableVCNDPM] failed!\n"); return ret; } } } else { - if (smu_cmn_feature_is_enabled(smu, SMU_FEATURE_VCN_PG_BIT)) { - ret = smu_cmn_feature_set_enabled(smu, SMU_FEATURE_VCN_PG_BIT, 0); + if (smu_cmn_feature_is_enabled(smu, SMU_FEATURE_VCN_DPM_BIT)) { + ret = smu_cmn_feature_set_enabled(smu, SMU_FEATURE_VCN_DPM_BIT, 0); if (ret) { dev_err(smu->adev->dev, "[DisableVCNDPM] failed!\n"); return ret; @@ -1936,197 +2070,77 @@ static int arcturus_dpm_set_vcn_enable(struct smu_context *smu, bool enable) return ret; } -static void arcturus_fill_i2c_req(SwI2cRequest_t *req, bool write, - uint8_t address, uint32_t numbytes, - uint8_t *data) -{ - int i; - - req->I2CcontrollerPort = 0; - req->I2CSpeed = 2; - req->SlaveAddress = address; - req->NumCmds = numbytes; - - for (i = 0; i < numbytes; i++) { - SwI2cCmd_t *cmd = &req->SwI2cCmds[i]; - - /* First 2 bytes are always write for lower 2b EEPROM address */ - if (i < 2) - cmd->Cmd = 1; - else - cmd->Cmd = write; - - - /* Add RESTART for read after address filled */ - cmd->CmdConfig |= (i == 2 && !write) ? CMDCONFIG_RESTART_MASK : 0; - - /* Add STOP in the end */ - cmd->CmdConfig |= (i == (numbytes - 1)) ? 
CMDCONFIG_STOP_MASK : 0; - - /* Fill with data regardless if read or write to simplify code */ - cmd->RegisterAddr = data[i]; - } -} - -static int arcturus_i2c_read_data(struct i2c_adapter *control, - uint8_t address, - uint8_t *data, - uint32_t numbytes) +static int arcturus_i2c_xfer(struct i2c_adapter *i2c_adap, + struct i2c_msg *msg, int num_msgs) { - uint32_t i, ret = 0; - SwI2cRequest_t req; - struct amdgpu_device *adev = to_amdgpu_device(control); + struct amdgpu_device *adev = to_amdgpu_device(i2c_adap); struct smu_table_context *smu_table = &adev->smu.smu_table; struct smu_table *table = &smu_table->driver_table; + SwI2cRequest_t *req, *res = (SwI2cRequest_t *)table->cpu_addr; + int i, j, r, c; + u16 dir; - if (numbytes > MAX_SW_I2C_COMMANDS) { - dev_err(adev->dev, "numbytes requested %d is over max allowed %d\n", - numbytes, MAX_SW_I2C_COMMANDS); - return -EINVAL; - } - - memset(&req, 0, sizeof(req)); - arcturus_fill_i2c_req(&req, false, address, numbytes, data); - - mutex_lock(&adev->smu.mutex); - /* Now read data starting with that address */ - ret = smu_cmn_update_table(&adev->smu, SMU_TABLE_I2C_COMMANDS, 0, &req, - true); - mutex_unlock(&adev->smu.mutex); - - if (!ret) { - SwI2cRequest_t *res = (SwI2cRequest_t *)table->cpu_addr; - - /* Assume SMU fills res.SwI2cCmds[i].Data with read bytes */ - for (i = 0; i < numbytes; i++) - data[i] = res->SwI2cCmds[i].Data; - - dev_dbg(adev->dev, "arcturus_i2c_read_data, address = %x, bytes = %d, data :", - (uint16_t)address, numbytes); - - print_hex_dump(KERN_DEBUG, "data: ", DUMP_PREFIX_NONE, - 8, 1, data, numbytes, false); - } else - dev_err(adev->dev, "arcturus_i2c_read_data - error occurred :%x", ret); + req = kzalloc(sizeof(*req), GFP_KERNEL); + if (!req) + return -ENOMEM; - return ret; -} + req->I2CcontrollerPort = 0; + req->I2CSpeed = I2C_SPEED_FAST_400K; + req->SlaveAddress = msg[0].addr << 1; /* wants an 8-bit address */ + dir = msg[0].flags & I2C_M_RD; + + for (c = i = 0; i < num_msgs; i++) { + for (j = 0; j < msg[i].len; j++, c++) { + SwI2cCmd_t *cmd = &req->SwI2cCmds[c]; + + if (!(msg[i].flags & I2C_M_RD)) { + /* write */ + cmd->Cmd = I2C_CMD_WRITE; + cmd->RegisterAddr = msg[i].buf[j]; + } -static int arcturus_i2c_write_data(struct i2c_adapter *control, - uint8_t address, - uint8_t *data, - uint32_t numbytes) -{ - uint32_t ret; - SwI2cRequest_t req; - struct amdgpu_device *adev = to_amdgpu_device(control); + if ((dir ^ msg[i].flags) & I2C_M_RD) { + /* The direction changes. + */ + dir = msg[i].flags & I2C_M_RD; + cmd->CmdConfig |= CMDCONFIG_RESTART_MASK; + } - if (numbytes > MAX_SW_I2C_COMMANDS) { - dev_err(adev->dev, "numbytes requested %d is over max allowed %d\n", - numbytes, MAX_SW_I2C_COMMANDS); - return -EINVAL; + req->NumCmds++; + + /* + * Insert STOP if we are at the last byte of either last + * message for the transaction or the client explicitly + * requires a STOP at this particular message. 
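As a usage illustration of the RESTART/STOP rules above, a typical client transaction under the new adapter quirks is a 2-byte register-address write combined with a read (the 0x50 address and lengths below are made up):

static int example_eeprom_read(struct i2c_adapter *adap)
{
	u8 reg_addr[2] = { 0x00, 0x10 };	/* fits max_comb_1st_msg_len = 2 */
	u8 data[16];
	struct i2c_msg msgs[] = {
		{ .addr = 0x50, .flags = 0, .len = sizeof(reg_addr), .buf = reg_addr },
		/* direction change below triggers RESTART; the last byte of the
		 * last message gets the STOP */
		{ .addr = 0x50, .flags = I2C_M_RD, .len = sizeof(data), .buf = data },
	};
	int ret = i2c_transfer(adap, msgs, ARRAY_SIZE(msgs));

	return ret == ARRAY_SIZE(msgs) ? 0 : (ret < 0 ? ret : -EIO);
}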
+ */ + if ((j == msg[i].len - 1) && + ((i == num_msgs - 1) || (msg[i].flags & I2C_M_STOP))) { + cmd->CmdConfig &= ~CMDCONFIG_RESTART_MASK; + cmd->CmdConfig |= CMDCONFIG_STOP_MASK; + } + } } - - memset(&req, 0, sizeof(req)); - arcturus_fill_i2c_req(&req, true, address, numbytes, data); - mutex_lock(&adev->smu.mutex); - ret = smu_cmn_update_table(&adev->smu, SMU_TABLE_I2C_COMMANDS, 0, &req, true); + r = smu_cmn_update_table(&adev->smu, SMU_TABLE_I2C_COMMANDS, 0, req, true); mutex_unlock(&adev->smu.mutex); + if (r) + goto fail; - if (!ret) { - dev_dbg(adev->dev, "arcturus_i2c_write(), address = %x, bytes = %d , data: ", - (uint16_t)address, numbytes); - - print_hex_dump(KERN_DEBUG, "data: ", DUMP_PREFIX_NONE, - 8, 1, data, numbytes, false); - /* - * According to EEPROM spec there is a MAX of 10 ms required for - * EEPROM to flush internal RX buffer after STOP was issued at the - * end of write transaction. During this time the EEPROM will not be - * responsive to any more commands - so wait a bit more. - */ - msleep(10); - - } else - dev_err(adev->dev, "arcturus_i2c_write- error occurred :%x", ret); - - return ret; -} - -static int arcturus_i2c_xfer(struct i2c_adapter *i2c_adap, - struct i2c_msg *msgs, int num) -{ - uint32_t i, j, ret, data_size, data_chunk_size, next_eeprom_addr = 0; - uint8_t *data_ptr, data_chunk[MAX_SW_I2C_COMMANDS] = { 0 }; - - for (i = 0; i < num; i++) { - /* - * SMU interface allows at most MAX_SW_I2C_COMMANDS bytes of data at - * once and hence the data needs to be spliced into chunks and sent each - * chunk separately - */ - data_size = msgs[i].len - 2; - data_chunk_size = MAX_SW_I2C_COMMANDS - 2; - next_eeprom_addr = (msgs[i].buf[0] << 8 & 0xff00) | (msgs[i].buf[1] & 0xff); - data_ptr = msgs[i].buf + 2; - - for (j = 0; j < data_size / data_chunk_size; j++) { - /* Insert the EEPROM dest addess, bits 0-15 */ - data_chunk[0] = ((next_eeprom_addr >> 8) & 0xff); - data_chunk[1] = (next_eeprom_addr & 0xff); - - if (msgs[i].flags & I2C_M_RD) { - ret = arcturus_i2c_read_data(i2c_adap, - (uint8_t)msgs[i].addr, - data_chunk, MAX_SW_I2C_COMMANDS); - - memcpy(data_ptr, data_chunk + 2, data_chunk_size); - } else { - - memcpy(data_chunk + 2, data_ptr, data_chunk_size); - - ret = arcturus_i2c_write_data(i2c_adap, - (uint8_t)msgs[i].addr, - data_chunk, MAX_SW_I2C_COMMANDS); - } - - if (ret) { - num = -EIO; - goto fail; - } - - next_eeprom_addr += data_chunk_size; - data_ptr += data_chunk_size; + for (c = i = 0; i < num_msgs; i++) { + if (!(msg[i].flags & I2C_M_RD)) { + c += msg[i].len; + continue; } + for (j = 0; j < msg[i].len; j++, c++) { + SwI2cCmd_t *cmd = &res->SwI2cCmds[c]; - if (data_size % data_chunk_size) { - data_chunk[0] = ((next_eeprom_addr >> 8) & 0xff); - data_chunk[1] = (next_eeprom_addr & 0xff); - - if (msgs[i].flags & I2C_M_RD) { - ret = arcturus_i2c_read_data(i2c_adap, - (uint8_t)msgs[i].addr, - data_chunk, (data_size % data_chunk_size) + 2); - - memcpy(data_ptr, data_chunk + 2, data_size % data_chunk_size); - } else { - memcpy(data_chunk + 2, data_ptr, data_size % data_chunk_size); - - ret = arcturus_i2c_write_data(i2c_adap, - (uint8_t)msgs[i].addr, - data_chunk, (data_size % data_chunk_size) + 2); - } - - if (ret) { - num = -EIO; - goto fail; - } + msg[i].buf[j] = cmd->Data; } } - + r = num_msgs; fail: - return num; + kfree(req); + return r; } static u32 arcturus_i2c_func(struct i2c_adapter *adap) @@ -2140,15 +2154,25 @@ static const struct i2c_algorithm arcturus_i2c_algo = { .functionality = arcturus_i2c_func, }; + +static const struct i2c_adapter_quirks 
arcturus_i2c_control_quirks = { + .flags = I2C_AQ_COMB | I2C_AQ_COMB_SAME_ADDR | I2C_AQ_NO_ZERO_LEN, + .max_read_len = MAX_SW_I2C_COMMANDS, + .max_write_len = MAX_SW_I2C_COMMANDS, + .max_comb_1st_msg_len = 2, + .max_comb_2nd_msg_len = MAX_SW_I2C_COMMANDS - 2, +}; + static int arcturus_i2c_control_init(struct smu_context *smu, struct i2c_adapter *control) { struct amdgpu_device *adev = to_amdgpu_device(control); int res; control->owner = THIS_MODULE; - control->class = I2C_CLASS_SPD; + control->class = I2C_CLASS_HWMON; control->dev.parent = &adev->pdev->dev; control->algo = &arcturus_i2c_algo; + control->quirks = &arcturus_i2c_control_quirks; snprintf(control->name, sizeof(control->name), "AMDGPU SMU"); res = i2c_add_adapter(control); @@ -2288,7 +2312,9 @@ static void arcturus_log_thermal_throttling_event(struct smu_context *smu) dev_warn(adev->dev, "WARN: GPU thermal throttling temperature reached, expect performance decrease. %s.\n", log_buf); - kgd2kfd_smi_event_throttle(smu->adev->kfd.dev, throttler_status); + kgd2kfd_smi_event_throttle(smu->adev->kfd.dev, + smu_cmn_get_indep_throttler_status(throttler_status, + arcturus_throttler_map)); } static uint16_t arcturus_get_current_pcie_link_speed(struct smu_context *smu) @@ -2378,7 +2404,8 @@ static const struct pptable_funcs arcturus_ppt_funcs = { .print_clk_levels = arcturus_print_clk_levels, .force_clk_levels = arcturus_force_clk_levels, .read_sensor = arcturus_read_sensor, - .get_fan_speed_percent = arcturus_get_fan_speed_percent, + .get_fan_speed_pwm = arcturus_get_fan_speed_pwm, + .get_fan_speed_rpm = arcturus_get_fan_speed_rpm, .get_power_profile_mode = arcturus_get_power_profile_mode, .set_power_profile_mode = arcturus_set_power_profile_mode, .set_performance_level = arcturus_set_performance_level, @@ -2423,7 +2450,8 @@ static const struct pptable_funcs arcturus_ppt_funcs = { .display_clock_voltage_request = smu_v11_0_display_clock_voltage_request, .get_fan_control_mode = smu_v11_0_get_fan_control_mode, .set_fan_control_mode = smu_v11_0_set_fan_control_mode, - .set_fan_speed_percent = smu_v11_0_set_fan_speed_percent, + .set_fan_speed_pwm = arcturus_set_fan_speed_pwm, + .set_fan_speed_rpm = arcturus_set_fan_speed_rpm, .set_xgmi_pstate = smu_v11_0_set_xgmi_pstate, .gfx_off_control = smu_v11_0_gfx_off_control, .register_irq_handler = smu_v11_0_register_irq_handler, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/cyan_skillfish_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/cyan_skillfish_ppt.c new file mode 100644 index 000000000000..b05f9541accc --- /dev/null +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/cyan_skillfish_ppt.c @@ -0,0 +1,76 @@ +/* + * Copyright 2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
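Stepping back to the arcturus_log_thermal_throttling_event() change above: the SMI event now reports the ASIC-independent throttler encoding produced by smu_cmn_get_indep_throttler_status() rather than the raw ThrottlerStatus word. A self-contained sketch of that kind of translation, with made-up bit positions purely for illustration (the real ones live in the per-ASIC *_throttler_map tables):

/* Illustrative only: fold raw per-ASIC status bits into generic indicator bits. */
struct example_throttler_map { int asic_bit; int indep_bit; };

static const struct example_throttler_map example_map[] = {
	{ 0, 10 },	/* e.g. edge-temperature throttling    */
	{ 1, 11 },	/* e.g. hotspot-temperature throttling */
	{ 5, 20 },	/* e.g. package-power (PPT) throttling */
};

static unsigned long long example_indep_status(unsigned int asic_status)
{
	unsigned long long indep = 0;
	unsigned int i;

	for (i = 0; i < sizeof(example_map) / sizeof(example_map[0]); i++)
		if (asic_status & (1U << example_map[i].asic_bit))
			indep |= 1ULL << example_map[i].indep_bit;

	return indep;
}

Consumers such as the SMI interface then decode a single bit layout regardless of which SMU generation produced the status.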
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#define SWSMU_CODE_LAYER_L2 + +#include "amdgpu.h" +#include "amdgpu_smu.h" +#include "smu_v11_0.h" +#include "smu11_driver_if_cyan_skillfish.h" +#include "cyan_skillfish_ppt.h" +#include "smu_v11_8_ppsmc.h" +#include "smu_v11_8_pmfw.h" +#include "smu_cmn.h" +#include "soc15_common.h" + +/* + * DO NOT use these for err/warn/info/debug messages. + * Use dev_err, dev_warn, dev_info and dev_dbg instead. + * They are more MGPU friendly. + */ + +#undef pr_err +#undef pr_warn +#undef pr_info +#undef pr_debug + +static struct cmn2asic_msg_mapping cyan_skillfish_message_map[SMU_MSG_MAX_COUNT] = { + MSG_MAP(TestMessage, PPSMC_MSG_TestMessage, 0), + MSG_MAP(GetSmuVersion, PPSMC_MSG_GetSmuVersion, 0), + MSG_MAP(GetDriverIfVersion, PPSMC_MSG_GetDriverIfVersion, 0), + MSG_MAP(SetDriverDramAddrHigh, PPSMC_MSG_SetDriverTableDramAddrHigh, 0), + MSG_MAP(SetDriverDramAddrLow, PPSMC_MSG_SetDriverTableDramAddrLow, 0), + MSG_MAP(TransferTableSmu2Dram, PPSMC_MSG_TransferTableSmu2Dram, 0), + MSG_MAP(TransferTableDram2Smu, PPSMC_MSG_TransferTableDram2Smu, 0), +}; + +static const struct pptable_funcs cyan_skillfish_ppt_funcs = { + + .check_fw_status = smu_v11_0_check_fw_status, + .check_fw_version = smu_v11_0_check_fw_version, + .init_power = smu_v11_0_init_power, + .fini_power = smu_v11_0_fini_power, + .register_irq_handler = smu_v11_0_register_irq_handler, + .notify_memory_pool_location = smu_v11_0_notify_memory_pool_location, + .send_smc_msg_with_param = smu_cmn_send_smc_msg_with_param, + .send_smc_msg = smu_cmn_send_smc_msg, + .set_driver_table_location = smu_v11_0_set_driver_table_location, + .interrupt_work = smu_v11_0_interrupt_work, +}; + +void cyan_skillfish_set_ppt_funcs(struct smu_context *smu) +{ + smu->ppt_funcs = &cyan_skillfish_ppt_funcs; + smu->message_map = cyan_skillfish_message_map; + smu->is_apu = true; +} diff --git a/include/drm/drm_irq.h b/drivers/gpu/drm/amd/pm/swsmu/smu11/cyan_skillfish_ppt.h index 631b22f9757d..76cd7229e383 100644 --- a/include/drm/drm_irq.h +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/cyan_skillfish_ppt.h @@ -1,5 +1,5 @@ /* - * Copyright 2016 Intel Corp. + * Copyright 2021 Advanced Micro Devices, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -8,25 +8,22 @@ * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
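The cyan_skillfish_message_map above follows the usual swSMU convention: each generic SMU_MSG_* index is paired with the ASIC's PPSMC_MSG_* opcode plus a trailing flag (commonly whether the message is allowed under SR-IOV), and the common layer resolves the opcode before talking to the firmware. A condensed, self-contained sketch of that lookup; the struct and function names here are illustrative, not the kernel helpers:

#include <errno.h>

/* Illustrative mapping table indexed by a generic message number. */
struct example_msg_mapping {
	int valid;		/* entry populated for this ASIC              */
	unsigned int asic_msg;	/* opcode actually sent to the firmware       */
	int valid_in_vf;	/* allowed when running as a virtual function */
};

static int example_msg_to_opcode(const struct example_msg_mapping *map,
				 int generic_msg, int is_vf,
				 unsigned int *opcode)
{
	const struct example_msg_mapping *e = &map[generic_msg];

	if (!e->valid || (is_vf && !e->valid_in_vf))
		return -EINVAL;	/* message unsupported on this ASIC/mode */

	*opcode = e->asic_msg;
	return 0;
}

Keeping the table static and per-ASIC means a new part like Cyan Skillfish only has to fill in the rows it actually implements.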
IN NO EVENT SHALL - * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. + * */ -#ifndef _DRM_IRQ_H_ -#define _DRM_IRQ_H_ +#ifndef __CYAN_SKILLFISH_PPT_H__ +#define __CYAN_SKILLFISH_PPT_H__ -struct drm_device; +extern void cyan_skillfish_set_ppt_funcs(struct smu_context *smu); -int drm_irq_install(struct drm_device *dev, int irq); -int drm_irq_uninstall(struct drm_device *dev); -int devm_drm_irq_install(struct drm_device *dev, int irq); #endif diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c index 1ba42b69ce74..f96681700c41 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c @@ -1303,7 +1303,7 @@ static int navi10_print_clk_levels(struct smu_context *smu, if (ret) return size; - size += sprintf(buf + size, "%d: %uMhz %s\n", i, value, + size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n", i, value, cur_value == value ? "*" : ""); } } else { @@ -1321,7 +1321,7 @@ static int navi10_print_clk_levels(struct smu_context *smu, freq_values[1] = (freq_values[0] + freq_values[2]) / 2; for (i = 0; i < 3; i++) { - size += sprintf(buf + size, "%d: %uMhz %s\n", i, freq_values[i], + size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n", i, freq_values[i], i == mark_index ? "*" : ""); } @@ -1331,7 +1331,7 @@ static int navi10_print_clk_levels(struct smu_context *smu, gen_speed = smu_v11_0_get_current_pcie_link_speed_level(smu); lane_width = smu_v11_0_get_current_pcie_link_width_level(smu); for (i = 0; i < NUM_LINK_LEVELS; i++) - size += sprintf(buf + size, "%d: %s %s %dMhz %s\n", i, + size += sysfs_emit_at(buf, size, "%d: %s %s %dMhz %s\n", i, (dpm_context->dpm_tables.pcie_table.pcie_gen[i] == 0) ? "2.5GT/s," : (dpm_context->dpm_tables.pcie_table.pcie_gen[i] == 1) ? "5.0GT/s," : (dpm_context->dpm_tables.pcie_table.pcie_gen[i] == 2) ? 
"8.0GT/s," : @@ -1352,23 +1352,24 @@ static int navi10_print_clk_levels(struct smu_context *smu, break; if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODCAP_GFXCLK_LIMITS)) break; - size += sprintf(buf + size, "OD_SCLK:\n"); - size += sprintf(buf + size, "0: %uMhz\n1: %uMhz\n", od_table->GfxclkFmin, od_table->GfxclkFmax); + size += sysfs_emit_at(buf, size, "OD_SCLK:\n"); + size += sysfs_emit_at(buf, size, "0: %uMhz\n1: %uMhz\n", + od_table->GfxclkFmin, od_table->GfxclkFmax); break; case SMU_OD_MCLK: if (!smu->od_enabled || !od_table || !od_settings) break; if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODCAP_UCLK_MAX)) break; - size += sprintf(buf + size, "OD_MCLK:\n"); - size += sprintf(buf + size, "1: %uMHz\n", od_table->UclkFmax); + size += sysfs_emit_at(buf, size, "OD_MCLK:\n"); + size += sysfs_emit_at(buf, size, "1: %uMHz\n", od_table->UclkFmax); break; case SMU_OD_VDDC_CURVE: if (!smu->od_enabled || !od_table || !od_settings) break; if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODCAP_GFXCLK_CURVE)) break; - size += sprintf(buf + size, "OD_VDDC_CURVE:\n"); + size += sysfs_emit_at(buf, size, "OD_VDDC_CURVE:\n"); for (i = 0; i < 3; i++) { switch (i) { case 0: @@ -1383,55 +1384,57 @@ static int navi10_print_clk_levels(struct smu_context *smu, default: break; } - size += sprintf(buf + size, "%d: %uMHz %umV\n", i, curve_settings[0], curve_settings[1] / NAVI10_VOLTAGE_SCALE); + size += sysfs_emit_at(buf, size, "%d: %uMHz %umV\n", + i, curve_settings[0], + curve_settings[1] / NAVI10_VOLTAGE_SCALE); } break; case SMU_OD_RANGE: if (!smu->od_enabled || !od_table || !od_settings) break; - size = sprintf(buf, "%s:\n", "OD_RANGE"); + size = sysfs_emit(buf, "%s:\n", "OD_RANGE"); if (navi10_od_feature_is_supported(od_settings, SMU_11_0_ODCAP_GFXCLK_LIMITS)) { navi10_od_setting_get_range(od_settings, SMU_11_0_ODSETTING_GFXCLKFMIN, &min_value, NULL); navi10_od_setting_get_range(od_settings, SMU_11_0_ODSETTING_GFXCLKFMAX, NULL, &max_value); - size += sprintf(buf + size, "SCLK: %7uMhz %10uMhz\n", + size += sysfs_emit_at(buf, size, "SCLK: %7uMhz %10uMhz\n", min_value, max_value); } if (navi10_od_feature_is_supported(od_settings, SMU_11_0_ODCAP_UCLK_MAX)) { navi10_od_setting_get_range(od_settings, SMU_11_0_ODSETTING_UCLKFMAX, &min_value, &max_value); - size += sprintf(buf + size, "MCLK: %7uMhz %10uMhz\n", + size += sysfs_emit_at(buf, size, "MCLK: %7uMhz %10uMhz\n", min_value, max_value); } if (navi10_od_feature_is_supported(od_settings, SMU_11_0_ODCAP_GFXCLK_CURVE)) { navi10_od_setting_get_range(od_settings, SMU_11_0_ODSETTING_VDDGFXCURVEFREQ_P1, &min_value, &max_value); - size += sprintf(buf + size, "VDDC_CURVE_SCLK[0]: %7uMhz %10uMhz\n", - min_value, max_value); + size += sysfs_emit_at(buf, size, "VDDC_CURVE_SCLK[0]: %7uMhz %10uMhz\n", + min_value, max_value); navi10_od_setting_get_range(od_settings, SMU_11_0_ODSETTING_VDDGFXCURVEVOLTAGE_P1, &min_value, &max_value); - size += sprintf(buf + size, "VDDC_CURVE_VOLT[0]: %7dmV %11dmV\n", - min_value, max_value); + size += sysfs_emit_at(buf, size, "VDDC_CURVE_VOLT[0]: %7dmV %11dmV\n", + min_value, max_value); navi10_od_setting_get_range(od_settings, SMU_11_0_ODSETTING_VDDGFXCURVEFREQ_P2, &min_value, &max_value); - size += sprintf(buf + size, "VDDC_CURVE_SCLK[1]: %7uMhz %10uMhz\n", - min_value, max_value); + size += sysfs_emit_at(buf, size, "VDDC_CURVE_SCLK[1]: %7uMhz %10uMhz\n", + min_value, max_value); navi10_od_setting_get_range(od_settings, SMU_11_0_ODSETTING_VDDGFXCURVEVOLTAGE_P2, &min_value, &max_value); - size += 
sprintf(buf + size, "VDDC_CURVE_VOLT[1]: %7dmV %11dmV\n", - min_value, max_value); + size += sysfs_emit_at(buf, size, "VDDC_CURVE_VOLT[1]: %7dmV %11dmV\n", + min_value, max_value); navi10_od_setting_get_range(od_settings, SMU_11_0_ODSETTING_VDDGFXCURVEFREQ_P3, &min_value, &max_value); - size += sprintf(buf + size, "VDDC_CURVE_SCLK[2]: %7uMhz %10uMhz\n", - min_value, max_value); + size += sysfs_emit_at(buf, size, "VDDC_CURVE_SCLK[2]: %7uMhz %10uMhz\n", + min_value, max_value); navi10_od_setting_get_range(od_settings, SMU_11_0_ODSETTING_VDDGFXCURVEVOLTAGE_P3, &min_value, &max_value); - size += sprintf(buf + size, "VDDC_CURVE_VOLT[2]: %7dmV %11dmV\n", - min_value, max_value); + size += sysfs_emit_at(buf, size, "VDDC_CURVE_VOLT[2]: %7dmV %11dmV\n", + min_value, max_value); } break; @@ -1668,27 +1671,27 @@ static bool navi10_is_dpm_running(struct smu_context *smu) return !!(feature_enabled & SMC_DPM_FEATURE); } -static int navi10_get_fan_speed_percent(struct smu_context *smu, - uint32_t *speed) +static int navi10_get_fan_speed_rpm(struct smu_context *smu, + uint32_t *speed) { - int ret; - u32 rpm; + int ret = 0; if (!speed) return -EINVAL; switch (smu_v11_0_get_fan_control_mode(smu)) { case AMD_FAN_CTRL_AUTO: - ret = navi1x_get_smu_metrics_data(smu, + ret = navi10_get_smu_metrics_data(smu, METRICS_CURR_FANSPEED, - &rpm); - if (!ret && smu->fan_max_rpm) - *speed = rpm * 100 / smu->fan_max_rpm; - return ret; + speed); + break; default: - *speed = smu->user_dpm_profile.fan_speed_percent; - return 0; + ret = smu_v11_0_get_fan_speed_rpm(smu, + speed); + break; } + + return ret; } static int navi10_get_fan_parameters(struct smu_context *smu) @@ -1730,7 +1733,7 @@ static int navi10_get_power_profile_mode(struct smu_context *smu, char *buf) if (!buf) return -EINVAL; - size += sprintf(buf + size, "%16s %s %s %s %s %s %s %s %s %s %s\n", + size += sysfs_emit_at(buf, size, "%16s %s %s %s %s %s %s %s %s %s %s\n", title[0], title[1], title[2], title[3], title[4], title[5], title[6], title[7], title[8], title[9], title[10]); @@ -1750,10 +1753,10 @@ static int navi10_get_power_profile_mode(struct smu_context *smu, char *buf) return result; } - size += sprintf(buf + size, "%2d %14s%s:\n", + size += sysfs_emit_at(buf, size, "%2d %14s%s:\n", i, profile_name[i], (i == smu->power_profile_mode) ? 
"*" : " "); - size += sprintf(buf + size, "%19s %d(%13s) %7d %7d %7d %7d %7d %7d %7d %7d %7d\n", + size += sysfs_emit_at(buf, size, "%19s %d(%13s) %7d %7d %7d %7d %7d %7d %7d %7d %7d\n", " ", 0, "GFXCLK", @@ -1767,7 +1770,7 @@ static int navi10_get_power_profile_mode(struct smu_context *smu, char *buf) activity_monitor.Gfx_PD_Data_error_coeff, activity_monitor.Gfx_PD_Data_error_rate_coeff); - size += sprintf(buf + size, "%19s %d(%13s) %7d %7d %7d %7d %7d %7d %7d %7d %7d\n", + size += sysfs_emit_at(buf, size, "%19s %d(%13s) %7d %7d %7d %7d %7d %7d %7d %7d %7d\n", " ", 1, "SOCCLK", @@ -1781,7 +1784,7 @@ static int navi10_get_power_profile_mode(struct smu_context *smu, char *buf) activity_monitor.Soc_PD_Data_error_coeff, activity_monitor.Soc_PD_Data_error_rate_coeff); - size += sprintf(buf + size, "%19s %d(%13s) %7d %7d %7d %7d %7d %7d %7d %7d %7d\n", + size += sysfs_emit_at(buf, size, "%19s %d(%13s) %7d %7d %7d %7d %7d %7d %7d %7d %7d\n", " ", 2, "MEMLK", @@ -2294,41 +2297,52 @@ static int navi10_set_default_od_settings(struct smu_context *smu) (OverDriveTable_t *)smu->smu_table.overdrive_table; OverDriveTable_t *boot_od_table = (OverDriveTable_t *)smu->smu_table.boot_overdrive_table; + OverDriveTable_t *user_od_table = + (OverDriveTable_t *)smu->smu_table.user_overdrive_table; int ret = 0; - ret = smu_cmn_update_table(smu, SMU_TABLE_OVERDRIVE, 0, (void *)od_table, false); + /* + * For S3/S4/Runpm resume, no need to setup those overdrive tables again as + * - either they already have the default OD settings got during cold bootup + * - or they have some user customized OD settings which cannot be overwritten + */ + if (smu->adev->in_suspend) + return 0; + + ret = smu_cmn_update_table(smu, SMU_TABLE_OVERDRIVE, 0, (void *)boot_od_table, false); if (ret) { dev_err(smu->adev->dev, "Failed to get overdrive table!\n"); return ret; } - if (!od_table->GfxclkVolt1) { + if (!boot_od_table->GfxclkVolt1) { ret = navi10_overdrive_get_gfx_clk_base_voltage(smu, - &od_table->GfxclkVolt1, - od_table->GfxclkFreq1); + &boot_od_table->GfxclkVolt1, + boot_od_table->GfxclkFreq1); if (ret) return ret; } - if (!od_table->GfxclkVolt2) { + if (!boot_od_table->GfxclkVolt2) { ret = navi10_overdrive_get_gfx_clk_base_voltage(smu, - &od_table->GfxclkVolt2, - od_table->GfxclkFreq2); + &boot_od_table->GfxclkVolt2, + boot_od_table->GfxclkFreq2); if (ret) return ret; } - if (!od_table->GfxclkVolt3) { + if (!boot_od_table->GfxclkVolt3) { ret = navi10_overdrive_get_gfx_clk_base_voltage(smu, - &od_table->GfxclkVolt3, - od_table->GfxclkFreq3); + &boot_od_table->GfxclkVolt3, + boot_od_table->GfxclkFreq3); if (ret) return ret; } - memcpy(boot_od_table, od_table, sizeof(OverDriveTable_t)); + navi10_dump_od_table(smu, boot_od_table); - navi10_dump_od_table(smu, od_table); + memcpy(od_table, boot_od_table, sizeof(OverDriveTable_t)); + memcpy(user_od_table, boot_od_table, sizeof(OverDriveTable_t)); return 0; } @@ -2429,11 +2443,20 @@ static int navi10_od_edit_dpm_table(struct smu_context *smu, enum PP_OD_DPM_TABL memcpy(table_context->overdrive_table, table_context->boot_overdrive_table, sizeof(OverDriveTable_t)); break; case PP_OD_COMMIT_DPM_TABLE: - navi10_dump_od_table(smu, od_table); - ret = smu_cmn_update_table(smu, SMU_TABLE_OVERDRIVE, 0, (void *)od_table, true); - if (ret) { - dev_err(smu->adev->dev, "Failed to import overdrive table!\n"); - return ret; + if (memcmp(od_table, table_context->user_overdrive_table, sizeof(OverDriveTable_t))) { + navi10_dump_od_table(smu, od_table); + ret = smu_cmn_update_table(smu, 
SMU_TABLE_OVERDRIVE, 0, (void *)od_table, true); + if (ret) { + dev_err(smu->adev->dev, "Failed to import overdrive table!\n"); + return ret; + } + memcpy(table_context->user_overdrive_table, od_table, sizeof(OverDriveTable_t)); + smu->user_dpm_profile.user_od = true; + + if (!memcmp(table_context->user_overdrive_table, + table_context->boot_overdrive_table, + sizeof(OverDriveTable_t))) + smu->user_dpm_profile.user_od = false; } break; case PP_OD_EDIT_VDDC_CURVE: @@ -2735,6 +2758,122 @@ static ssize_t navi10_get_legacy_gpu_metrics(struct smu_context *smu, return sizeof(struct gpu_metrics_v1_3); } +static int navi10_i2c_xfer(struct i2c_adapter *i2c_adap, + struct i2c_msg *msg, int num_msgs) +{ + struct amdgpu_device *adev = to_amdgpu_device(i2c_adap); + struct smu_table_context *smu_table = &adev->smu.smu_table; + struct smu_table *table = &smu_table->driver_table; + SwI2cRequest_t *req, *res = (SwI2cRequest_t *)table->cpu_addr; + int i, j, r, c; + u16 dir; + + req = kzalloc(sizeof(*req), GFP_KERNEL); + if (!req) + return -ENOMEM; + + req->I2CcontrollerPort = 0; + req->I2CSpeed = I2C_SPEED_FAST_400K; + req->SlaveAddress = msg[0].addr << 1; /* wants an 8-bit address */ + dir = msg[0].flags & I2C_M_RD; + + for (c = i = 0; i < num_msgs; i++) { + for (j = 0; j < msg[i].len; j++, c++) { + SwI2cCmd_t *cmd = &req->SwI2cCmds[c]; + + if (!(msg[i].flags & I2C_M_RD)) { + /* write */ + cmd->Cmd = I2C_CMD_WRITE; + cmd->RegisterAddr = msg[i].buf[j]; + } + + if ((dir ^ msg[i].flags) & I2C_M_RD) { + /* The direction changes. + */ + dir = msg[i].flags & I2C_M_RD; + cmd->CmdConfig |= CMDCONFIG_RESTART_MASK; + } + + req->NumCmds++; + + /* + * Insert STOP if we are at the last byte of either last + * message for the transaction or the client explicitly + * requires a STOP at this particular message. 
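Back in the PP_OD_COMMIT_DPM_TABLE handling near the top of this hunk, the overdrive table is flushed to the SMU only when it differs from the cached user copy, and user_od is cleared again if the committed settings match the boot defaults, so a later resume can tell whether smu_v11_0_restore_user_od_settings() has any work to do. A compressed sketch of that bookkeeping, assuming byte-comparable tables as the patch does:

#include <linux/string.h>
#include <linux/types.h>

/* Sketch of the commit-time decision, not the literal kernel code. */
static void example_commit_od_table(const void *cur, void *user,
				    const void *boot, size_t size,
				    bool *user_od)
{
	if (!memcmp(cur, user, size))
		return;		/* nothing changed; skip the SMU write */

	/* (push 'cur' to the firmware here,) then cache it as the user table */
	memcpy(user, cur, size);

	/* settings identical to the boot defaults need no restore on resume */
	*user_od = memcmp(user, boot, size) != 0;
}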
+ */ + if ((j == msg[i].len - 1) && + ((i == num_msgs - 1) || (msg[i].flags & I2C_M_STOP))) { + cmd->CmdConfig &= ~CMDCONFIG_RESTART_MASK; + cmd->CmdConfig |= CMDCONFIG_STOP_MASK; + } + } + } + mutex_lock(&adev->smu.mutex); + r = smu_cmn_update_table(&adev->smu, SMU_TABLE_I2C_COMMANDS, 0, req, true); + mutex_unlock(&adev->smu.mutex); + if (r) + goto fail; + + for (c = i = 0; i < num_msgs; i++) { + if (!(msg[i].flags & I2C_M_RD)) { + c += msg[i].len; + continue; + } + for (j = 0; j < msg[i].len; j++, c++) { + SwI2cCmd_t *cmd = &res->SwI2cCmds[c]; + + msg[i].buf[j] = cmd->Data; + } + } + r = num_msgs; +fail: + kfree(req); + return r; +} + +static u32 navi10_i2c_func(struct i2c_adapter *adap) +{ + return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL; +} + + +static const struct i2c_algorithm navi10_i2c_algo = { + .master_xfer = navi10_i2c_xfer, + .functionality = navi10_i2c_func, +}; + +static const struct i2c_adapter_quirks navi10_i2c_control_quirks = { + .flags = I2C_AQ_COMB | I2C_AQ_COMB_SAME_ADDR | I2C_AQ_NO_ZERO_LEN, + .max_read_len = MAX_SW_I2C_COMMANDS, + .max_write_len = MAX_SW_I2C_COMMANDS, + .max_comb_1st_msg_len = 2, + .max_comb_2nd_msg_len = MAX_SW_I2C_COMMANDS - 2, +}; + +static int navi10_i2c_control_init(struct smu_context *smu, struct i2c_adapter *control) +{ + struct amdgpu_device *adev = to_amdgpu_device(control); + int res; + + control->owner = THIS_MODULE; + control->class = I2C_CLASS_HWMON; + control->dev.parent = &adev->pdev->dev; + control->algo = &navi10_i2c_algo; + snprintf(control->name, sizeof(control->name), "AMDGPU SMU"); + control->quirks = &navi10_i2c_control_quirks; + + res = i2c_add_adapter(control); + if (res) + DRM_ERROR("Failed to register hw i2c, err: %d\n", res); + + return res; +} + +static void navi10_i2c_control_fini(struct smu_context *smu, struct i2c_adapter *control) +{ + i2c_del_adapter(control); +} + static ssize_t navi10_get_gpu_metrics(struct smu_context *smu, void **table) { @@ -3078,6 +3217,8 @@ static const struct pptable_funcs navi10_ppt_funcs = { .set_default_dpm_table = navi10_set_default_dpm_table, .dpm_set_vcn_enable = navi10_dpm_set_vcn_enable, .dpm_set_jpeg_enable = navi10_dpm_set_jpeg_enable, + .i2c_init = navi10_i2c_control_init, + .i2c_fini = navi10_i2c_control_fini, .print_clk_levels = navi10_print_clk_levels, .force_clk_levels = navi10_force_clk_levels, .populate_umd_state_clk = navi10_populate_umd_state_clk, @@ -3086,7 +3227,8 @@ static const struct pptable_funcs navi10_ppt_funcs = { .display_config_changed = navi10_display_config_changed, .notify_smc_display_config = navi10_notify_smc_display_config, .is_dpm_running = navi10_is_dpm_running, - .get_fan_speed_percent = navi10_get_fan_speed_percent, + .get_fan_speed_pwm = smu_v11_0_get_fan_speed_pwm, + .get_fan_speed_rpm = navi10_get_fan_speed_rpm, .get_power_profile_mode = navi10_get_power_profile_mode, .set_power_profile_mode = navi10_set_power_profile_mode, .set_watermarks_table = navi10_set_watermarks_table, @@ -3129,7 +3271,8 @@ static const struct pptable_funcs navi10_ppt_funcs = { .display_clock_voltage_request = smu_v11_0_display_clock_voltage_request, .get_fan_control_mode = smu_v11_0_get_fan_control_mode, .set_fan_control_mode = smu_v11_0_set_fan_control_mode, - .set_fan_speed_percent = smu_v11_0_set_fan_speed_percent, + .set_fan_speed_pwm = smu_v11_0_set_fan_speed_pwm, + .set_fan_speed_rpm = smu_v11_0_set_fan_speed_rpm, .set_xgmi_pstate = smu_v11_0_set_xgmi_pstate, .gfx_off_control = smu_v11_0_gfx_off_control, .register_irq_handler = smu_v11_0_register_irq_handler, @@ -3144,6 
+3287,7 @@ static const struct pptable_funcs navi10_ppt_funcs = { .set_soft_freq_limited_range = smu_v11_0_set_soft_freq_limited_range, .set_default_od_settings = navi10_set_default_od_settings, .od_edit_dpm_table = navi10_od_edit_dpm_table, + .restore_user_od_settings = smu_v11_0_restore_user_od_settings, .run_btc = navi10_run_btc, .set_power_source = smu_v11_0_set_power_source, .get_pp_feature_mask = smu_cmn_get_pp_feature_mask, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c index d92dd2c7448e..5e292c3f5050 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c @@ -488,6 +488,26 @@ err0_out: return -ENOMEM; } +static uint32_t sienna_cichlid_get_throttler_status_locked(struct smu_context *smu) +{ + struct smu_table_context *smu_table= &smu->smu_table; + SmuMetricsExternal_t *metrics_ext = + (SmuMetricsExternal_t *)(smu_table->metrics_table); + uint32_t throttler_status = 0; + int i; + + if ((smu->adev->asic_type == CHIP_SIENNA_CICHLID) && + (smu->smc_fw_version >= 0x3A4300)) { + for (i = 0; i < THROTTLER_COUNT; i++) + throttler_status |= + (metrics_ext->SmuMetrics_V2.ThrottlingPercentage[i] ? 1U << i : 0); + } else { + throttler_status = metrics_ext->SmuMetrics.ThrottlerStatus; + } + + return throttler_status; +} + static int sienna_cichlid_get_smu_metrics_data(struct smu_context *smu, MetricsMember_t member, uint32_t *value) @@ -495,6 +515,11 @@ static int sienna_cichlid_get_smu_metrics_data(struct smu_context *smu, struct smu_table_context *smu_table= &smu->smu_table; SmuMetrics_t *metrics = &(((SmuMetricsExternal_t *)(smu_table->metrics_table))->SmuMetrics); + SmuMetrics_V2_t *metrics_v2 = + &(((SmuMetricsExternal_t *)(smu_table->metrics_table))->SmuMetrics_V2); + bool use_metrics_v2 = ((smu->adev->asic_type == CHIP_SIENNA_CICHLID) && + (smu->smc_fw_version >= 0x3A4300)) ? true : false; + uint16_t average_gfx_activity; int ret = 0; mutex_lock(&smu->metrics_lock); @@ -509,78 +534,96 @@ static int sienna_cichlid_get_smu_metrics_data(struct smu_context *smu, switch (member) { case METRICS_CURR_GFXCLK: - *value = metrics->CurrClock[PPCLK_GFXCLK]; + *value = use_metrics_v2 ? metrics_v2->CurrClock[PPCLK_GFXCLK] : + metrics->CurrClock[PPCLK_GFXCLK]; break; case METRICS_CURR_SOCCLK: - *value = metrics->CurrClock[PPCLK_SOCCLK]; + *value = use_metrics_v2 ? metrics_v2->CurrClock[PPCLK_SOCCLK] : + metrics->CurrClock[PPCLK_SOCCLK]; break; case METRICS_CURR_UCLK: - *value = metrics->CurrClock[PPCLK_UCLK]; + *value = use_metrics_v2 ? metrics_v2->CurrClock[PPCLK_UCLK] : + metrics->CurrClock[PPCLK_UCLK]; break; case METRICS_CURR_VCLK: - *value = metrics->CurrClock[PPCLK_VCLK_0]; + *value = use_metrics_v2 ? metrics_v2->CurrClock[PPCLK_VCLK_0] : + metrics->CurrClock[PPCLK_VCLK_0]; break; case METRICS_CURR_VCLK1: - *value = metrics->CurrClock[PPCLK_VCLK_1]; + *value = use_metrics_v2 ? metrics_v2->CurrClock[PPCLK_VCLK_1] : + metrics->CurrClock[PPCLK_VCLK_1]; break; case METRICS_CURR_DCLK: - *value = metrics->CurrClock[PPCLK_DCLK_0]; + *value = use_metrics_v2 ? metrics_v2->CurrClock[PPCLK_DCLK_0] : + metrics->CurrClock[PPCLK_DCLK_0]; break; case METRICS_CURR_DCLK1: - *value = metrics->CurrClock[PPCLK_DCLK_1]; + *value = use_metrics_v2 ? metrics_v2->CurrClock[PPCLK_DCLK_1] : + metrics->CurrClock[PPCLK_DCLK_1]; break; case METRICS_CURR_DCEFCLK: - *value = metrics->CurrClock[PPCLK_DCEFCLK]; + *value = use_metrics_v2 ? 
metrics_v2->CurrClock[PPCLK_DCEFCLK] : + metrics->CurrClock[PPCLK_DCEFCLK]; break; case METRICS_CURR_FCLK: - *value = metrics->CurrClock[PPCLK_FCLK]; + *value = use_metrics_v2 ? metrics_v2->CurrClock[PPCLK_FCLK] : + metrics->CurrClock[PPCLK_FCLK]; break; case METRICS_AVERAGE_GFXCLK: - if (metrics->AverageGfxActivity <= SMU_11_0_7_GFX_BUSY_THRESHOLD) - *value = metrics->AverageGfxclkFrequencyPostDs; + average_gfx_activity = use_metrics_v2 ? metrics_v2->AverageGfxActivity : + metrics->AverageGfxActivity; + if (average_gfx_activity <= SMU_11_0_7_GFX_BUSY_THRESHOLD) + *value = use_metrics_v2 ? metrics_v2->AverageGfxclkFrequencyPostDs : + metrics->AverageGfxclkFrequencyPostDs; else - *value = metrics->AverageGfxclkFrequencyPreDs; + *value = use_metrics_v2 ? metrics_v2->AverageGfxclkFrequencyPreDs : + metrics->AverageGfxclkFrequencyPreDs; break; case METRICS_AVERAGE_FCLK: - *value = metrics->AverageFclkFrequencyPostDs; + *value = use_metrics_v2 ? metrics_v2->AverageFclkFrequencyPostDs : + metrics->AverageFclkFrequencyPostDs; break; case METRICS_AVERAGE_UCLK: - *value = metrics->AverageUclkFrequencyPostDs; + *value = use_metrics_v2 ? metrics_v2->AverageUclkFrequencyPostDs : + metrics->AverageUclkFrequencyPostDs; break; case METRICS_AVERAGE_GFXACTIVITY: - *value = metrics->AverageGfxActivity; + *value = use_metrics_v2 ? metrics_v2->AverageGfxActivity : + metrics->AverageGfxActivity; break; case METRICS_AVERAGE_MEMACTIVITY: - *value = metrics->AverageUclkActivity; + *value = use_metrics_v2 ? metrics_v2->AverageUclkActivity : + metrics->AverageUclkActivity; break; case METRICS_AVERAGE_SOCKETPOWER: - *value = metrics->AverageSocketPower << 8; + *value = use_metrics_v2 ? metrics_v2->AverageSocketPower << 8 : + metrics->AverageSocketPower << 8; break; case METRICS_TEMPERATURE_EDGE: - *value = metrics->TemperatureEdge * + *value = (use_metrics_v2 ? metrics_v2->TemperatureEdge : metrics->TemperatureEdge) * SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; break; case METRICS_TEMPERATURE_HOTSPOT: - *value = metrics->TemperatureHotspot * + *value = (use_metrics_v2 ? metrics_v2->TemperatureHotspot : metrics->TemperatureHotspot) * SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; break; case METRICS_TEMPERATURE_MEM: - *value = metrics->TemperatureMem * + *value = (use_metrics_v2 ? metrics_v2->TemperatureMem : metrics->TemperatureMem) * SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; break; case METRICS_TEMPERATURE_VRGFX: - *value = metrics->TemperatureVrGfx * + *value = (use_metrics_v2 ? metrics_v2->TemperatureVrGfx : metrics->TemperatureVrGfx) * SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; break; case METRICS_TEMPERATURE_VRSOC: - *value = metrics->TemperatureVrSoc * + *value = (use_metrics_v2 ? metrics_v2->TemperatureVrSoc : metrics->TemperatureVrSoc) * SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; break; case METRICS_THROTTLER_STATUS: - *value = metrics->ThrottlerStatus; + *value = sienna_cichlid_get_throttler_status_locked(smu); break; case METRICS_CURR_FANSPEED: - *value = metrics->CurrFanSpeed; + *value = use_metrics_v2 ? metrics_v2->CurrFanSpeed : metrics->CurrFanSpeed; break; default: *value = UINT_MAX; @@ -1045,7 +1088,7 @@ static int sienna_cichlid_print_clk_levels(struct smu_context *smu, if (ret) goto print_clk_out; - size += sprintf(buf + size, "%d: %uMhz %s\n", i, value, + size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n", i, value, cur_value == value ? 
"*" : ""); } } else { @@ -1067,7 +1110,7 @@ static int sienna_cichlid_print_clk_levels(struct smu_context *smu, } for (i = 0; i < count; i++) { - size += sprintf(buf + size, "%d: %uMhz %s\n", i, freq_values[i], + size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n", i, freq_values[i], cur_value == freq_values[i] ? "*" : ""); } @@ -1078,7 +1121,7 @@ static int sienna_cichlid_print_clk_levels(struct smu_context *smu, lane_width = smu_v11_0_get_current_pcie_link_width_level(smu); GET_PPTABLE_MEMBER(LclkFreq, &table_member); for (i = 0; i < NUM_LINK_LEVELS; i++) - size += sprintf(buf + size, "%d: %s %s %dMhz %s\n", i, + size += sysfs_emit_at(buf, size, "%d: %s %s %dMhz %s\n", i, (dpm_context->dpm_tables.pcie_table.pcie_gen[i] == 0) ? "2.5GT/s," : (dpm_context->dpm_tables.pcie_table.pcie_gen[i] == 1) ? "5.0GT/s," : (dpm_context->dpm_tables.pcie_table.pcie_gen[i] == 2) ? "8.0GT/s," : @@ -1101,8 +1144,8 @@ static int sienna_cichlid_print_clk_levels(struct smu_context *smu, if (!sienna_cichlid_is_od_feature_supported(od_settings, SMU_11_0_7_ODCAP_GFXCLK_LIMITS)) break; - size += sprintf(buf + size, "OD_SCLK:\n"); - size += sprintf(buf + size, "0: %uMhz\n1: %uMhz\n", od_table->GfxclkFmin, od_table->GfxclkFmax); + size += sysfs_emit_at(buf, size, "OD_SCLK:\n"); + size += sysfs_emit_at(buf, size, "0: %uMhz\n1: %uMhz\n", od_table->GfxclkFmin, od_table->GfxclkFmax); break; case SMU_OD_MCLK: @@ -1112,8 +1155,8 @@ static int sienna_cichlid_print_clk_levels(struct smu_context *smu, if (!sienna_cichlid_is_od_feature_supported(od_settings, SMU_11_0_7_ODCAP_UCLK_LIMITS)) break; - size += sprintf(buf + size, "OD_MCLK:\n"); - size += sprintf(buf + size, "0: %uMhz\n1: %uMHz\n", od_table->UclkFmin, od_table->UclkFmax); + size += sysfs_emit_at(buf, size, "OD_MCLK:\n"); + size += sysfs_emit_at(buf, size, "0: %uMhz\n1: %uMHz\n", od_table->UclkFmin, od_table->UclkFmax); break; case SMU_OD_VDDGFX_OFFSET: @@ -1129,22 +1172,22 @@ static int sienna_cichlid_print_clk_levels(struct smu_context *smu, (smu_version < 0x003a2900)) break; - size += sprintf(buf + size, "OD_VDDGFX_OFFSET:\n"); - size += sprintf(buf + size, "%dmV\n", od_table->VddGfxOffset); + size += sysfs_emit_at(buf, size, "OD_VDDGFX_OFFSET:\n"); + size += sysfs_emit_at(buf, size, "%dmV\n", od_table->VddGfxOffset); break; case SMU_OD_RANGE: if (!smu->od_enabled || !od_table || !od_settings) break; - size = sprintf(buf, "%s:\n", "OD_RANGE"); + size = sysfs_emit(buf, "%s:\n", "OD_RANGE"); if (sienna_cichlid_is_od_feature_supported(od_settings, SMU_11_0_7_ODCAP_GFXCLK_LIMITS)) { sienna_cichlid_get_od_setting_range(od_settings, SMU_11_0_7_ODSETTING_GFXCLKFMIN, &min_value, NULL); sienna_cichlid_get_od_setting_range(od_settings, SMU_11_0_7_ODSETTING_GFXCLKFMAX, NULL, &max_value); - size += sprintf(buf + size, "SCLK: %7uMhz %10uMhz\n", + size += sysfs_emit_at(buf, size, "SCLK: %7uMhz %10uMhz\n", min_value, max_value); } @@ -1153,7 +1196,7 @@ static int sienna_cichlid_print_clk_levels(struct smu_context *smu, &min_value, NULL); sienna_cichlid_get_od_setting_range(od_settings, SMU_11_0_7_ODSETTING_UCLKFMAX, NULL, &max_value); - size += sprintf(buf + size, "MCLK: %7uMhz %10uMhz\n", + size += sysfs_emit_at(buf, size, "MCLK: %7uMhz %10uMhz\n", min_value, max_value); } break; @@ -1311,27 +1354,20 @@ static bool sienna_cichlid_is_dpm_running(struct smu_context *smu) return !!(feature_enabled & SMC_DPM_FEATURE); } -static int sienna_cichlid_get_fan_speed_percent(struct smu_context *smu, - uint32_t *speed) +static int sienna_cichlid_get_fan_speed_rpm(struct smu_context *smu, + 
uint32_t *speed) { - int ret; - u32 rpm; - if (!speed) return -EINVAL; - switch (smu_v11_0_get_fan_control_mode(smu)) { - case AMD_FAN_CTRL_AUTO: - ret = sienna_cichlid_get_smu_metrics_data(smu, - METRICS_CURR_FANSPEED, - &rpm); - if (!ret && smu->fan_max_rpm) - *speed = rpm * 100 / smu->fan_max_rpm; - return ret; - default: - *speed = smu->user_dpm_profile.fan_speed_percent; - return 0; - } + /* + * For Sienna_Cichlid and later, the fan speed(rpm) reported + * by pmfw is always trustable(even when the fan control feature + * disabled or 0 RPM kicked in). + */ + return sienna_cichlid_get_smu_metrics_data(smu, + METRICS_CURR_FANSPEED, + speed); } static int sienna_cichlid_get_fan_parameters(struct smu_context *smu) @@ -1376,7 +1412,7 @@ static int sienna_cichlid_get_power_profile_mode(struct smu_context *smu, char * if (!buf) return -EINVAL; - size += sprintf(buf + size, "%16s %s %s %s %s %s %s %s %s %s %s\n", + size += sysfs_emit_at(buf, size, "%16s %s %s %s %s %s %s %s %s %s %s\n", title[0], title[1], title[2], title[3], title[4], title[5], title[6], title[7], title[8], title[9], title[10]); @@ -1396,10 +1432,10 @@ static int sienna_cichlid_get_power_profile_mode(struct smu_context *smu, char * return result; } - size += sprintf(buf + size, "%2d %14s%s:\n", + size += sysfs_emit_at(buf, size, "%2d %14s%s:\n", i, profile_name[i], (i == smu->power_profile_mode) ? "*" : " "); - size += sprintf(buf + size, "%19s %d(%13s) %7d %7d %7d %7d %7d %7d %7d %7d %7d\n", + size += sysfs_emit_at(buf, size, "%19s %d(%13s) %7d %7d %7d %7d %7d %7d %7d %7d %7d\n", " ", 0, "GFXCLK", @@ -1413,7 +1449,7 @@ static int sienna_cichlid_get_power_profile_mode(struct smu_context *smu, char * activity_monitor->Gfx_PD_Data_error_coeff, activity_monitor->Gfx_PD_Data_error_rate_coeff); - size += sprintf(buf + size, "%19s %d(%13s) %7d %7d %7d %7d %7d %7d %7d %7d %7d\n", + size += sysfs_emit_at(buf, size, "%19s %d(%13s) %7d %7d %7d %7d %7d %7d %7d %7d %7d\n", " ", 1, "SOCCLK", @@ -1427,7 +1463,7 @@ static int sienna_cichlid_get_power_profile_mode(struct smu_context *smu, char * activity_monitor->Fclk_PD_Data_error_coeff, activity_monitor->Fclk_PD_Data_error_rate_coeff); - size += sprintf(buf + size, "%19s %d(%13s) %7d %7d %7d %7d %7d %7d %7d %7d %7d\n", + size += sysfs_emit_at(buf, size, "%19s %d(%13s) %7d %7d %7d %7d %7d %7d %7d %7d %7d\n", " ", 2, "MEMLK", @@ -1910,18 +1946,29 @@ static int sienna_cichlid_set_default_od_settings(struct smu_context *smu) (OverDriveTable_t *)smu->smu_table.overdrive_table; OverDriveTable_t *boot_od_table = (OverDriveTable_t *)smu->smu_table.boot_overdrive_table; + OverDriveTable_t *user_od_table = + (OverDriveTable_t *)smu->smu_table.user_overdrive_table; int ret = 0; + /* + * For S3/S4/Runpm resume, no need to setup those overdrive tables again as + * - either they already have the default OD settings got during cold bootup + * - or they have some user customized OD settings which cannot be overwritten + */ + if (smu->adev->in_suspend) + return 0; + ret = smu_cmn_update_table(smu, SMU_TABLE_OVERDRIVE, - 0, (void *)od_table, false); + 0, (void *)boot_od_table, false); if (ret) { dev_err(smu->adev->dev, "Failed to get overdrive table!\n"); return ret; } - memcpy(boot_od_table, od_table, sizeof(OverDriveTable_t)); + sienna_cichlid_dump_od_table(smu, boot_od_table); - sienna_cichlid_dump_od_table(smu, od_table); + memcpy(od_table, boot_od_table, sizeof(OverDriveTable_t)); + memcpy(user_od_table, boot_od_table, sizeof(OverDriveTable_t)); return 0; } @@ -2084,13 +2131,20 @@ static int 
sienna_cichlid_od_edit_dpm_table(struct smu_context *smu, fallthrough; case PP_OD_COMMIT_DPM_TABLE: - sienna_cichlid_dump_od_table(smu, od_table); + if (memcmp(od_table, table_context->user_overdrive_table, sizeof(OverDriveTable_t))) { + sienna_cichlid_dump_od_table(smu, od_table); + ret = smu_cmn_update_table(smu, SMU_TABLE_OVERDRIVE, 0, (void *)od_table, true); + if (ret) { + dev_err(smu->adev->dev, "Failed to import overdrive table!\n"); + return ret; + } + memcpy(table_context->user_overdrive_table, od_table, sizeof(OverDriveTable_t)); + smu->user_dpm_profile.user_od = true; - ret = smu_cmn_update_table(smu, SMU_TABLE_OVERDRIVE, - 0, (void *)od_table, true); - if (ret) { - dev_err(smu->adev->dev, "Failed to import overdrive table!\n"); - return ret; + if (!memcmp(table_context->user_overdrive_table, + table_context->boot_overdrive_table, + sizeof(OverDriveTable_t))) + smu->user_dpm_profile.user_od = false; } break; @@ -3441,197 +3495,77 @@ static void sienna_cichlid_dump_pptable(struct smu_context *smu) dev_info(smu->adev->dev, "MmHubPadding[7] = 0x%x\n", pptable->MmHubPadding[7]); } -static void sienna_cichlid_fill_i2c_req(SwI2cRequest_t *req, bool write, - uint8_t address, uint32_t numbytes, - uint8_t *data) -{ - int i; - - req->I2CcontrollerPort = 1; - req->I2CSpeed = 2; - req->SlaveAddress = address; - req->NumCmds = numbytes; - - for (i = 0; i < numbytes; i++) { - SwI2cCmd_t *cmd = &req->SwI2cCmds[i]; - - /* First 2 bytes are always write for lower 2b EEPROM address */ - if (i < 2) - cmd->CmdConfig = CMDCONFIG_READWRITE_MASK; - else - cmd->CmdConfig = write ? CMDCONFIG_READWRITE_MASK : 0; - - - /* Add RESTART for read after address filled */ - cmd->CmdConfig |= (i == 2 && !write) ? CMDCONFIG_RESTART_MASK : 0; - - /* Add STOP in the end */ - cmd->CmdConfig |= (i == (numbytes - 1)) ? 
CMDCONFIG_STOP_MASK : 0; - - /* Fill with data regardless if read or write to simplify code */ - cmd->ReadWriteData = data[i]; - } -} - -static int sienna_cichlid_i2c_read_data(struct i2c_adapter *control, - uint8_t address, - uint8_t *data, - uint32_t numbytes) +static int sienna_cichlid_i2c_xfer(struct i2c_adapter *i2c_adap, + struct i2c_msg *msg, int num_msgs) { - uint32_t i, ret = 0; - SwI2cRequest_t req; - struct amdgpu_device *adev = to_amdgpu_device(control); + struct amdgpu_device *adev = to_amdgpu_device(i2c_adap); struct smu_table_context *smu_table = &adev->smu.smu_table; struct smu_table *table = &smu_table->driver_table; + SwI2cRequest_t *req, *res = (SwI2cRequest_t *)table->cpu_addr; + int i, j, r, c; + u16 dir; - if (numbytes > MAX_SW_I2C_COMMANDS) { - dev_err(adev->dev, "numbytes requested %d is over max allowed %d\n", - numbytes, MAX_SW_I2C_COMMANDS); - return -EINVAL; - } - - memset(&req, 0, sizeof(req)); - sienna_cichlid_fill_i2c_req(&req, false, address, numbytes, data); - - mutex_lock(&adev->smu.mutex); - /* Now read data starting with that address */ - ret = smu_cmn_update_table(&adev->smu, SMU_TABLE_I2C_COMMANDS, 0, &req, - true); - mutex_unlock(&adev->smu.mutex); - - if (!ret) { - SwI2cRequest_t *res = (SwI2cRequest_t *)table->cpu_addr; - - /* Assume SMU fills res.SwI2cCmds[i].Data with read bytes */ - for (i = 0; i < numbytes; i++) - data[i] = res->SwI2cCmds[i].ReadWriteData; - - dev_dbg(adev->dev, "sienna_cichlid_i2c_read_data, address = %x, bytes = %d, data :", - (uint16_t)address, numbytes); - - print_hex_dump(KERN_DEBUG, "data: ", DUMP_PREFIX_NONE, - 8, 1, data, numbytes, false); - } else - dev_err(adev->dev, "sienna_cichlid_i2c_read_data - error occurred :%x", ret); + req = kzalloc(sizeof(*req), GFP_KERNEL); + if (!req) + return -ENOMEM; - return ret; -} + req->I2CcontrollerPort = 1; + req->I2CSpeed = I2C_SPEED_FAST_400K; + req->SlaveAddress = msg[0].addr << 1; /* wants an 8-bit address */ + dir = msg[0].flags & I2C_M_RD; + + for (c = i = 0; i < num_msgs; i++) { + for (j = 0; j < msg[i].len; j++, c++) { + SwI2cCmd_t *cmd = &req->SwI2cCmds[c]; + + if (!(msg[i].flags & I2C_M_RD)) { + /* write */ + cmd->CmdConfig |= CMDCONFIG_READWRITE_MASK; + cmd->ReadWriteData = msg[i].buf[j]; + } -static int sienna_cichlid_i2c_write_data(struct i2c_adapter *control, - uint8_t address, - uint8_t *data, - uint32_t numbytes) -{ - uint32_t ret; - SwI2cRequest_t req; - struct amdgpu_device *adev = to_amdgpu_device(control); + if ((dir ^ msg[i].flags) & I2C_M_RD) { + /* The direction changes. + */ + dir = msg[i].flags & I2C_M_RD; + cmd->CmdConfig |= CMDCONFIG_RESTART_MASK; + } - if (numbytes > MAX_SW_I2C_COMMANDS) { - dev_err(adev->dev, "numbytes requested %d is over max allowed %d\n", - numbytes, MAX_SW_I2C_COMMANDS); - return -EINVAL; + req->NumCmds++; + + /* + * Insert STOP if we are at the last byte of either last + * message for the transaction or the client explicitly + * requires a STOP at this particular message. 
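One detail shared by all three rewritten xfer callbacks (arcturus, navi10 and this sienna_cichlid one): the firmware expects the target in 8-bit form, so the 7-bit address from i2c_msg is shifted left once before being stored in SlaveAddress. With illustrative values:

	/* 7-bit address as Linux carries it vs. the 8-bit form the SMU wants. */
	u16 seven_bit = 0x50;			/* typical EEPROM address      */
	u16 eight_bit = seven_bit << 1;		/* 0xA0; bit 0 is left for R/W */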
+ */ + if ((j == msg[i].len - 1) && + ((i == num_msgs - 1) || (msg[i].flags & I2C_M_STOP))) { + cmd->CmdConfig &= ~CMDCONFIG_RESTART_MASK; + cmd->CmdConfig |= CMDCONFIG_STOP_MASK; + } + } } - - memset(&req, 0, sizeof(req)); - sienna_cichlid_fill_i2c_req(&req, true, address, numbytes, data); - mutex_lock(&adev->smu.mutex); - ret = smu_cmn_update_table(&adev->smu, SMU_TABLE_I2C_COMMANDS, 0, &req, true); + r = smu_cmn_update_table(&adev->smu, SMU_TABLE_I2C_COMMANDS, 0, req, true); mutex_unlock(&adev->smu.mutex); + if (r) + goto fail; - if (!ret) { - dev_dbg(adev->dev, "sienna_cichlid_i2c_write(), address = %x, bytes = %d , data: ", - (uint16_t)address, numbytes); - - print_hex_dump(KERN_DEBUG, "data: ", DUMP_PREFIX_NONE, - 8, 1, data, numbytes, false); - /* - * According to EEPROM spec there is a MAX of 10 ms required for - * EEPROM to flush internal RX buffer after STOP was issued at the - * end of write transaction. During this time the EEPROM will not be - * responsive to any more commands - so wait a bit more. - */ - msleep(10); - - } else - dev_err(adev->dev, "sienna_cichlid_i2c_write- error occurred :%x", ret); - - return ret; -} - -static int sienna_cichlid_i2c_xfer(struct i2c_adapter *i2c_adap, - struct i2c_msg *msgs, int num) -{ - uint32_t i, j, ret, data_size, data_chunk_size, next_eeprom_addr = 0; - uint8_t *data_ptr, data_chunk[MAX_SW_I2C_COMMANDS] = { 0 }; - - for (i = 0; i < num; i++) { - /* - * SMU interface allows at most MAX_SW_I2C_COMMANDS bytes of data at - * once and hence the data needs to be spliced into chunks and sent each - * chunk separately - */ - data_size = msgs[i].len - 2; - data_chunk_size = MAX_SW_I2C_COMMANDS - 2; - next_eeprom_addr = (msgs[i].buf[0] << 8 & 0xff00) | (msgs[i].buf[1] & 0xff); - data_ptr = msgs[i].buf + 2; - - for (j = 0; j < data_size / data_chunk_size; j++) { - /* Insert the EEPROM dest addess, bits 0-15 */ - data_chunk[0] = ((next_eeprom_addr >> 8) & 0xff); - data_chunk[1] = (next_eeprom_addr & 0xff); - - if (msgs[i].flags & I2C_M_RD) { - ret = sienna_cichlid_i2c_read_data(i2c_adap, - (uint8_t)msgs[i].addr, - data_chunk, MAX_SW_I2C_COMMANDS); - - memcpy(data_ptr, data_chunk + 2, data_chunk_size); - } else { - - memcpy(data_chunk + 2, data_ptr, data_chunk_size); - - ret = sienna_cichlid_i2c_write_data(i2c_adap, - (uint8_t)msgs[i].addr, - data_chunk, MAX_SW_I2C_COMMANDS); - } - - if (ret) { - num = -EIO; - goto fail; - } - - next_eeprom_addr += data_chunk_size; - data_ptr += data_chunk_size; + for (c = i = 0; i < num_msgs; i++) { + if (!(msg[i].flags & I2C_M_RD)) { + c += msg[i].len; + continue; } + for (j = 0; j < msg[i].len; j++, c++) { + SwI2cCmd_t *cmd = &res->SwI2cCmds[c]; - if (data_size % data_chunk_size) { - data_chunk[0] = ((next_eeprom_addr >> 8) & 0xff); - data_chunk[1] = (next_eeprom_addr & 0xff); - - if (msgs[i].flags & I2C_M_RD) { - ret = sienna_cichlid_i2c_read_data(i2c_adap, - (uint8_t)msgs[i].addr, - data_chunk, (data_size % data_chunk_size) + 2); - - memcpy(data_ptr, data_chunk + 2, data_size % data_chunk_size); - } else { - memcpy(data_chunk + 2, data_ptr, data_size % data_chunk_size); - - ret = sienna_cichlid_i2c_write_data(i2c_adap, - (uint8_t)msgs[i].addr, - data_chunk, (data_size % data_chunk_size) + 2); - } - - if (ret) { - num = -EIO; - goto fail; - } + msg[i].buf[j] = cmd->ReadWriteData; } } - + r = num_msgs; fail: - return num; + kfree(req); + return r; } static u32 sienna_cichlid_i2c_func(struct i2c_adapter *adap) @@ -3645,16 +3579,25 @@ static const struct i2c_algorithm sienna_cichlid_i2c_algo = { .functionality = 
sienna_cichlid_i2c_func, }; +static const struct i2c_adapter_quirks sienna_cichlid_i2c_control_quirks = { + .flags = I2C_AQ_COMB | I2C_AQ_COMB_SAME_ADDR | I2C_AQ_NO_ZERO_LEN, + .max_read_len = MAX_SW_I2C_COMMANDS, + .max_write_len = MAX_SW_I2C_COMMANDS, + .max_comb_1st_msg_len = 2, + .max_comb_2nd_msg_len = MAX_SW_I2C_COMMANDS - 2, +}; + static int sienna_cichlid_i2c_control_init(struct smu_context *smu, struct i2c_adapter *control) { struct amdgpu_device *adev = to_amdgpu_device(control); int res; control->owner = THIS_MODULE; - control->class = I2C_CLASS_SPD; + control->class = I2C_CLASS_HWMON; control->dev.parent = &adev->pdev->dev; control->algo = &sienna_cichlid_i2c_algo; snprintf(control->name, sizeof(control->name), "AMDGPU SMU"); + control->quirks = &sienna_cichlid_i2c_control_quirks; res = i2c_add_adapter(control); if (res) @@ -3677,65 +3620,94 @@ static ssize_t sienna_cichlid_get_gpu_metrics(struct smu_context *smu, SmuMetricsExternal_t metrics_external; SmuMetrics_t *metrics = &(metrics_external.SmuMetrics); + SmuMetrics_V2_t *metrics_v2 = + &(metrics_external.SmuMetrics_V2); struct amdgpu_device *adev = smu->adev; - uint32_t smu_version; + bool use_metrics_v2 = ((adev->asic_type == CHIP_SIENNA_CICHLID) && + (smu->smc_fw_version >= 0x3A4300)) ? true : false; + uint16_t average_gfx_activity; int ret = 0; - ret = smu_cmn_get_metrics_table(smu, - &metrics_external, - true); - if (ret) + mutex_lock(&smu->metrics_lock); + ret = smu_cmn_get_metrics_table_locked(smu, + &metrics_external, + true); + if (ret) { + mutex_unlock(&smu->metrics_lock); return ret; + } smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 3); - gpu_metrics->temperature_edge = metrics->TemperatureEdge; - gpu_metrics->temperature_hotspot = metrics->TemperatureHotspot; - gpu_metrics->temperature_mem = metrics->TemperatureMem; - gpu_metrics->temperature_vrgfx = metrics->TemperatureVrGfx; - gpu_metrics->temperature_vrsoc = metrics->TemperatureVrSoc; - gpu_metrics->temperature_vrmem = metrics->TemperatureVrMem0; - - gpu_metrics->average_gfx_activity = metrics->AverageGfxActivity; - gpu_metrics->average_umc_activity = metrics->AverageUclkActivity; - gpu_metrics->average_mm_activity = metrics->VcnActivityPercentage; - - gpu_metrics->average_socket_power = metrics->AverageSocketPower; - gpu_metrics->energy_accumulator = metrics->EnergyAccumulator; - - if (metrics->AverageGfxActivity <= SMU_11_0_7_GFX_BUSY_THRESHOLD) - gpu_metrics->average_gfxclk_frequency = metrics->AverageGfxclkFrequencyPostDs; + gpu_metrics->temperature_edge = + use_metrics_v2 ? metrics_v2->TemperatureEdge : metrics->TemperatureEdge; + gpu_metrics->temperature_hotspot = + use_metrics_v2 ? metrics_v2->TemperatureHotspot : metrics->TemperatureHotspot; + gpu_metrics->temperature_mem = + use_metrics_v2 ? metrics_v2->TemperatureMem : metrics->TemperatureMem; + gpu_metrics->temperature_vrgfx = + use_metrics_v2 ? metrics_v2->TemperatureVrGfx : metrics->TemperatureVrGfx; + gpu_metrics->temperature_vrsoc = + use_metrics_v2 ? metrics_v2->TemperatureVrSoc : metrics->TemperatureVrSoc; + gpu_metrics->temperature_vrmem = + use_metrics_v2 ? metrics_v2->TemperatureVrMem0 : metrics->TemperatureVrMem0; + + gpu_metrics->average_gfx_activity = + use_metrics_v2 ? metrics_v2->AverageGfxActivity : metrics->AverageGfxActivity; + gpu_metrics->average_umc_activity = + use_metrics_v2 ? metrics_v2->AverageUclkActivity : metrics->AverageUclkActivity; + gpu_metrics->average_mm_activity = + use_metrics_v2 ? 
metrics_v2->VcnActivityPercentage : metrics->VcnActivityPercentage; + + gpu_metrics->average_socket_power = + use_metrics_v2 ? metrics_v2->AverageSocketPower : metrics->AverageSocketPower; + gpu_metrics->energy_accumulator = + use_metrics_v2 ? metrics_v2->EnergyAccumulator : metrics->EnergyAccumulator; + + average_gfx_activity = use_metrics_v2 ? metrics_v2->AverageGfxActivity : metrics->AverageGfxActivity; + if (average_gfx_activity <= SMU_11_0_7_GFX_BUSY_THRESHOLD) + gpu_metrics->average_gfxclk_frequency = + use_metrics_v2 ? metrics_v2->AverageGfxclkFrequencyPostDs : metrics->AverageGfxclkFrequencyPostDs; else - gpu_metrics->average_gfxclk_frequency = metrics->AverageGfxclkFrequencyPreDs; - gpu_metrics->average_uclk_frequency = metrics->AverageUclkFrequencyPostDs; - gpu_metrics->average_vclk0_frequency = metrics->AverageVclk0Frequency; - gpu_metrics->average_dclk0_frequency = metrics->AverageDclk0Frequency; - gpu_metrics->average_vclk1_frequency = metrics->AverageVclk1Frequency; - gpu_metrics->average_dclk1_frequency = metrics->AverageDclk1Frequency; - - gpu_metrics->current_gfxclk = metrics->CurrClock[PPCLK_GFXCLK]; - gpu_metrics->current_socclk = metrics->CurrClock[PPCLK_SOCCLK]; - gpu_metrics->current_uclk = metrics->CurrClock[PPCLK_UCLK]; - gpu_metrics->current_vclk0 = metrics->CurrClock[PPCLK_VCLK_0]; - gpu_metrics->current_dclk0 = metrics->CurrClock[PPCLK_DCLK_0]; - gpu_metrics->current_vclk1 = metrics->CurrClock[PPCLK_VCLK_1]; - gpu_metrics->current_dclk1 = metrics->CurrClock[PPCLK_DCLK_1]; - - gpu_metrics->throttle_status = metrics->ThrottlerStatus; + gpu_metrics->average_gfxclk_frequency = + use_metrics_v2 ? metrics_v2->AverageGfxclkFrequencyPreDs : metrics->AverageGfxclkFrequencyPreDs; + gpu_metrics->average_uclk_frequency = + use_metrics_v2 ? metrics_v2->AverageUclkFrequencyPostDs : metrics->AverageUclkFrequencyPostDs; + gpu_metrics->average_vclk0_frequency = + use_metrics_v2 ? metrics_v2->AverageVclk0Frequency : metrics->AverageVclk0Frequency; + gpu_metrics->average_dclk0_frequency = + use_metrics_v2 ? metrics_v2->AverageDclk0Frequency : metrics->AverageDclk0Frequency; + gpu_metrics->average_vclk1_frequency = + use_metrics_v2 ? metrics_v2->AverageVclk1Frequency : metrics->AverageVclk1Frequency; + gpu_metrics->average_dclk1_frequency = + use_metrics_v2 ? metrics_v2->AverageDclk1Frequency : metrics->AverageDclk1Frequency; + + gpu_metrics->current_gfxclk = + use_metrics_v2 ? metrics_v2->CurrClock[PPCLK_GFXCLK] : metrics->CurrClock[PPCLK_GFXCLK]; + gpu_metrics->current_socclk = + use_metrics_v2 ? metrics_v2->CurrClock[PPCLK_SOCCLK] : metrics->CurrClock[PPCLK_SOCCLK]; + gpu_metrics->current_uclk = + use_metrics_v2 ? metrics_v2->CurrClock[PPCLK_UCLK] : metrics->CurrClock[PPCLK_UCLK]; + gpu_metrics->current_vclk0 = + use_metrics_v2 ? metrics_v2->CurrClock[PPCLK_VCLK_0] : metrics->CurrClock[PPCLK_VCLK_0]; + gpu_metrics->current_dclk0 = + use_metrics_v2 ? metrics_v2->CurrClock[PPCLK_DCLK_0] : metrics->CurrClock[PPCLK_DCLK_0]; + gpu_metrics->current_vclk1 = + use_metrics_v2 ? metrics_v2->CurrClock[PPCLK_VCLK_1] : metrics->CurrClock[PPCLK_VCLK_1]; + gpu_metrics->current_dclk1 = + use_metrics_v2 ? 
metrics_v2->CurrClock[PPCLK_DCLK_1] : metrics->CurrClock[PPCLK_DCLK_1]; + + gpu_metrics->throttle_status = sienna_cichlid_get_throttler_status_locked(smu); gpu_metrics->indep_throttle_status = - smu_cmn_get_indep_throttler_status(metrics->ThrottlerStatus, + smu_cmn_get_indep_throttler_status(gpu_metrics->throttle_status, sienna_cichlid_throttler_map); - gpu_metrics->current_fan_speed = metrics->CurrFanSpeed; + gpu_metrics->current_fan_speed = use_metrics_v2 ? metrics_v2->CurrFanSpeed : metrics->CurrFanSpeed; - ret = smu_cmn_get_smc_version(smu, NULL, &smu_version); - if (ret) - return ret; - - if (((adev->asic_type == CHIP_SIENNA_CICHLID) && smu_version > 0x003A1E00) || - ((adev->asic_type == CHIP_NAVY_FLOUNDER) && smu_version > 0x00410400)) { - gpu_metrics->pcie_link_width = metrics->PcieWidth; - gpu_metrics->pcie_link_speed = link_speed[metrics->PcieRate]; + if (((adev->asic_type == CHIP_SIENNA_CICHLID) && smu->smc_fw_version > 0x003A1E00) || + ((adev->asic_type == CHIP_NAVY_FLOUNDER) && smu->smc_fw_version > 0x00410400)) { + gpu_metrics->pcie_link_width = use_metrics_v2 ? metrics_v2->PcieWidth : metrics->PcieWidth; + gpu_metrics->pcie_link_speed = link_speed[use_metrics_v2 ? metrics_v2->PcieRate : metrics->PcieRate]; } else { gpu_metrics->pcie_link_width = smu_v11_0_get_current_pcie_link_width(smu); @@ -3743,6 +3715,8 @@ static ssize_t sienna_cichlid_get_gpu_metrics(struct smu_context *smu, smu_v11_0_get_current_pcie_link_speed(smu); } + mutex_unlock(&smu->metrics_lock); + gpu_metrics->system_clock_counter = ktime_get_boottime_ns(); *table = (void *)gpu_metrics; @@ -3878,7 +3852,8 @@ static const struct pptable_funcs sienna_cichlid_ppt_funcs = { .display_config_changed = sienna_cichlid_display_config_changed, .notify_smc_display_config = sienna_cichlid_notify_smc_display_config, .is_dpm_running = sienna_cichlid_is_dpm_running, - .get_fan_speed_percent = sienna_cichlid_get_fan_speed_percent, + .get_fan_speed_pwm = smu_v11_0_get_fan_speed_pwm, + .get_fan_speed_rpm = sienna_cichlid_get_fan_speed_rpm, .get_power_profile_mode = sienna_cichlid_get_power_profile_mode, .set_power_profile_mode = sienna_cichlid_set_power_profile_mode, .set_watermarks_table = sienna_cichlid_set_watermarks_table, @@ -3921,7 +3896,8 @@ static const struct pptable_funcs sienna_cichlid_ppt_funcs = { .display_clock_voltage_request = smu_v11_0_display_clock_voltage_request, .get_fan_control_mode = smu_v11_0_get_fan_control_mode, .set_fan_control_mode = smu_v11_0_set_fan_control_mode, - .set_fan_speed_percent = smu_v11_0_set_fan_speed_percent, + .set_fan_speed_pwm = smu_v11_0_set_fan_speed_pwm, + .set_fan_speed_rpm = smu_v11_0_set_fan_speed_rpm, .set_xgmi_pstate = smu_v11_0_set_xgmi_pstate, .gfx_off_control = smu_v11_0_gfx_off_control, .register_irq_handler = smu_v11_0_register_irq_handler, @@ -3938,6 +3914,7 @@ static const struct pptable_funcs sienna_cichlid_ppt_funcs = { .set_soft_freq_limited_range = smu_v11_0_set_soft_freq_limited_range, .set_default_od_settings = sienna_cichlid_set_default_od_settings, .od_edit_dpm_table = sienna_cichlid_od_edit_dpm_table, + .restore_user_od_settings = smu_v11_0_restore_user_od_settings, .run_btc = sienna_cichlid_run_btc, .set_power_source = smu_v11_0_set_power_source, .get_pp_feature_mask = smu_cmn_get_pp_feature_mask, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c index 0a5d46ac9ccd..87b055466a33 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c @@ -266,6 
+266,9 @@ int smu_v11_0_check_fw_version(struct smu_context *smu) case CHIP_BEIGE_GOBY: smu->smc_driver_if_version = SMU11_DRIVER_IF_VERSION_Beige_Goby; break; + case CHIP_CYAN_SKILLFISH: + smu->smc_driver_if_version = SMU11_DRIVER_IF_VERSION_Cyan_Skillfish; + break; default: dev_err(smu->adev->dev, "smu unsupported asic type:%d.\n", smu->adev->asic_type); smu->smc_driver_if_version = SMU11_DRIVER_IF_VERSION_INV; @@ -422,10 +425,20 @@ int smu_v11_0_init_smc_tables(struct smu_context *smu) ret = -ENOMEM; goto err3_out; } + + smu_table->user_overdrive_table = + kzalloc(tables[SMU_TABLE_OVERDRIVE].size, GFP_KERNEL); + if (!smu_table->user_overdrive_table) { + ret = -ENOMEM; + goto err4_out; + } + } return 0; +err4_out: + kfree(smu_table->boot_overdrive_table); err3_out: kfree(smu_table->overdrive_table); err2_out: @@ -442,12 +455,14 @@ int smu_v11_0_fini_smc_tables(struct smu_context *smu) struct smu_dpm_context *smu_dpm = &smu->smu_dpm; kfree(smu_table->gpu_metrics_table); + kfree(smu_table->user_overdrive_table); kfree(smu_table->boot_overdrive_table); kfree(smu_table->overdrive_table); kfree(smu_table->max_sustainable_clocks); kfree(smu_table->driver_pptable); kfree(smu_table->clocks_table); smu_table->gpu_metrics_table = NULL; + smu_table->user_overdrive_table = NULL; smu_table->boot_overdrive_table = NULL; smu_table->overdrive_table = NULL; smu_table->max_sustainable_clocks = NULL; @@ -1185,17 +1200,13 @@ smu_v11_0_set_fan_static_mode(struct smu_context *smu, uint32_t mode) } int -smu_v11_0_set_fan_speed_percent(struct smu_context *smu, uint32_t speed) +smu_v11_0_set_fan_speed_pwm(struct smu_context *smu, uint32_t speed) { struct amdgpu_device *adev = smu->adev; uint32_t duty100, duty; uint64_t tmp64; - if (speed > 100) - speed = 100; - - if (smu_v11_0_auto_fan_control(smu, 0)) - return -EINVAL; + speed = MIN(speed, 255); duty100 = REG_GET_FIELD(RREG32_SOC15(THM, 0, mmCG_FDO_CTRL1), CG_FDO_CTRL1, FMAX_DUTY100); @@ -1203,7 +1214,7 @@ smu_v11_0_set_fan_speed_percent(struct smu_context *smu, uint32_t speed) return -EINVAL; tmp64 = (uint64_t)speed * duty100; - do_div(tmp64, 100); + do_div(tmp64, 255); duty = (uint32_t)tmp64; WREG32_SOC15(THM, 0, mmCG_FDO_CTRL0, @@ -1213,6 +1224,99 @@ smu_v11_0_set_fan_speed_percent(struct smu_context *smu, uint32_t speed) return smu_v11_0_set_fan_static_mode(smu, FDO_PWM_MODE_STATIC); } +int smu_v11_0_set_fan_speed_rpm(struct smu_context *smu, + uint32_t speed) +{ + struct amdgpu_device *adev = smu->adev; + /* + * crystal_clock_freq used for fan speed rpm calculation is + * always 25Mhz. So, hardcode it as 2500(in 10K unit). + */ + uint32_t crystal_clock_freq = 2500; + uint32_t tach_period; + + /* + * To prevent from possible overheat, some ASICs may have requirement + * for minimum fan speed: + * - For some NV10 SKU, the fan speed cannot be set lower than + * 700 RPM. + * - For some Sienna Cichlid SKU, the fan speed cannot be set + * lower than 500 RPM. + */ + tach_period = 60 * crystal_clock_freq * 10000 / (8 * speed); + WREG32_SOC15(THM, 0, mmCG_TACH_CTRL, + REG_SET_FIELD(RREG32_SOC15(THM, 0, mmCG_TACH_CTRL), + CG_TACH_CTRL, TARGET_PERIOD, + tach_period)); + + return smu_v11_0_set_fan_static_mode(smu, FDO_PWM_MODE_STATIC_RPM); +} + +int smu_v11_0_get_fan_speed_pwm(struct smu_context *smu, + uint32_t *speed) +{ + struct amdgpu_device *adev = smu->adev; + uint32_t duty100, duty; + uint64_t tmp64; + + /* + * For pre Sienna Cichlid ASICs, the 0 RPM may be not correctly + * detected via register retrieving. 
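The new setters above reduce to two small conversions: a 0-255 PWM request is rescaled against the FMAX_DUTY100 value read from CG_FDO_CTRL1, and an RPM request becomes a tachometer period derived from the fixed 25 MHz reference (carried as 2500 in 10 kHz units). A worked example with round, illustrative inputs:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t duty100 = 100;		/* FMAX_DUTY100 as read from the register */
	uint32_t pwm = 128;		/* requested PWM on the 0-255 scale       */
	uint64_t duty = (uint64_t)pwm * duty100 / 255;		/* -> 50          */

	uint32_t crystal_clock_freq = 2500;	/* 25 MHz in 10 kHz units         */
	uint32_t rpm = 1500;			/* requested fan speed            */
	uint32_t tach_period = 60 * crystal_clock_freq * 10000 / (8 * rpm);
						/* -> 125000 for TARGET_PERIOD    */

	printf("duty=%llu tach_period=%u\n", (unsigned long long)duty, tach_period);
	return 0;
}

The same arithmetic runs in reverse in the getters: duty * 255 / duty100 recovers the PWM value, and dividing the 1.5e9 constant by CG_TACH_STATUS recovers the RPM.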
To workaround this, we will + * report the fan speed as 0 PWM if user just requested such. + */ + if ((smu->user_dpm_profile.flags & SMU_CUSTOM_FAN_SPEED_PWM) + && !smu->user_dpm_profile.fan_speed_pwm) { + *speed = 0; + return 0; + } + + duty100 = REG_GET_FIELD(RREG32_SOC15(THM, 0, mmCG_FDO_CTRL1), + CG_FDO_CTRL1, FMAX_DUTY100); + duty = REG_GET_FIELD(RREG32_SOC15(THM, 0, mmCG_THERMAL_STATUS), + CG_THERMAL_STATUS, FDO_PWM_DUTY); + if (!duty100) + return -EINVAL; + + tmp64 = (uint64_t)duty * 255; + do_div(tmp64, duty100); + *speed = MIN((uint32_t)tmp64, 255); + + return 0; +} + +int smu_v11_0_get_fan_speed_rpm(struct smu_context *smu, + uint32_t *speed) +{ + struct amdgpu_device *adev = smu->adev; + uint32_t crystal_clock_freq = 2500; + uint32_t tach_status; + uint64_t tmp64; + + /* + * For pre Sienna Cichlid ASICs, the 0 RPM may be not correctly + * detected via register retrieving. To workaround this, we will + * report the fan speed as 0 RPM if user just requested such. + */ + if ((smu->user_dpm_profile.flags & SMU_CUSTOM_FAN_SPEED_RPM) + && !smu->user_dpm_profile.fan_speed_rpm) { + *speed = 0; + return 0; + } + + tmp64 = (uint64_t)crystal_clock_freq * 60 * 10000; + + tach_status = RREG32_SOC15(THM, 0, mmCG_TACH_STATUS); + if (tach_status) { + do_div(tmp64, tach_status); + *speed = (uint32_t)tmp64; + } else { + dev_warn_once(adev->dev, "Got zero output on CG_TACH_STATUS reading!\n"); + *speed = 0; + } + + return 0; +} + int smu_v11_0_set_fan_control_mode(struct smu_context *smu, uint32_t mode) @@ -1221,7 +1325,9 @@ smu_v11_0_set_fan_control_mode(struct smu_context *smu, switch (mode) { case AMD_FAN_CTRL_NONE: - ret = smu_v11_0_set_fan_speed_percent(smu, 100); + ret = smu_v11_0_auto_fan_control(smu, 0); + if (!ret) + ret = smu_v11_0_set_fan_speed_pwm(smu, 255); break; case AMD_FAN_CTRL_MANUAL: ret = smu_v11_0_auto_fan_control(smu, 0); @@ -2101,3 +2207,16 @@ int smu_v11_0_deep_sleep_control(struct smu_context *smu, return ret; } + +int smu_v11_0_restore_user_od_settings(struct smu_context *smu) +{ + struct smu_table_context *table_context = &smu->smu_table; + void *user_od_table = table_context->user_overdrive_table; + int ret = 0; + + ret = smu_cmn_update_table(smu, SMU_TABLE_OVERDRIVE, 0, (void *)user_od_table, true); + if (ret) + dev_err(smu->adev->dev, "Failed to import overdrive table!\n"); + + return ret; +} diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c index bcaaa086fc2f..6eb50b05a33c 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c @@ -592,28 +592,28 @@ static int vangogh_print_legacy_clk_levels(struct smu_context *smu, switch (clk_type) { case SMU_OD_SCLK: if (smu_dpm_ctx->dpm_level == AMD_DPM_FORCED_LEVEL_MANUAL) { - size = sprintf(buf, "%s:\n", "OD_SCLK"); - size += sprintf(buf + size, "0: %10uMhz\n", + size = sysfs_emit(buf, "%s:\n", "OD_SCLK"); + size += sysfs_emit_at(buf, size, "0: %10uMhz\n", (smu->gfx_actual_hard_min_freq > 0) ? smu->gfx_actual_hard_min_freq : smu->gfx_default_hard_min_freq); - size += sprintf(buf + size, "1: %10uMhz\n", + size += sysfs_emit_at(buf, size, "1: %10uMhz\n", (smu->gfx_actual_soft_max_freq > 0) ? 
smu->gfx_actual_soft_max_freq : smu->gfx_default_soft_max_freq); } break; case SMU_OD_CCLK: if (smu_dpm_ctx->dpm_level == AMD_DPM_FORCED_LEVEL_MANUAL) { - size = sprintf(buf, "CCLK_RANGE in Core%d:\n", smu->cpu_core_id_select); - size += sprintf(buf + size, "0: %10uMhz\n", + size = sysfs_emit(buf, "CCLK_RANGE in Core%d:\n", smu->cpu_core_id_select); + size += sysfs_emit_at(buf, size, "0: %10uMhz\n", (smu->cpu_actual_soft_min_freq > 0) ? smu->cpu_actual_soft_min_freq : smu->cpu_default_soft_min_freq); - size += sprintf(buf + size, "1: %10uMhz\n", + size += sysfs_emit_at(buf, size, "1: %10uMhz\n", (smu->cpu_actual_soft_max_freq > 0) ? smu->cpu_actual_soft_max_freq : smu->cpu_default_soft_max_freq); } break; case SMU_OD_RANGE: if (smu_dpm_ctx->dpm_level == AMD_DPM_FORCED_LEVEL_MANUAL) { - size = sprintf(buf, "%s:\n", "OD_RANGE"); - size += sprintf(buf + size, "SCLK: %7uMhz %10uMhz\n", + size = sysfs_emit(buf, "%s:\n", "OD_RANGE"); + size += sysfs_emit_at(buf, size, "SCLK: %7uMhz %10uMhz\n", smu->gfx_default_hard_min_freq, smu->gfx_default_soft_max_freq); - size += sprintf(buf + size, "CCLK: %7uMhz %10uMhz\n", + size += sysfs_emit_at(buf, size, "CCLK: %7uMhz %10uMhz\n", smu->cpu_default_soft_min_freq, smu->cpu_default_soft_max_freq); } break; @@ -656,14 +656,14 @@ static int vangogh_print_legacy_clk_levels(struct smu_context *smu, return ret; if (!value) continue; - size += sprintf(buf + size, "%d: %uMhz %s\n", i, value, + size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n", i, value, cur_value == value ? "*" : ""); if (cur_value == value) cur_value_match_level = true; } if (!cur_value_match_level) - size += sprintf(buf + size, " %uMhz *\n", cur_value); + size += sysfs_emit_at(buf, size, " %uMhz *\n", cur_value); break; default: break; @@ -691,28 +691,28 @@ static int vangogh_print_clk_levels(struct smu_context *smu, switch (clk_type) { case SMU_OD_SCLK: if (smu_dpm_ctx->dpm_level == AMD_DPM_FORCED_LEVEL_MANUAL) { - size = sprintf(buf, "%s:\n", "OD_SCLK"); - size += sprintf(buf + size, "0: %10uMhz\n", + size = sysfs_emit(buf, "%s:\n", "OD_SCLK"); + size += sysfs_emit_at(buf, size, "0: %10uMhz\n", (smu->gfx_actual_hard_min_freq > 0) ? smu->gfx_actual_hard_min_freq : smu->gfx_default_hard_min_freq); - size += sprintf(buf + size, "1: %10uMhz\n", + size += sysfs_emit_at(buf, size, "1: %10uMhz\n", (smu->gfx_actual_soft_max_freq > 0) ? smu->gfx_actual_soft_max_freq : smu->gfx_default_soft_max_freq); } break; case SMU_OD_CCLK: if (smu_dpm_ctx->dpm_level == AMD_DPM_FORCED_LEVEL_MANUAL) { - size = sprintf(buf, "CCLK_RANGE in Core%d:\n", smu->cpu_core_id_select); - size += sprintf(buf + size, "0: %10uMhz\n", + size = sysfs_emit(buf, "CCLK_RANGE in Core%d:\n", smu->cpu_core_id_select); + size += sysfs_emit_at(buf, size, "0: %10uMhz\n", (smu->cpu_actual_soft_min_freq > 0) ? smu->cpu_actual_soft_min_freq : smu->cpu_default_soft_min_freq); - size += sprintf(buf + size, "1: %10uMhz\n", + size += sysfs_emit_at(buf, size, "1: %10uMhz\n", (smu->cpu_actual_soft_max_freq > 0) ? 
smu->cpu_actual_soft_max_freq : smu->cpu_default_soft_max_freq); } break; case SMU_OD_RANGE: if (smu_dpm_ctx->dpm_level == AMD_DPM_FORCED_LEVEL_MANUAL) { - size = sprintf(buf, "%s:\n", "OD_RANGE"); - size += sprintf(buf + size, "SCLK: %7uMhz %10uMhz\n", + size = sysfs_emit(buf, "%s:\n", "OD_RANGE"); + size += sysfs_emit_at(buf, size, "SCLK: %7uMhz %10uMhz\n", smu->gfx_default_hard_min_freq, smu->gfx_default_soft_max_freq); - size += sprintf(buf + size, "CCLK: %7uMhz %10uMhz\n", + size += sysfs_emit_at(buf, size, "CCLK: %7uMhz %10uMhz\n", smu->cpu_default_soft_min_freq, smu->cpu_default_soft_max_freq); } break; @@ -755,14 +755,14 @@ static int vangogh_print_clk_levels(struct smu_context *smu, return ret; if (!value) continue; - size += sprintf(buf + size, "%d: %uMhz %s\n", i, value, + size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n", i, value, cur_value == value ? "*" : ""); if (cur_value == value) cur_value_match_level = true; } if (!cur_value_match_level) - size += sprintf(buf + size, " %uMhz *\n", cur_value); + size += sysfs_emit_at(buf, size, " %uMhz *\n", cur_value); break; default: break; @@ -1035,7 +1035,7 @@ static int vangogh_get_power_profile_mode(struct smu_context *smu, if (workload_type < 0) continue; - size += sprintf(buf + size, "%2d %14s%s\n", + size += sysfs_emit_at(buf, size, "%2d %14s%s\n", i, profile_name[i], (i == smu->power_profile_mode) ? "*" : " "); } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c index 9a9c24a6ec35..b39138041141 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c @@ -510,16 +510,16 @@ static int renoir_print_clk_levels(struct smu_context *smu, 0, &max); if (ret) return ret; - size += sprintf(buf + size, "OD_RANGE\nSCLK: %10uMhz %10uMhz\n", min, max); + size += sysfs_emit_at(buf, size, "OD_RANGE\nSCLK: %10uMhz %10uMhz\n", min, max); } break; case SMU_OD_SCLK: if (smu_dpm_ctx->dpm_level == AMD_DPM_FORCED_LEVEL_MANUAL) { min = (smu->gfx_actual_hard_min_freq > 0) ? smu->gfx_actual_hard_min_freq : smu->gfx_default_hard_min_freq; max = (smu->gfx_actual_soft_max_freq > 0) ? smu->gfx_actual_soft_max_freq : smu->gfx_default_soft_max_freq; - size += sprintf(buf + size, "OD_SCLK\n"); - size += sprintf(buf + size, "0:%10uMhz\n", min); - size += sprintf(buf + size, "1:%10uMhz\n", max); + size += sysfs_emit_at(buf, size, "OD_SCLK\n"); + size += sysfs_emit_at(buf, size, "0:%10uMhz\n", min); + size += sysfs_emit_at(buf, size, "1:%10uMhz\n", max); } break; case SMU_GFXCLK: @@ -536,12 +536,12 @@ static int renoir_print_clk_levels(struct smu_context *smu, else i = 1; - size += sprintf(buf + size, "0: %uMhz %s\n", min, + size += sysfs_emit_at(buf, size, "0: %uMhz %s\n", min, i == 0 ? "*" : ""); - size += sprintf(buf + size, "1: %uMhz %s\n", + size += sysfs_emit_at(buf, size, "1: %uMhz %s\n", i == 1 ? cur_value : RENOIR_UMD_PSTATE_GFXCLK, i == 1 ? "*" : ""); - size += sprintf(buf + size, "2: %uMhz %s\n", max, + size += sysfs_emit_at(buf, size, "2: %uMhz %s\n", max, i == 2 ? "*" : ""); } return size; @@ -588,14 +588,14 @@ static int renoir_print_clk_levels(struct smu_context *smu, return ret; if (!value) continue; - size += sprintf(buf + size, "%d: %uMhz %s\n", i, value, + size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n", i, value, cur_value == value ? 
"*" : ""); if (cur_value == value) cur_value_match_level = true; } if (!cur_value_match_level) - size += sprintf(buf + size, " %uMhz *\n", cur_value); + size += sysfs_emit_at(buf, size, " %uMhz *\n", cur_value); break; default: @@ -1118,7 +1118,7 @@ static int renoir_get_power_profile_mode(struct smu_context *smu, if (workload_type < 0) continue; - size += sprintf(buf + size, "%2d %14s%s\n", + size += sysfs_emit_at(buf, size, "%2d %14s%s\n", i, profile_name[i], (i == smu->power_profile_mode) ? "*" : " "); } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c index cb5485cf243f..ec8c30daf31c 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c @@ -90,8 +90,8 @@ static const struct cmn2asic_msg_mapping aldebaran_message_map[SMU_MSG_MAX_COUNT MSG_MAP(GetDriverIfVersion, PPSMC_MSG_GetDriverIfVersion, 1), MSG_MAP(EnableAllSmuFeatures, PPSMC_MSG_EnableAllSmuFeatures, 0), MSG_MAP(DisableAllSmuFeatures, PPSMC_MSG_DisableAllSmuFeatures, 0), - MSG_MAP(GetEnabledSmuFeaturesLow, PPSMC_MSG_GetEnabledSmuFeaturesLow, 0), - MSG_MAP(GetEnabledSmuFeaturesHigh, PPSMC_MSG_GetEnabledSmuFeaturesHigh, 0), + MSG_MAP(GetEnabledSmuFeaturesLow, PPSMC_MSG_GetEnabledSmuFeaturesLow, 1), + MSG_MAP(GetEnabledSmuFeaturesHigh, PPSMC_MSG_GetEnabledSmuFeaturesHigh, 1), MSG_MAP(SetDriverDramAddrHigh, PPSMC_MSG_SetDriverDramAddrHigh, 1), MSG_MAP(SetDriverDramAddrLow, PPSMC_MSG_SetDriverDramAddrLow, 1), MSG_MAP(SetToolsDramAddrHigh, PPSMC_MSG_SetToolsDramAddrHigh, 0), @@ -150,20 +150,20 @@ static const struct cmn2asic_mapping aldebaran_clk_map[SMU_CLK_COUNT] = { }; static const struct cmn2asic_mapping aldebaran_feature_mask_map[SMU_FEATURE_COUNT] = { - ALDEBARAN_FEA_MAP(SMU_FEATURE_DPM_PREFETCHER_BIT, FEATURE_DATA_CALCULATIONS), + ALDEBARAN_FEA_MAP(SMU_FEATURE_DATA_CALCULATIONS_BIT, FEATURE_DATA_CALCULATIONS), ALDEBARAN_FEA_MAP(SMU_FEATURE_DPM_GFXCLK_BIT, FEATURE_DPM_GFXCLK_BIT), ALDEBARAN_FEA_MAP(SMU_FEATURE_DPM_UCLK_BIT, FEATURE_DPM_UCLK_BIT), ALDEBARAN_FEA_MAP(SMU_FEATURE_DPM_SOCCLK_BIT, FEATURE_DPM_SOCCLK_BIT), ALDEBARAN_FEA_MAP(SMU_FEATURE_DPM_FCLK_BIT, FEATURE_DPM_FCLK_BIT), ALDEBARAN_FEA_MAP(SMU_FEATURE_DPM_LCLK_BIT, FEATURE_DPM_LCLK_BIT), - ALDEBARAN_FEA_MAP(SMU_FEATURE_XGMI_BIT, FEATURE_DPM_XGMI_BIT), + ALDEBARAN_FEA_MAP(SMU_FEATURE_DPM_XGMI_BIT, FEATURE_DPM_XGMI_BIT), ALDEBARAN_FEA_MAP(SMU_FEATURE_DS_GFXCLK_BIT, FEATURE_DS_GFXCLK_BIT), ALDEBARAN_FEA_MAP(SMU_FEATURE_DS_SOCCLK_BIT, FEATURE_DS_SOCCLK_BIT), ALDEBARAN_FEA_MAP(SMU_FEATURE_DS_LCLK_BIT, FEATURE_DS_LCLK_BIT), ALDEBARAN_FEA_MAP(SMU_FEATURE_DS_FCLK_BIT, FEATURE_DS_FCLK_BIT), ALDEBARAN_FEA_MAP(SMU_FEATURE_DS_UCLK_BIT, FEATURE_DS_UCLK_BIT), ALDEBARAN_FEA_MAP(SMU_FEATURE_GFX_SS_BIT, FEATURE_GFX_SS_BIT), - ALDEBARAN_FEA_MAP(SMU_FEATURE_VCN_PG_BIT, FEATURE_DPM_VCN_BIT), + ALDEBARAN_FEA_MAP(SMU_FEATURE_VCN_DPM_BIT, FEATURE_DPM_VCN_BIT), ALDEBARAN_FEA_MAP(SMU_FEATURE_RSMU_SMN_CG_BIT, FEATURE_RSMU_SMN_CG_BIT), ALDEBARAN_FEA_MAP(SMU_FEATURE_WAFL_CG_BIT, FEATURE_WAFL_CG_BIT), ALDEBARAN_FEA_MAP(SMU_FEATURE_PPT_BIT, FEATURE_PPT_BIT), @@ -735,14 +735,14 @@ static int aldebaran_print_clk_levels(struct smu_context *smu, uint32_t min_clk, max_clk; if (amdgpu_ras_intr_triggered()) - return snprintf(buf, PAGE_SIZE, "unavailable\n"); + return sysfs_emit(buf, "unavailable\n"); dpm_context = smu_dpm->dpm_context; switch (type) { case SMU_OD_SCLK: - size = sprintf(buf, "%s:\n", "GFXCLK"); + size = sysfs_emit(buf, "%s:\n", "GFXCLK"); fallthrough; case 
SMU_SCLK: ret = aldebaran_get_current_clk_freq_by_table(smu, SMU_GFXCLK, &now); @@ -779,8 +779,7 @@ static int aldebaran_print_clk_levels(struct smu_context *smu, */ if (display_levels == clocks.num_levels) { for (i = 0; i < clocks.num_levels; i++) - size += sprintf( - buf + size, "%d: %uMhz %s\n", i, + size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n", i, freq_values[i], (clocks.num_levels == 1) ? "*" : @@ -790,14 +789,14 @@ static int aldebaran_print_clk_levels(struct smu_context *smu, "")); } else { for (i = 0; i < display_levels; i++) - size += sprintf(buf + size, "%d: %uMhz %s\n", i, + size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n", i, freq_values[i], i == 1 ? "*" : ""); } break; case SMU_OD_MCLK: - size = sprintf(buf, "%s:\n", "MCLK"); + size = sysfs_emit(buf, "%s:\n", "MCLK"); fallthrough; case SMU_MCLK: ret = aldebaran_get_current_clk_freq_by_table(smu, SMU_UCLK, &now); @@ -814,7 +813,7 @@ static int aldebaran_print_clk_levels(struct smu_context *smu, } for (i = 0; i < clocks.num_levels; i++) - size += sprintf(buf + size, "%d: %uMhz %s\n", + size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n", i, clocks.data[i].clocks_in_khz / 1000, (clocks.num_levels == 1) ? "*" : (aldebaran_freqs_in_same_level( @@ -837,7 +836,7 @@ static int aldebaran_print_clk_levels(struct smu_context *smu, } for (i = 0; i < clocks.num_levels; i++) - size += sprintf(buf + size, "%d: %uMhz %s\n", + size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n", i, clocks.data[i].clocks_in_khz / 1000, (clocks.num_levels == 1) ? "*" : (aldebaran_freqs_in_same_level( @@ -860,7 +859,7 @@ static int aldebaran_print_clk_levels(struct smu_context *smu, } for (i = 0; i < single_dpm_table->count; i++) - size += sprintf(buf + size, "%d: %uMhz %s\n", + size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n", i, single_dpm_table->dpm_levels[i].value, (clocks.num_levels == 1) ? "*" : (aldebaran_freqs_in_same_level( @@ -883,7 +882,7 @@ static int aldebaran_print_clk_levels(struct smu_context *smu, } for (i = 0; i < single_dpm_table->count; i++) - size += sprintf(buf + size, "%d: %uMhz %s\n", + size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n", i, single_dpm_table->dpm_levels[i].value, (clocks.num_levels == 1) ? "*" : (aldebaran_freqs_in_same_level( @@ -906,7 +905,7 @@ static int aldebaran_print_clk_levels(struct smu_context *smu, } for (i = 0; i < single_dpm_table->count; i++) - size += sprintf(buf + size, "%d: %uMhz %s\n", + size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n", i, single_dpm_table->dpm_levels[i].value, (clocks.num_levels == 1) ? "*" : (aldebaran_freqs_in_same_level( @@ -1194,8 +1193,19 @@ static int aldebaran_get_power_limit(struct smu_context *smu, uint32_t power_limit = 0; int ret; - if (!smu_cmn_feature_is_enabled(smu, SMU_FEATURE_PPT_BIT)) - return -EINVAL; + if (!smu_cmn_feature_is_enabled(smu, SMU_FEATURE_PPT_BIT)) { + if (current_power_limit) + *current_power_limit = 0; + if (default_power_limit) + *default_power_limit = 0; + if (max_power_limit) + *max_power_limit = 0; + + dev_warn(smu->adev->dev, + "PPT feature is not enabled, power values can't be fetched."); + + return 0; + } /* Valid power data is available only from primary die. * For secondary die show the value as 0. 
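A recurring change throughout these swsmu hunks is the switch from sprintf(buf + size, ...) to sysfs_emit()/sysfs_emit_at() when filling sysfs show() buffers; the offset bookkeeping stays the same, but the helpers enforce the PAGE_SIZE limit and warn on a non-page-aligned buffer. A minimal sketch of the pattern, using a hypothetical attribute and clock table:

#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/sysfs.h>

static ssize_t clk_levels_show(struct device *dev,
			       struct device_attribute *attr, char *buf)
{
	static const unsigned int clk_mhz[] = { 300, 600, 1200 };
	unsigned int cur_mhz = 600;	/* hypothetical current level */
	int i, size = 0;

	/* old style was: size += sprintf(buf + size, ...), with no bounds check */
	for (i = 0; i < ARRAY_SIZE(clk_mhz); i++)
		size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n", i, clk_mhz[i],
				      clk_mhz[i] == cur_mhz ? "*" : "");

	return size;
}
static DEVICE_ATTR_RO(clk_levels);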
@@ -1451,197 +1461,77 @@ static bool aldebaran_is_dpm_running(struct smu_context *smu) return !!(feature_enabled & SMC_DPM_FEATURE); } -static void aldebaran_fill_i2c_req(SwI2cRequest_t *req, bool write, - uint8_t address, uint32_t numbytes, - uint8_t *data) -{ - int i; - - req->I2CcontrollerPort = 0; - req->I2CSpeed = 2; - req->SlaveAddress = address; - req->NumCmds = numbytes; - - for (i = 0; i < numbytes; i++) { - SwI2cCmd_t *cmd = &req->SwI2cCmds[i]; - - /* First 2 bytes are always write for lower 2b EEPROM address */ - if (i < 2) - cmd->CmdConfig = CMDCONFIG_READWRITE_MASK; - else - cmd->CmdConfig = write ? CMDCONFIG_READWRITE_MASK : 0; - - - /* Add RESTART for read after address filled */ - cmd->CmdConfig |= (i == 2 && !write) ? CMDCONFIG_RESTART_MASK : 0; - - /* Add STOP in the end */ - cmd->CmdConfig |= (i == (numbytes - 1)) ? CMDCONFIG_STOP_MASK : 0; - - /* Fill with data regardless if read or write to simplify code */ - cmd->ReadWriteData = data[i]; - } -} - -static int aldebaran_i2c_read_data(struct i2c_adapter *control, - uint8_t address, - uint8_t *data, - uint32_t numbytes) +static int aldebaran_i2c_xfer(struct i2c_adapter *i2c_adap, + struct i2c_msg *msg, int num_msgs) { - uint32_t i, ret = 0; - SwI2cRequest_t req; - struct amdgpu_device *adev = to_amdgpu_device(control); + struct amdgpu_device *adev = to_amdgpu_device(i2c_adap); struct smu_table_context *smu_table = &adev->smu.smu_table; struct smu_table *table = &smu_table->driver_table; + SwI2cRequest_t *req, *res = (SwI2cRequest_t *)table->cpu_addr; + int i, j, r, c; + u16 dir; - if (numbytes > MAX_SW_I2C_COMMANDS) { - dev_err(adev->dev, "numbytes requested %d is over max allowed %d\n", - numbytes, MAX_SW_I2C_COMMANDS); - return -EINVAL; - } - - memset(&req, 0, sizeof(req)); - aldebaran_fill_i2c_req(&req, false, address, numbytes, data); - - mutex_lock(&adev->smu.mutex); - /* Now read data starting with that address */ - ret = smu_cmn_update_table(&adev->smu, SMU_TABLE_I2C_COMMANDS, 0, &req, - true); - mutex_unlock(&adev->smu.mutex); - - if (!ret) { - SwI2cRequest_t *res = (SwI2cRequest_t *)table->cpu_addr; - - /* Assume SMU fills res.SwI2cCmds[i].Data with read bytes */ - for (i = 0; i < numbytes; i++) - data[i] = res->SwI2cCmds[i].ReadWriteData; - - dev_dbg(adev->dev, "aldebaran_i2c_read_data, address = %x, bytes = %d, data :", - (uint16_t)address, numbytes); - - print_hex_dump(KERN_DEBUG, "data: ", DUMP_PREFIX_NONE, - 8, 1, data, numbytes, false); - } else - dev_err(adev->dev, "aldebaran_i2c_read_data - error occurred :%x", ret); + req = kzalloc(sizeof(*req), GFP_KERNEL); + if (!req) + return -ENOMEM; - return ret; -} + req->I2CcontrollerPort = 0; + req->I2CSpeed = I2C_SPEED_FAST_400K; + req->SlaveAddress = msg[0].addr << 1; /* wants an 8-bit address */ + dir = msg[0].flags & I2C_M_RD; + + for (c = i = 0; i < num_msgs; i++) { + for (j = 0; j < msg[i].len; j++, c++) { + SwI2cCmd_t *cmd = &req->SwI2cCmds[c]; + + if (!(msg[i].flags & I2C_M_RD)) { + /* write */ + cmd->CmdConfig |= CMDCONFIG_READWRITE_MASK; + cmd->ReadWriteData = msg[i].buf[j]; + } -static int aldebaran_i2c_write_data(struct i2c_adapter *control, - uint8_t address, - uint8_t *data, - uint32_t numbytes) -{ - uint32_t ret; - SwI2cRequest_t req; - struct amdgpu_device *adev = to_amdgpu_device(control); + if ((dir ^ msg[i].flags) & I2C_M_RD) { + /* The direction changes. 
+ */ + dir = msg[i].flags & I2C_M_RD; + cmd->CmdConfig |= CMDCONFIG_RESTART_MASK; + } - if (numbytes > MAX_SW_I2C_COMMANDS) { - dev_err(adev->dev, "numbytes requested %d is over max allowed %d\n", - numbytes, MAX_SW_I2C_COMMANDS); - return -EINVAL; + req->NumCmds++; + + /* + * Insert STOP if we are at the last byte of either last + * message for the transaction or the client explicitly + * requires a STOP at this particular message. + */ + if ((j == msg[i].len - 1) && + ((i == num_msgs - 1) || (msg[i].flags & I2C_M_STOP))) { + cmd->CmdConfig &= ~CMDCONFIG_RESTART_MASK; + cmd->CmdConfig |= CMDCONFIG_STOP_MASK; + } + } } - - memset(&req, 0, sizeof(req)); - aldebaran_fill_i2c_req(&req, true, address, numbytes, data); - mutex_lock(&adev->smu.mutex); - ret = smu_cmn_update_table(&adev->smu, SMU_TABLE_I2C_COMMANDS, 0, &req, true); + r = smu_cmn_update_table(&adev->smu, SMU_TABLE_I2C_COMMANDS, 0, req, true); mutex_unlock(&adev->smu.mutex); + if (r) + goto fail; - if (!ret) { - dev_dbg(adev->dev, "aldebaran_i2c_write(), address = %x, bytes = %d , data: ", - (uint16_t)address, numbytes); - - print_hex_dump(KERN_DEBUG, "data: ", DUMP_PREFIX_NONE, - 8, 1, data, numbytes, false); - /* - * According to EEPROM spec there is a MAX of 10 ms required for - * EEPROM to flush internal RX buffer after STOP was issued at the - * end of write transaction. During this time the EEPROM will not be - * responsive to any more commands - so wait a bit more. - */ - msleep(10); - - } else - dev_err(adev->dev, "aldebaran_i2c_write- error occurred :%x", ret); - - return ret; -} - -static int aldebaran_i2c_xfer(struct i2c_adapter *i2c_adap, - struct i2c_msg *msgs, int num) -{ - uint32_t i, j, ret, data_size, data_chunk_size, next_eeprom_addr = 0; - uint8_t *data_ptr, data_chunk[MAX_SW_I2C_COMMANDS] = { 0 }; - - for (i = 0; i < num; i++) { - /* - * SMU interface allows at most MAX_SW_I2C_COMMANDS bytes of data at - * once and hence the data needs to be spliced into chunks and sent each - * chunk separately - */ - data_size = msgs[i].len - 2; - data_chunk_size = MAX_SW_I2C_COMMANDS - 2; - next_eeprom_addr = (msgs[i].buf[0] << 8 & 0xff00) | (msgs[i].buf[1] & 0xff); - data_ptr = msgs[i].buf + 2; - - for (j = 0; j < data_size / data_chunk_size; j++) { - /* Insert the EEPROM dest addess, bits 0-15 */ - data_chunk[0] = ((next_eeprom_addr >> 8) & 0xff); - data_chunk[1] = (next_eeprom_addr & 0xff); - - if (msgs[i].flags & I2C_M_RD) { - ret = aldebaran_i2c_read_data(i2c_adap, - (uint8_t)msgs[i].addr, - data_chunk, MAX_SW_I2C_COMMANDS); - - memcpy(data_ptr, data_chunk + 2, data_chunk_size); - } else { - - memcpy(data_chunk + 2, data_ptr, data_chunk_size); - - ret = aldebaran_i2c_write_data(i2c_adap, - (uint8_t)msgs[i].addr, - data_chunk, MAX_SW_I2C_COMMANDS); - } - - if (ret) { - num = -EIO; - goto fail; - } - - next_eeprom_addr += data_chunk_size; - data_ptr += data_chunk_size; + for (c = i = 0; i < num_msgs; i++) { + if (!(msg[i].flags & I2C_M_RD)) { + c += msg[i].len; + continue; } + for (j = 0; j < msg[i].len; j++, c++) { + SwI2cCmd_t *cmd = &res->SwI2cCmds[c]; - if (data_size % data_chunk_size) { - data_chunk[0] = ((next_eeprom_addr >> 8) & 0xff); - data_chunk[1] = (next_eeprom_addr & 0xff); - - if (msgs[i].flags & I2C_M_RD) { - ret = aldebaran_i2c_read_data(i2c_adap, - (uint8_t)msgs[i].addr, - data_chunk, (data_size % data_chunk_size) + 2); - - memcpy(data_ptr, data_chunk + 2, data_size % data_chunk_size); - } else { - memcpy(data_chunk + 2, data_ptr, data_size % data_chunk_size); - - ret = aldebaran_i2c_write_data(i2c_adap, - 
(uint8_t)msgs[i].addr, - data_chunk, (data_size % data_chunk_size) + 2); - } - - if (ret) { - num = -EIO; - goto fail; - } + msg[i].buf[j] = cmd->ReadWriteData; } } - + r = num_msgs; fail: - return num; + kfree(req); + return r; } static u32 aldebaran_i2c_func(struct i2c_adapter *adap) @@ -1655,6 +1545,14 @@ static const struct i2c_algorithm aldebaran_i2c_algo = { .functionality = aldebaran_i2c_func, }; +static const struct i2c_adapter_quirks aldebaran_i2c_control_quirks = { + .flags = I2C_AQ_COMB | I2C_AQ_COMB_SAME_ADDR | I2C_AQ_NO_ZERO_LEN, + .max_read_len = MAX_SW_I2C_COMMANDS, + .max_write_len = MAX_SW_I2C_COMMANDS, + .max_comb_1st_msg_len = 2, + .max_comb_2nd_msg_len = MAX_SW_I2C_COMMANDS - 2, +}; + static int aldebaran_i2c_control_init(struct smu_context *smu, struct i2c_adapter *control) { struct amdgpu_device *adev = to_amdgpu_device(control); @@ -1665,6 +1563,7 @@ static int aldebaran_i2c_control_init(struct smu_context *smu, struct i2c_adapte control->dev.parent = &adev->pdev->dev; control->algo = &aldebaran_i2c_algo; snprintf(control->name, sizeof(control->name), "AMDGPU SMU"); + control->quirks = &aldebaran_i2c_control_quirks; res = i2c_add_adapter(control); if (res) @@ -1764,7 +1663,9 @@ static void aldebaran_log_thermal_throttling_event(struct smu_context *smu) dev_warn(adev->dev, "WARN: GPU thermal throttling temperature reached, expect performance decrease. %s.\n", log_buf); - kgd2kfd_smi_event_throttle(smu->adev->kfd.dev, throttler_status); + kgd2kfd_smi_event_throttle(smu->adev->kfd.dev, + smu_cmn_get_indep_throttler_status(throttler_status, + aldebaran_throttler_map)); } static int aldebaran_get_current_pcie_link_speed(struct smu_context *smu) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c index a421ba85bd6d..a0e50f23b1dd 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c @@ -85,6 +85,10 @@ int smu_v13_0_init_microcode(struct smu_context *smu) const struct common_firmware_header *header; struct amdgpu_firmware_info *ucode = NULL; + /* doesn't need to load smu firmware in IOV mode */ + if (amdgpu_sriov_vf(adev)) + return 0; + switch (adev->asic_type) { case CHIP_ALDEBARAN: chip_name = "aldebaran"; @@ -268,52 +272,86 @@ static int smu_v13_0_set_pptable_v2_1(struct smu_context *smu, void **table, return 0; } -int smu_v13_0_setup_pptable(struct smu_context *smu) +static int smu_v13_0_get_pptable_from_vbios(struct smu_context *smu, void **table, uint32_t *size) { struct amdgpu_device *adev = smu->adev; - const struct smc_firmware_header_v1_0 *hdr; - int ret, index; - uint32_t size = 0; uint16_t atom_table_size; uint8_t frev, crev; - void *table; - uint16_t version_major, version_minor; + int ret, index; + dev_info(adev->dev, "use vbios provided pptable\n"); + index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, + powerplayinfo); - if (amdgpu_smu_pptable_id >= 0) { - smu->smu_table.boot_values.pp_table_id = amdgpu_smu_pptable_id; - dev_info(adev->dev, "override pptable id %d\n", amdgpu_smu_pptable_id); - } + ret = amdgpu_atombios_get_data_table(adev, index, &atom_table_size, &frev, &crev, + (uint8_t **)table); + if (ret) + return ret; + + if (size) + *size = atom_table_size; + + return 0; +} + +static int smu_v13_0_get_pptable_from_firmware(struct smu_context *smu, void **table, uint32_t *size, + uint32_t pptable_id) +{ + const struct smc_firmware_header_v1_0 *hdr; + struct amdgpu_device *adev = smu->adev; + uint16_t version_major, 
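To make the new aldebaran_i2c_xfer() easier to follow: every byte of every i2c_msg becomes one SwI2c command, the 7-bit client address is shifted left once because the firmware wants an 8-bit address, a RESTART is inserted on a read/write direction change, and a STOP replaces any RESTART on the final byte of the transaction (or of a message flagged I2C_M_STOP). The adapter quirks then guarantee a combined transfer is at most a 2-byte write (register/EEPROM offset) plus up to MAX_SW_I2C_COMMANDS - 2 data bytes. Below is a standalone sketch of just the per-byte flag logic for a 2-byte write followed by a 4-byte read; the mask values are placeholders, not the firmware's CMDCONFIG_* definitions.

#include <stdio.h>

#define RW	0x1	/* placeholder for CMDCONFIG_READWRITE_MASK */
#define RESTART	0x2	/* placeholder for CMDCONFIG_RESTART_MASK */
#define STOP	0x4	/* placeholder for CMDCONFIG_STOP_MASK */

struct msg { int read; int len; };

int main(void)
{
	struct msg msgs[] = { { 0, 2 }, { 1, 4 } };	/* write offset, then read */
	int num = 2, dir = msgs[0].read, i, j, c = 0;

	for (i = 0; i < num; i++) {
		for (j = 0; j < msgs[i].len; j++, c++) {
			unsigned int cfg = 0;

			if (!msgs[i].read)
				cfg |= RW;		/* write bytes carry data */
			if (dir != msgs[i].read) {	/* direction change */
				dir = msgs[i].read;
				cfg |= RESTART;
			}
			if (j == msgs[i].len - 1 && i == num - 1)
				cfg = (cfg & ~RESTART) | STOP;
			printf("cmd %d: cfg=0x%x\n", c, cfg);
		}
	}
	return 0;
}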
version_minor; + int ret; hdr = (const struct smc_firmware_header_v1_0 *) adev->pm.fw->data; + if (!hdr) + return -EINVAL; + + dev_info(adev->dev, "use driver provided pptable %d\n", pptable_id); + version_major = le16_to_cpu(hdr->header.header_version_major); version_minor = le16_to_cpu(hdr->header.header_version_minor); - if (version_major == 2 && smu->smu_table.boot_values.pp_table_id > 0) { - dev_info(adev->dev, "use driver provided pptable %d\n", smu->smu_table.boot_values.pp_table_id); - switch (version_minor) { - case 1: - ret = smu_v13_0_set_pptable_v2_1(smu, &table, &size, - smu->smu_table.boot_values.pp_table_id); - break; - default: - ret = -EINVAL; - break; - } - if (ret) - return ret; + if (version_major != 2) { + dev_err(adev->dev, "Unsupported smu firmware version %d.%d\n", + version_major, version_minor); + return -EINVAL; + } - } else { - dev_info(adev->dev, "use vbios provided pptable\n"); - index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, - powerplayinfo); + switch (version_minor) { + case 1: + ret = smu_v13_0_set_pptable_v2_1(smu, table, size, pptable_id); + break; + default: + ret = -EINVAL; + break; + } - ret = amdgpu_atombios_get_data_table(adev, index, &atom_table_size, &frev, &crev, - (uint8_t **)&table); - if (ret) - return ret; - size = atom_table_size; + return ret; +} + +int smu_v13_0_setup_pptable(struct smu_context *smu) +{ + struct amdgpu_device *adev = smu->adev; + uint32_t size = 0, pptable_id = 0; + void *table; + int ret = 0; + + /* override pptable_id from driver parameter */ + if (amdgpu_smu_pptable_id >= 0) { + pptable_id = amdgpu_smu_pptable_id; + dev_info(adev->dev, "override pptable id %d\n", pptable_id); + } else { + pptable_id = smu->smu_table.boot_values.pp_table_id; } + /* force using vbios pptable in sriov mode */ + if (amdgpu_sriov_vf(adev) || !pptable_id) + ret = smu_v13_0_get_pptable_from_vbios(smu, &table, &size); + else + ret = smu_v13_0_get_pptable_from_firmware(smu, &table, &size, pptable_id); + + if (ret) + return ret; + if (!smu->smu_table.power_play_table) smu->smu_table.power_play_table = table; if (!smu->smu_table.power_play_table_size) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c index 0cfeb9fc7c03..0f17c2522c85 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c @@ -572,7 +572,7 @@ static int yellow_carp_get_power_profile_mode(struct smu_context *smu, if (workload_type < 0) continue; - size += sprintf(buf + size, "%2d %14s%s\n", + size += sysfs_emit_at(buf, size, "%2d %14s%s\n", i, profile_name[i], (i == smu->power_profile_mode) ? "*" : " "); } @@ -1054,15 +1054,15 @@ static int yellow_carp_print_clk_levels(struct smu_context *smu, switch (clk_type) { case SMU_OD_SCLK: - size = sprintf(buf, "%s:\n", "OD_SCLK"); - size += sprintf(buf + size, "0: %10uMhz\n", + size = sysfs_emit(buf, "%s:\n", "OD_SCLK"); + size += sysfs_emit_at(buf, size, "0: %10uMhz\n", (smu->gfx_actual_hard_min_freq > 0) ? smu->gfx_actual_hard_min_freq : smu->gfx_default_hard_min_freq); - size += sprintf(buf + size, "1: %10uMhz\n", + size += sysfs_emit_at(buf, size, "1: %10uMhz\n", (smu->gfx_actual_soft_max_freq > 0) ? 
smu->gfx_actual_soft_max_freq : smu->gfx_default_soft_max_freq); break; case SMU_OD_RANGE: - size = sprintf(buf, "%s:\n", "OD_RANGE"); - size += sprintf(buf + size, "SCLK: %7uMhz %10uMhz\n", + size = sysfs_emit(buf, "%s:\n", "OD_RANGE"); + size += sysfs_emit_at(buf, size, "SCLK: %7uMhz %10uMhz\n", smu->gfx_default_hard_min_freq, smu->gfx_default_soft_max_freq); break; case SMU_SOCCLK: @@ -1083,7 +1083,7 @@ static int yellow_carp_print_clk_levels(struct smu_context *smu, if (ret) goto print_clk_out; - size += sprintf(buf + size, "%d: %uMhz %s\n", i, value, + size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n", i, value, cur_value == value ? "*" : ""); } break; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c index e802f9a95f08..66711ab24c15 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c @@ -55,7 +55,7 @@ #undef __SMU_DUMMY_MAP #define __SMU_DUMMY_MAP(type) #type -static const char* __smu_message_names[] = { +static const char * const __smu_message_names[] = { SMU_MESSAGE_TYPES }; @@ -76,55 +76,256 @@ static void smu_cmn_read_arg(struct smu_context *smu, *arg = RREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_82); } -int smu_cmn_wait_for_response(struct smu_context *smu) +/* Redefine the SMU error codes here. + * + * Note that these definitions are redundant and should be removed + * when the SMU has exported a unified header file containing these + * macros, which header file we can just include and use the SMU's + * macros. At the moment, these error codes are defined by the SMU + * per-ASIC unfortunately, yet we're a one driver for all ASICs. + */ +#define SMU_RESP_NONE 0 +#define SMU_RESP_OK 1 +#define SMU_RESP_CMD_FAIL 0xFF +#define SMU_RESP_CMD_UNKNOWN 0xFE +#define SMU_RESP_CMD_BAD_PREREQ 0xFD +#define SMU_RESP_BUSY_OTHER 0xFC +#define SMU_RESP_DEBUG_END 0xFB + +/** + * __smu_cmn_poll_stat -- poll for a status from the SMU + * smu: a pointer to SMU context + * + * Returns the status of the SMU, which could be, + * 0, the SMU is busy with your previous command; + * 1, execution status: success, execution result: success; + * 0xFF, execution status: success, execution result: failure; + * 0xFE, unknown command; + * 0xFD, valid command, but bad (command) prerequisites; + * 0xFC, the command was rejected as the SMU is busy; + * 0xFB, "SMC_Result_DebugDataDumpEnd". + * + * The values here are not defined by macros, because I'd rather we + * include a single header file which defines them, which is + * maintained by the SMU FW team, so that we're impervious to firmware + * changes. At the moment those values are defined in various header + * files, one for each ASIC, yet here we're a single ASIC-agnostic + * interface. Such a change can be followed-up by a subsequent patch. 
+ */ +static u32 __smu_cmn_poll_stat(struct smu_context *smu) { struct amdgpu_device *adev = smu->adev; - uint32_t cur_value, i, timeout = adev->usec_timeout * 20; + int timeout = adev->usec_timeout * 20; + u32 reg; - for (i = 0; i < timeout; i++) { - cur_value = RREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90); - if ((cur_value & MP1_C2PMSG_90__CONTENT_MASK) != 0) - return cur_value; + for ( ; timeout > 0; timeout--) { + reg = RREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90); + if ((reg & MP1_C2PMSG_90__CONTENT_MASK) != 0) + break; udelay(1); } - /* timeout means wrong logic */ - if (i == timeout) - return -ETIME; - - return RREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90); + return reg; } -int smu_cmn_send_msg_without_waiting(struct smu_context *smu, - uint16_t msg, uint32_t param) +static void __smu_cmn_reg_print_error(struct smu_context *smu, + u32 reg_c2pmsg_90, + int msg_index, + u32 param, + enum smu_message_type msg) { struct amdgpu_device *adev = smu->adev; - int ret; + const char *message = smu_get_message_name(smu, msg); - ret = smu_cmn_wait_for_response(smu); - if (ret != 0x1) { - dev_err(adev->dev, "Msg issuing pre-check failed(0x%x) and " - "SMU may be not in the right state!\n", ret); - if (ret != -ETIME) - ret = -EIO; - return ret; + switch (reg_c2pmsg_90) { + case SMU_RESP_NONE: + dev_err_ratelimited(adev->dev, + "SMU: I'm not done with your previous command!"); + break; + case SMU_RESP_OK: + /* The SMU executed the command. It completed with a + * successful result. + */ + break; + case SMU_RESP_CMD_FAIL: + /* The SMU executed the command. It completed with an + * unsuccessful result. + */ + break; + case SMU_RESP_CMD_UNKNOWN: + dev_err_ratelimited(adev->dev, + "SMU: unknown command: index:%d param:0x%08X message:%s", + msg_index, param, message); + break; + case SMU_RESP_CMD_BAD_PREREQ: + dev_err_ratelimited(adev->dev, + "SMU: valid command, bad prerequisites: index:%d param:0x%08X message:%s", + msg_index, param, message); + break; + case SMU_RESP_BUSY_OTHER: + dev_err_ratelimited(adev->dev, + "SMU: I'm very busy for your command: index:%d param:0x%08X message:%s", + msg_index, param, message); + break; + case SMU_RESP_DEBUG_END: + dev_err_ratelimited(adev->dev, + "SMU: I'm debugging!"); + break; + default: + dev_err_ratelimited(adev->dev, + "SMU: response:0x%08X for index:%d param:0x%08X message:%s?", + reg_c2pmsg_90, msg_index, param, message); + break; } +} + +static int __smu_cmn_reg2errno(struct smu_context *smu, u32 reg_c2pmsg_90) +{ + int res; + + switch (reg_c2pmsg_90) { + case SMU_RESP_NONE: + /* The SMU is busy--still executing your command. + */ + res = -ETIME; + break; + case SMU_RESP_OK: + res = 0; + break; + case SMU_RESP_CMD_FAIL: + /* Command completed successfully, but the command + * status was failure. + */ + res = -EIO; + break; + case SMU_RESP_CMD_UNKNOWN: + /* Unknown command--ignored by the SMU. + */ + res = -EOPNOTSUPP; + break; + case SMU_RESP_CMD_BAD_PREREQ: + /* Valid command--bad prerequisites. + */ + res = -EINVAL; + break; + case SMU_RESP_BUSY_OTHER: + /* The SMU is busy with other commands. The client + * should retry in 10 us. + */ + res = -EBUSY; + break; + default: + /* Unknown or debug response from the SMU. 
+ */ + res = -EREMOTEIO; + break; + } + + return res; +} + +static void __smu_cmn_send_msg(struct smu_context *smu, + u16 msg, + u32 param) +{ + struct amdgpu_device *adev = smu->adev; WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90, 0); WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_82, param); WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_66, msg); +} - return 0; +/** + * smu_cmn_send_msg_without_waiting -- send the message; don't wait for status + * @smu: pointer to an SMU context + * @msg_index: message index + * @param: message parameter to send to the SMU + * + * Send a message to the SMU with the parameter passed. Do not wait + * for status/result of the message, thus the "without_waiting". + * + * Return 0 on success, -errno on error if we weren't able to _send_ + * the message for some reason. See __smu_cmn_reg2errno() for details + * of the -errno. + */ +int smu_cmn_send_msg_without_waiting(struct smu_context *smu, + uint16_t msg_index, + uint32_t param) +{ + u32 reg; + int res; + + if (smu->adev->no_hw_access) + return 0; + + reg = __smu_cmn_poll_stat(smu); + res = __smu_cmn_reg2errno(smu, reg); + if (reg == SMU_RESP_NONE || + reg == SMU_RESP_BUSY_OTHER || + res == -EREMOTEIO) + goto Out; + __smu_cmn_send_msg(smu, msg_index, param); + res = 0; +Out: + return res; +} + +/** + * smu_cmn_wait_for_response -- wait for response from the SMU + * @smu: pointer to an SMU context + * + * Wait for status from the SMU. + * + * Return 0 on success, -errno on error, indicating the execution + * status and result of the message being waited for. See + * __smu_cmn_reg2errno() for details of the -errno. + */ +int smu_cmn_wait_for_response(struct smu_context *smu) +{ + u32 reg; + + reg = __smu_cmn_poll_stat(smu); + return __smu_cmn_reg2errno(smu, reg); } +/** + * smu_cmn_send_smc_msg_with_param -- send a message with parameter + * @smu: pointer to an SMU context + * @msg: message to send + * @param: parameter to send to the SMU + * @read_arg: pointer to u32 to return a value from the SMU back + * to the caller + * + * Send the message @msg with parameter @param to the SMU, wait for + * completion of the command, and return back a value from the SMU in + * @read_arg pointer. + * + * Return 0 on success, -errno on error, if we weren't able to send + * the message or if the message completed with some kind of + * error. See __smu_cmn_reg2errno() for details of the -errno. + * + * If we weren't able to send the message to the SMU, we also print + * the error to the standard log. + * + * Command completion status is printed only if the -errno is + * -EREMOTEIO, indicating that the SMU returned back an + * undefined/unknown/unspecified result. All other cases are + * well-defined, not printed, but instead given back to the client to + * decide what further to do. + * + * The return value, @read_arg is read back regardless, to give back + * more information to the client, which on error would most likely be + * @param, but we can't assume that. This also eliminates more + * conditionals. + */ int smu_cmn_send_smc_msg_with_param(struct smu_context *smu, enum smu_message_type msg, uint32_t param, uint32_t *read_arg) { - struct amdgpu_device *adev = smu->adev; - int ret = 0, index = 0; + int res, index; + u32 reg; if (smu->adev->no_hw_access) return 0; @@ -136,31 +337,24 @@ int smu_cmn_send_smc_msg_with_param(struct smu_context *smu, return index == -EACCES ? 
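The practical effect of this rework is that smu_cmn_send_smc_msg_with_param() now returns a meaningful -errno (0, -ETIME, -EIO, -EOPNOTSUPP, -EINVAL, -EBUSY or -EREMOTEIO) instead of the raw response register value. A hedged caller-side sketch, with a hypothetical retry policy built on the 10 us hint from the SMU_RESP_BUSY_OTHER comment; the helper itself is not part of the patch:

#define SWSMU_CODE_LAYER_L2	/* smu_cmn.h only exposes these prototypes to L2+ */
#include <linux/delay.h>
#include <linux/errno.h>
#include "amdgpu_smu.h"
#include "smu_cmn.h"

static int example_send_with_retry(struct smu_context *smu,
				   enum smu_message_type msg, uint32_t param)
{
	int i, ret;

	for (i = 0; i < 5; i++) {
		ret = smu_cmn_send_smc_msg_with_param(smu, msg, param, NULL);
		if (ret != -EBUSY)	/* success or a real error: propagate */
			return ret;
		udelay(10);		/* SMU busy with someone else's command */
	}

	return ret;	/* still -EBUSY after a few tries */
}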
0 : index; mutex_lock(&smu->message_lock); - ret = smu_cmn_send_msg_without_waiting(smu, (uint16_t)index, param); - if (ret) - goto out; - - ret = smu_cmn_wait_for_response(smu); - if (ret != 0x1) { - if (ret == -ETIME) { - dev_err(adev->dev, "message: %15s (%d) \tparam: 0x%08x is timeout (no response)\n", - smu_get_message_name(smu, msg), index, param); - } else { - dev_err(adev->dev, "failed send message: %15s (%d) \tparam: 0x%08x response %#x\n", - smu_get_message_name(smu, msg), index, param, - ret); - ret = -EIO; - } - goto out; + reg = __smu_cmn_poll_stat(smu); + res = __smu_cmn_reg2errno(smu, reg); + if (reg == SMU_RESP_NONE || + reg == SMU_RESP_BUSY_OTHER || + res == -EREMOTEIO) { + __smu_cmn_reg_print_error(smu, reg, index, param, msg); + goto Out; } - + __smu_cmn_send_msg(smu, (uint16_t) index, param); + reg = __smu_cmn_poll_stat(smu); + res = __smu_cmn_reg2errno(smu, reg); + if (res == -EREMOTEIO) + __smu_cmn_reg_print_error(smu, reg, index, param, msg); if (read_arg) smu_cmn_read_arg(smu, read_arg); - - ret = 0; /* 0 as driver return value */ -out: +Out: mutex_unlock(&smu->message_lock); - return ret; + return res; } int smu_cmn_send_smc_msg(struct smu_context *smu, @@ -516,7 +710,7 @@ size_t smu_cmn_get_pp_feature_mask(struct smu_context *smu, return 0; } - size = sprintf(buf + size, "features high: 0x%08x low: 0x%08x\n", + size = sysfs_emit_at(buf, size, "features high: 0x%08x low: 0x%08x\n", feature_mask[1], feature_mask[0]); memset(sort_feature, -1, sizeof(sort_feature)); @@ -531,14 +725,14 @@ size_t smu_cmn_get_pp_feature_mask(struct smu_context *smu, sort_feature[feature_index] = i; } - size += sprintf(buf + size, "%-2s. %-20s %-3s : %-s\n", + size += sysfs_emit_at(buf, size, "%-2s. %-20s %-3s : %-s\n", "No", "Feature", "Bit", "State"); for (i = 0; i < SMU_FEATURE_COUNT; i++) { if (sort_feature[i] < 0) continue; - size += sprintf(buf + size, "%02d. %-20s (%2d) : %s\n", + size += sysfs_emit_at(buf, size, "%02d. 
%-20s (%2d) : %s\n", count++, smu_get_feature_name(smu, sort_feature[i]), i, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h index 9add5f16ff56..16993daa2ae0 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h @@ -27,7 +27,8 @@ #if defined(SWSMU_CODE_LAYER_L2) || defined(SWSMU_CODE_LAYER_L3) || defined(SWSMU_CODE_LAYER_L4) int smu_cmn_send_msg_without_waiting(struct smu_context *smu, - uint16_t msg, uint32_t param); + uint16_t msg_index, + uint32_t param); int smu_cmn_send_smc_msg_with_param(struct smu_context *smu, enum smu_message_type msg, uint32_t param, diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_kms.c b/drivers/gpu/drm/arm/display/komeda/komeda_kms.c index ff45f23f3d56..93b7f09b96ca 100644 --- a/drivers/gpu/drm/arm/display/komeda/komeda_kms.c +++ b/drivers/gpu/drm/arm/display/komeda/komeda_kms.c @@ -13,7 +13,6 @@ #include <drm/drm_fb_helper.h> #include <drm/drm_gem_cma_helper.h> #include <drm/drm_gem_framebuffer_helper.h> -#include <drm/drm_irq.h> #include <drm/drm_managed.h> #include <drm/drm_probe_helper.h> #include <drm/drm_vblank.h> @@ -301,8 +300,6 @@ struct komeda_kms_dev *komeda_kms_attach(struct komeda_dev *mdev) if (err) goto free_component_binding; - drm->irq_enabled = true; - drm_kms_helper_poll_init(drm); err = drm_dev_register(drm, 0); @@ -313,7 +310,6 @@ struct komeda_kms_dev *komeda_kms_attach(struct komeda_dev *mdev) free_interrupts: drm_kms_helper_poll_fini(drm); - drm->irq_enabled = false; free_component_binding: component_unbind_all(mdev->dev, drm); cleanup_mode_config: @@ -331,7 +327,6 @@ void komeda_kms_detach(struct komeda_kms_dev *kms) drm_dev_unregister(drm); drm_kms_helper_poll_fini(drm); drm_atomic_helper_shutdown(drm); - drm->irq_enabled = false; component_unbind_all(mdev->dev, drm); drm_mode_config_cleanup(drm); komeda_kms_cleanup_private_objs(kms); diff --git a/drivers/gpu/drm/arm/hdlcd_drv.c b/drivers/gpu/drm/arm/hdlcd_drv.c index 81ae92390736..479c2422a2e0 100644 --- a/drivers/gpu/drm/arm/hdlcd_drv.c +++ b/drivers/gpu/drm/arm/hdlcd_drv.c @@ -29,7 +29,6 @@ #include <drm/drm_fb_helper.h> #include <drm/drm_gem_cma_helper.h> #include <drm/drm_gem_framebuffer_helper.h> -#include <drm/drm_irq.h> #include <drm/drm_modeset_helper.h> #include <drm/drm_of.h> #include <drm/drm_probe_helper.h> @@ -38,6 +37,94 @@ #include "hdlcd_drv.h" #include "hdlcd_regs.h" +static irqreturn_t hdlcd_irq(int irq, void *arg) +{ + struct drm_device *drm = arg; + struct hdlcd_drm_private *hdlcd = drm->dev_private; + unsigned long irq_status; + + irq_status = hdlcd_read(hdlcd, HDLCD_REG_INT_STATUS); + +#ifdef CONFIG_DEBUG_FS + if (irq_status & HDLCD_INTERRUPT_UNDERRUN) + atomic_inc(&hdlcd->buffer_underrun_count); + + if (irq_status & HDLCD_INTERRUPT_DMA_END) + atomic_inc(&hdlcd->dma_end_count); + + if (irq_status & HDLCD_INTERRUPT_BUS_ERROR) + atomic_inc(&hdlcd->bus_error_count); + + if (irq_status & HDLCD_INTERRUPT_VSYNC) + atomic_inc(&hdlcd->vsync_count); + +#endif + if (irq_status & HDLCD_INTERRUPT_VSYNC) + drm_crtc_handle_vblank(&hdlcd->crtc); + + /* acknowledge interrupt(s) */ + hdlcd_write(hdlcd, HDLCD_REG_INT_CLEAR, irq_status); + + return IRQ_HANDLED; +} + +static void hdlcd_irq_preinstall(struct drm_device *drm) +{ + struct hdlcd_drm_private *hdlcd = drm->dev_private; + /* Ensure interrupts are disabled */ + hdlcd_write(hdlcd, HDLCD_REG_INT_MASK, 0); + hdlcd_write(hdlcd, HDLCD_REG_INT_CLEAR, ~0); +} + +static void hdlcd_irq_postinstall(struct drm_device *drm) +{ +#ifdef 
CONFIG_DEBUG_FS + struct hdlcd_drm_private *hdlcd = drm->dev_private; + unsigned long irq_mask = hdlcd_read(hdlcd, HDLCD_REG_INT_MASK); + + /* enable debug interrupts */ + irq_mask |= HDLCD_DEBUG_INT_MASK; + + hdlcd_write(hdlcd, HDLCD_REG_INT_MASK, irq_mask); +#endif +} + +static int hdlcd_irq_install(struct drm_device *drm, int irq) +{ + int ret; + + if (irq == IRQ_NOTCONNECTED) + return -ENOTCONN; + + hdlcd_irq_preinstall(drm); + + ret = request_irq(irq, hdlcd_irq, 0, drm->driver->name, drm); + if (ret) + return ret; + + hdlcd_irq_postinstall(drm); + + return 0; +} + +static void hdlcd_irq_uninstall(struct drm_device *drm) +{ + struct hdlcd_drm_private *hdlcd = drm->dev_private; + /* disable all the interrupts that we might have enabled */ + unsigned long irq_mask = hdlcd_read(hdlcd, HDLCD_REG_INT_MASK); + +#ifdef CONFIG_DEBUG_FS + /* disable debug interrupts */ + irq_mask &= ~HDLCD_DEBUG_INT_MASK; +#endif + + /* disable vsync interrupts */ + irq_mask &= ~HDLCD_INTERRUPT_VSYNC; + hdlcd_write(hdlcd, HDLCD_REG_INT_MASK, irq_mask); + + free_irq(hdlcd->irq, drm); +} + static int hdlcd_load(struct drm_device *drm, unsigned long flags) { struct hdlcd_drm_private *hdlcd = drm->dev_private; @@ -90,7 +177,12 @@ static int hdlcd_load(struct drm_device *drm, unsigned long flags) goto setup_fail; } - ret = drm_irq_install(drm, platform_get_irq(pdev, 0)); + ret = platform_get_irq(pdev, 0); + if (ret < 0) + goto irq_fail; + hdlcd->irq = ret; + + ret = hdlcd_irq_install(drm, hdlcd->irq); if (ret < 0) { DRM_ERROR("failed to install IRQ handler\n"); goto irq_fail; @@ -122,76 +214,6 @@ static void hdlcd_setup_mode_config(struct drm_device *drm) drm->mode_config.funcs = &hdlcd_mode_config_funcs; } -static irqreturn_t hdlcd_irq(int irq, void *arg) -{ - struct drm_device *drm = arg; - struct hdlcd_drm_private *hdlcd = drm->dev_private; - unsigned long irq_status; - - irq_status = hdlcd_read(hdlcd, HDLCD_REG_INT_STATUS); - -#ifdef CONFIG_DEBUG_FS - if (irq_status & HDLCD_INTERRUPT_UNDERRUN) - atomic_inc(&hdlcd->buffer_underrun_count); - - if (irq_status & HDLCD_INTERRUPT_DMA_END) - atomic_inc(&hdlcd->dma_end_count); - - if (irq_status & HDLCD_INTERRUPT_BUS_ERROR) - atomic_inc(&hdlcd->bus_error_count); - - if (irq_status & HDLCD_INTERRUPT_VSYNC) - atomic_inc(&hdlcd->vsync_count); - -#endif - if (irq_status & HDLCD_INTERRUPT_VSYNC) - drm_crtc_handle_vblank(&hdlcd->crtc); - - /* acknowledge interrupt(s) */ - hdlcd_write(hdlcd, HDLCD_REG_INT_CLEAR, irq_status); - - return IRQ_HANDLED; -} - -static void hdlcd_irq_preinstall(struct drm_device *drm) -{ - struct hdlcd_drm_private *hdlcd = drm->dev_private; - /* Ensure interrupts are disabled */ - hdlcd_write(hdlcd, HDLCD_REG_INT_MASK, 0); - hdlcd_write(hdlcd, HDLCD_REG_INT_CLEAR, ~0); -} - -static int hdlcd_irq_postinstall(struct drm_device *drm) -{ -#ifdef CONFIG_DEBUG_FS - struct hdlcd_drm_private *hdlcd = drm->dev_private; - unsigned long irq_mask = hdlcd_read(hdlcd, HDLCD_REG_INT_MASK); - - /* enable debug interrupts */ - irq_mask |= HDLCD_DEBUG_INT_MASK; - - hdlcd_write(hdlcd, HDLCD_REG_INT_MASK, irq_mask); -#endif - return 0; -} - -static void hdlcd_irq_uninstall(struct drm_device *drm) -{ - struct hdlcd_drm_private *hdlcd = drm->dev_private; - /* disable all the interrupts that we might have enabled */ - unsigned long irq_mask = hdlcd_read(hdlcd, HDLCD_REG_INT_MASK); - -#ifdef CONFIG_DEBUG_FS - /* disable debug interrupts */ - irq_mask &= ~HDLCD_DEBUG_INT_MASK; -#endif - - /* disable vsync interrupts */ - irq_mask &= ~HDLCD_INTERRUPT_VSYNC; - - 
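With drm_irq_install() gone, hdlcd now owns its interrupt line directly: fetch it with platform_get_irq(), mask and clear the hardware before request_irq(), and mirror that in the uninstall path before free_irq(). A minimal sketch of the same pattern for a hypothetical platform driver (the "foo" names and the elided mask/clear details are placeholders):

#include <linux/interrupt.h>
#include <linux/platform_device.h>

struct foo_priv {
	unsigned int irq;
};

static irqreturn_t foo_irq(int irq, void *arg)
{
	struct foo_priv *priv = arg;

	/* read status, handle events, acknowledge the interrupt */
	(void)priv;
	return IRQ_HANDLED;
}

static int foo_irq_install(struct platform_device *pdev, struct foo_priv *priv)
{
	int ret = platform_get_irq(pdev, 0);

	if (ret < 0)
		return ret;
	priv->irq = ret;

	/* mask and clear device interrupts here, before the handler can run */
	return request_irq(priv->irq, foo_irq, 0, dev_name(&pdev->dev), priv);
}

static void foo_irq_uninstall(struct foo_priv *priv)
{
	/* mask device interrupts here, then release the line */
	free_irq(priv->irq, priv);
}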
hdlcd_write(hdlcd, HDLCD_REG_INT_MASK, irq_mask); -} - #ifdef CONFIG_DEBUG_FS static int hdlcd_show_underrun_count(struct seq_file *m, void *arg) { @@ -236,10 +258,6 @@ DEFINE_DRM_GEM_CMA_FOPS(fops); static const struct drm_driver hdlcd_driver = { .driver_features = DRIVER_GEM | DRIVER_MODESET | DRIVER_ATOMIC, - .irq_handler = hdlcd_irq, - .irq_preinstall = hdlcd_irq_preinstall, - .irq_postinstall = hdlcd_irq_postinstall, - .irq_uninstall = hdlcd_irq_uninstall, DRM_GEM_CMA_DRIVER_OPS, #ifdef CONFIG_DEBUG_FS .debugfs_init = hdlcd_debugfs_init, @@ -316,7 +334,7 @@ err_pm_active: err_unload: of_node_put(hdlcd->crtc.port); hdlcd->crtc.port = NULL; - drm_irq_uninstall(drm); + hdlcd_irq_uninstall(drm); of_reserved_mem_device_release(drm->dev); err_free: drm_mode_config_cleanup(drm); @@ -338,7 +356,7 @@ static void hdlcd_drm_unbind(struct device *dev) hdlcd->crtc.port = NULL; pm_runtime_get_sync(dev); drm_atomic_helper_shutdown(drm); - drm_irq_uninstall(drm); + hdlcd_irq_uninstall(drm); pm_runtime_put(dev); if (pm_runtime_enabled(dev)) pm_runtime_disable(dev); diff --git a/drivers/gpu/drm/arm/hdlcd_drv.h b/drivers/gpu/drm/arm/hdlcd_drv.h index fd438d177b64..909c39c28487 100644 --- a/drivers/gpu/drm/arm/hdlcd_drv.h +++ b/drivers/gpu/drm/arm/hdlcd_drv.h @@ -11,6 +11,7 @@ struct hdlcd_drm_private { struct clk *clk; struct drm_crtc crtc; struct drm_plane *plane; + unsigned int irq; #ifdef CONFIG_DEBUG_FS atomic_t buffer_underrun_count; atomic_t bus_error_count; diff --git a/drivers/gpu/drm/arm/malidp_drv.c b/drivers/gpu/drm/arm/malidp_drv.c index de59f3302516..78d15b04b105 100644 --- a/drivers/gpu/drm/arm/malidp_drv.c +++ b/drivers/gpu/drm/arm/malidp_drv.c @@ -847,8 +847,6 @@ static int malidp_bind(struct device *dev) if (ret < 0) goto irq_init_fail; - drm->irq_enabled = true; - ret = drm_vblank_init(drm, drm->mode_config.num_crtc); if (ret < 0) { DRM_ERROR("failed to initialise vblank\n"); @@ -874,7 +872,6 @@ register_fail: vblank_fail: malidp_se_irq_fini(hwdev); malidp_de_irq_fini(hwdev); - drm->irq_enabled = false; irq_init_fail: drm_atomic_helper_shutdown(drm); component_unbind_all(dev, drm); @@ -909,7 +906,6 @@ static void malidp_unbind(struct device *dev) drm_atomic_helper_shutdown(drm); malidp_se_irq_fini(hwdev); malidp_de_irq_fini(hwdev); - drm->irq_enabled = false; component_unbind_all(dev, drm); of_node_put(malidp->crtc.port); malidp->crtc.port = NULL; diff --git a/drivers/gpu/drm/armada/armada_drv.c b/drivers/gpu/drm/armada/armada_drv.c index dab0a1f0983b..8e3e98f13db4 100644 --- a/drivers/gpu/drm/armada/armada_drv.c +++ b/drivers/gpu/drm/armada/armada_drv.c @@ -95,7 +95,7 @@ static int armada_drm_bind(struct device *dev) } /* Remove early framebuffers */ - ret = drm_aperture_remove_framebuffers(false, "armada-drm-fb"); + ret = drm_aperture_remove_framebuffers(false, &armada_drm_driver); if (ret) { dev_err(dev, "[" DRM_NAME ":%s] can't kick out simple-fb: %d\n", __func__, ret); @@ -130,8 +130,6 @@ static int armada_drm_bind(struct device *dev) if (ret) goto err_comp; - priv->drm.irq_enabled = true; - drm_mode_config_reset(&priv->drm); ret = armada_fbdev_init(&priv->drm); diff --git a/drivers/gpu/drm/armada/armada_overlay.c b/drivers/gpu/drm/armada/armada_overlay.c index d3e3e5fdc390..424250535fed 100644 --- a/drivers/gpu/drm/armada/armada_overlay.c +++ b/drivers/gpu/drm/armada/armada_overlay.c @@ -247,8 +247,6 @@ static void armada_drm_overlay_plane_atomic_disable(struct drm_plane *plane, } static const struct drm_plane_helper_funcs armada_overlay_plane_helper_funcs = { - .prepare_fb = 
armada_drm_plane_prepare_fb, - .cleanup_fb = armada_drm_plane_cleanup_fb, .atomic_check = armada_drm_plane_atomic_check, .atomic_update = armada_drm_overlay_plane_atomic_update, .atomic_disable = armada_drm_overlay_plane_atomic_disable, diff --git a/drivers/gpu/drm/armada/armada_plane.c b/drivers/gpu/drm/armada/armada_plane.c index 40209e49f34a..959d7f0a5108 100644 --- a/drivers/gpu/drm/armada/armada_plane.c +++ b/drivers/gpu/drm/armada/armada_plane.c @@ -78,33 +78,6 @@ void armada_drm_plane_calc(struct drm_plane_state *state, u32 addrs[2][3], } } -int armada_drm_plane_prepare_fb(struct drm_plane *plane, - struct drm_plane_state *state) -{ - DRM_DEBUG_KMS("[PLANE:%d:%s] [FB:%d]\n", - plane->base.id, plane->name, - state->fb ? state->fb->base.id : 0); - - /* - * Take a reference on the new framebuffer - we want to - * hold on to it while the hardware is displaying it. - */ - if (state->fb) - drm_framebuffer_get(state->fb); - return 0; -} - -void armada_drm_plane_cleanup_fb(struct drm_plane *plane, - struct drm_plane_state *old_state) -{ - DRM_DEBUG_KMS("[PLANE:%d:%s] [FB:%d]\n", - plane->base.id, plane->name, - old_state->fb ? old_state->fb->base.id : 0); - - if (old_state->fb) - drm_framebuffer_put(old_state->fb); -} - int armada_drm_plane_atomic_check(struct drm_plane *plane, struct drm_atomic_state *state) { @@ -282,8 +255,6 @@ static void armada_drm_primary_plane_atomic_disable(struct drm_plane *plane, } static const struct drm_plane_helper_funcs armada_primary_plane_helper_funcs = { - .prepare_fb = armada_drm_plane_prepare_fb, - .cleanup_fb = armada_drm_plane_cleanup_fb, .atomic_check = armada_drm_plane_atomic_check, .atomic_update = armada_drm_primary_plane_atomic_update, .atomic_disable = armada_drm_primary_plane_atomic_disable, diff --git a/drivers/gpu/drm/armada/armada_plane.h b/drivers/gpu/drm/armada/armada_plane.h index 51dab8d8da22..368415c609a6 100644 --- a/drivers/gpu/drm/armada/armada_plane.h +++ b/drivers/gpu/drm/armada/armada_plane.h @@ -21,8 +21,6 @@ struct armada_plane_state { void armada_drm_plane_calc(struct drm_plane_state *state, u32 addrs[2][3], u16 pitches[3], bool interlaced); -int armada_drm_plane_prepare_fb(struct drm_plane *plane, - struct drm_plane_state *state); void armada_drm_plane_cleanup_fb(struct drm_plane *plane, struct drm_plane_state *old_state); int armada_drm_plane_atomic_check(struct drm_plane *plane, diff --git a/drivers/gpu/drm/aspeed/aspeed_gfx_crtc.c b/drivers/gpu/drm/aspeed/aspeed_gfx_crtc.c index 098f96d4d50d..827e62c1daba 100644 --- a/drivers/gpu/drm/aspeed/aspeed_gfx_crtc.c +++ b/drivers/gpu/drm/aspeed/aspeed_gfx_crtc.c @@ -220,7 +220,6 @@ static const struct drm_simple_display_pipe_funcs aspeed_gfx_funcs = { .enable = aspeed_gfx_pipe_enable, .disable = aspeed_gfx_pipe_disable, .update = aspeed_gfx_pipe_update, - .prepare_fb = drm_gem_simple_display_pipe_prepare_fb, .enable_vblank = aspeed_gfx_enable_vblank, .disable_vblank = aspeed_gfx_disable_vblank, }; diff --git a/drivers/gpu/drm/ast/ast_drv.c b/drivers/gpu/drm/ast/ast_drv.c index 5aa452b4efe6..86d5cd7b6318 100644 --- a/drivers/gpu/drm/ast/ast_drv.c +++ b/drivers/gpu/drm/ast/ast_drv.c @@ -100,7 +100,7 @@ static int ast_remove_conflicting_framebuffers(struct pci_dev *pdev) primary = pdev->resource[PCI_ROM_RESOURCE].flags & IORESOURCE_ROM_SHADOW; #endif - return drm_aperture_remove_conflicting_framebuffers(base, size, primary, "astdrmfb"); + return drm_aperture_remove_conflicting_framebuffers(base, size, primary, &ast_driver); } static int ast_pci_probe(struct pci_dev *pdev, const struct 
pci_device_id *ent) diff --git a/drivers/gpu/drm/ast/ast_drv.h b/drivers/gpu/drm/ast/ast_drv.h index 911f9f414774..39ca338eb80b 100644 --- a/drivers/gpu/drm/ast/ast_drv.h +++ b/drivers/gpu/drm/ast/ast_drv.h @@ -337,6 +337,11 @@ int ast_mode_config_init(struct ast_private *ast); #define AST_DP501_LINKRATE 0xf014 #define AST_DP501_EDID_DATA 0xf020 +/* Define for Soc scratched reg */ +#define AST_VRAM_INIT_STATUS_MASK GENMASK(7, 6) +//#define AST_VRAM_INIT_BY_BMC BIT(7) +//#define AST_VRAM_INIT_READY BIT(6) + int ast_mm_init(struct ast_private *ast); /* ast post */ @@ -346,6 +351,7 @@ bool ast_is_vga_enabled(struct drm_device *dev); void ast_post_gpu(struct drm_device *dev); u32 ast_mindwm(struct ast_private *ast, u32 r); void ast_moutdwm(struct ast_private *ast, u32 r, u32 v); +void ast_patch_ahb_2500(struct ast_private *ast); /* ast dp501 */ void ast_set_dp501_video_output(struct drm_device *dev, u8 mode); bool ast_backup_fw(struct drm_device *dev, u8 *addr, u32 size); diff --git a/drivers/gpu/drm/ast/ast_main.c b/drivers/gpu/drm/ast/ast_main.c index 2aff2e6cf450..79a361867955 100644 --- a/drivers/gpu/drm/ast/ast_main.c +++ b/drivers/gpu/drm/ast/ast_main.c @@ -97,6 +97,11 @@ static void ast_detect_config_mode(struct drm_device *dev, u32 *scu_rev) jregd0 = ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xd0, 0xff); jregd1 = ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xd1, 0xff); if (!(jregd0 & 0x80) || !(jregd1 & 0x10)) { + /* Patch AST2500 */ + if (((pdev->revision & 0xF0) == 0x40) + && ((jregd0 & AST_VRAM_INIT_STATUS_MASK) == 0)) + ast_patch_ahb_2500(ast); + /* Double check it's actually working */ data = ast_read32(ast, 0xf004); if ((data != 0xFFFFFFFF) && (data != 0x00)) { diff --git a/drivers/gpu/drm/ast/ast_mode.c b/drivers/gpu/drm/ast/ast_mode.c index 36d9575aa27b..6bfaefa01818 100644 --- a/drivers/gpu/drm/ast/ast_mode.c +++ b/drivers/gpu/drm/ast/ast_mode.c @@ -275,7 +275,7 @@ static void ast_set_std_reg(struct ast_private *ast, ast_set_index_reg_mask(ast, AST_IO_SEQ_PORT, 0x01, 0xdf, stdtable->seq[0]); for (i = 1; i < 4; i++) { jreg = stdtable->seq[i]; - ast_set_index_reg(ast, AST_IO_SEQ_PORT, (i + 1) , jreg); + ast_set_index_reg(ast, AST_IO_SEQ_PORT, (i + 1), jreg); } /* Set CRTC; except base address and offset */ @@ -498,13 +498,15 @@ static void ast_set_sync_reg(struct ast_private *ast, jreg = ast_io_read8(ast, AST_IO_MISC_PORT_READ); jreg &= ~0xC0; - if (vbios_mode->enh_table->flags & NVSync) jreg |= 0x80; - if (vbios_mode->enh_table->flags & NHSync) jreg |= 0x40; + if (vbios_mode->enh_table->flags & NVSync) + jreg |= 0x80; + if (vbios_mode->enh_table->flags & NHSync) + jreg |= 0x40; ast_io_write8(ast, AST_IO_MISC_PORT_WRITE, jreg); } static void ast_set_start_address_crt1(struct ast_private *ast, - unsigned offset) + unsigned int offset) { u32 addr; @@ -612,8 +614,7 @@ ast_primary_plane_helper_atomic_disable(struct drm_plane *plane, } static const struct drm_plane_helper_funcs ast_primary_plane_helper_funcs = { - .prepare_fb = drm_gem_vram_plane_helper_prepare_fb, - .cleanup_fb = drm_gem_vram_plane_helper_cleanup_fb, + DRM_GEM_VRAM_PLANE_HELPER_FUNCS, .atomic_check = ast_primary_plane_helper_atomic_check, .atomic_update = ast_primary_plane_helper_atomic_update, .atomic_disable = ast_primary_plane_helper_atomic_disable, @@ -807,7 +808,7 @@ ast_cursor_plane_helper_atomic_update(struct drm_plane *plane, ast_cursor_plane->hwc[ast_cursor_plane->next_hwc_index].map; u64 dst_off = ast_cursor_plane->hwc[ast_cursor_plane->next_hwc_index].off; - struct dma_buf_map src_map = 
shadow_plane_state->map[0]; + struct dma_buf_map src_map = shadow_plane_state->data[0]; unsigned int offset_x, offset_y; u16 x, y; u8 x_offset, y_offset; @@ -1212,6 +1213,7 @@ static int ast_get_modes(struct drm_connector *connector) struct edid *edid; int ret; bool flags = false; + if (ast->tx_chip_type == AST_TX_DP501) { ast->dp501_maxclk = 0xff; edid = kmalloc(128, GFP_KERNEL); @@ -1231,8 +1233,8 @@ static int ast_get_modes(struct drm_connector *connector) ret = drm_add_edid_modes(connector, edid); kfree(edid); return ret; - } else - drm_connector_update_edid_property(&ast_connector->base, NULL); + } + drm_connector_update_edid_property(&ast_connector->base, NULL); return 0; } @@ -1272,19 +1274,24 @@ static enum drm_mode_status ast_mode_valid(struct drm_connector *connector, } switch (mode->hdisplay) { case 640: - if (mode->vdisplay == 480) flags = MODE_OK; + if (mode->vdisplay == 480) + flags = MODE_OK; break; case 800: - if (mode->vdisplay == 600) flags = MODE_OK; + if (mode->vdisplay == 600) + flags = MODE_OK; break; case 1024: - if (mode->vdisplay == 768) flags = MODE_OK; + if (mode->vdisplay == 768) + flags = MODE_OK; break; case 1280: - if (mode->vdisplay == 1024) flags = MODE_OK; + if (mode->vdisplay == 1024) + flags = MODE_OK; break; case 1600: - if (mode->vdisplay == 1200) flags = MODE_OK; + if (mode->vdisplay == 1200) + flags = MODE_OK; break; default: return flags; @@ -1293,9 +1300,22 @@ static enum drm_mode_status ast_mode_valid(struct drm_connector *connector, return flags; } +static enum drm_connector_status ast_connector_detect(struct drm_connector + *connector, bool force) +{ + int r; + + r = ast_get_modes(connector); + if (r <= 0) + return connector_status_disconnected; + + return connector_status_connected; +} + static void ast_connector_destroy(struct drm_connector *connector) { struct ast_connector *ast_connector = to_ast_connector(connector); + ast_i2c_destroy(ast_connector->i2c); drm_connector_cleanup(connector); } @@ -1307,6 +1327,7 @@ static const struct drm_connector_helper_funcs ast_connector_helper_funcs = { static const struct drm_connector_funcs ast_connector_funcs = { .reset = drm_atomic_helper_connector_reset, + .detect = ast_connector_detect, .fill_modes = drm_helper_probe_single_connector_modes, .destroy = ast_connector_destroy, .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, @@ -1334,7 +1355,8 @@ static int ast_connector_init(struct drm_device *dev) connector->interlace_allowed = 0; connector->doublescan_allowed = 0; - connector->polled = DRM_CONNECTOR_POLL_CONNECT; + connector->polled = DRM_CONNECTOR_POLL_CONNECT | + DRM_CONNECTOR_POLL_DISCONNECT; drm_connector_attach_encoder(connector, encoder); @@ -1403,6 +1425,8 @@ int ast_mode_config_init(struct ast_private *ast) drm_mode_config_reset(dev); + drm_kms_helper_poll_init(dev); + return 0; } diff --git a/drivers/gpu/drm/ast/ast_post.c b/drivers/gpu/drm/ast/ast_post.c index 0607658dde51..b5d92f652fd8 100644 --- a/drivers/gpu/drm/ast/ast_post.c +++ b/drivers/gpu/drm/ast/ast_post.c @@ -2028,6 +2028,40 @@ static bool ast_dram_init_2500(struct ast_private *ast) return true; } +void ast_patch_ahb_2500(struct ast_private *ast) +{ + u32 data; + + /* Clear bus lock condition */ + ast_moutdwm(ast, 0x1e600000, 0xAEED1A03); + ast_moutdwm(ast, 0x1e600084, 0x00010000); + ast_moutdwm(ast, 0x1e600088, 0x00000000); + ast_moutdwm(ast, 0x1e6e2000, 0x1688A8A8); + data = ast_mindwm(ast, 0x1e6e2070); + if (data & 0x08000000) { /* check fast reset */ + /* + * If "Fast restet" is enabled for ARM-ICE 
debugger, + * then WDT needs to enable, that + * WDT04 is WDT#1 Reload reg. + * WDT08 is WDT#1 counter restart reg to avoid system deadlock + * WDT0C is WDT#1 control reg + * [6:5]:= 01:Full chip + * [4]:= 1:1MHz clock source + * [1]:= 1:WDT will be cleeared and disabled after timeout occurs + * [0]:= 1:WDT enable + */ + ast_moutdwm(ast, 0x1E785004, 0x00000010); + ast_moutdwm(ast, 0x1E785008, 0x00004755); + ast_moutdwm(ast, 0x1E78500c, 0x00000033); + udelay(1000); + } + do { + ast_moutdwm(ast, 0x1e6e2000, 0x1688A8A8); + data = ast_mindwm(ast, 0x1e6e2000); + } while (data != 1); + ast_moutdwm(ast, 0x1e6e207c, 0x08000000); /* clear fast reset */ +} + void ast_post_chip_2500(struct drm_device *dev) { struct ast_private *ast = to_ast_private(dev); @@ -2035,39 +2069,44 @@ void ast_post_chip_2500(struct drm_device *dev) u8 reg; reg = ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xd0, 0xff); - if ((reg & 0x80) == 0) {/* vga only */ + if ((reg & AST_VRAM_INIT_STATUS_MASK) == 0) {/* vga only */ /* Clear bus lock condition */ - ast_moutdwm(ast, 0x1e600000, 0xAEED1A03); - ast_moutdwm(ast, 0x1e600084, 0x00010000); - ast_moutdwm(ast, 0x1e600088, 0x00000000); - ast_moutdwm(ast, 0x1e6e2000, 0x1688A8A8); - ast_write32(ast, 0xf004, 0x1e6e0000); - ast_write32(ast, 0xf000, 0x1); - ast_write32(ast, 0x12000, 0x1688a8a8); - while (ast_read32(ast, 0x12000) != 0x1) - ; - - ast_write32(ast, 0x10000, 0xfc600309); - while (ast_read32(ast, 0x10000) != 0x1) - ; + ast_patch_ahb_2500(ast); + + /* Disable watchdog */ + ast_moutdwm(ast, 0x1E78502C, 0x00000000); + ast_moutdwm(ast, 0x1E78504C, 0x00000000); + + /* + * Reset USB port to patch USB unknown device issue + * SCU90 is Multi-function Pin Control #5 + * [29]:= 1:Enable USB2.0 Host port#1 (that the mutually shared USB2.0 Hub + * port). + * SCU94 is Multi-function Pin Control #6 + * [14:13]:= 1x:USB2.0 Host2 controller + * SCU70 is Hardware Strap reg + * [23]:= 1:CLKIN is 25MHz and USBCK1 = 24/48 MHz (determined by + * [18]: 0(24)/1(48) MHz) + * SCU7C is Write clear reg to SCU70 + * [23]:= write 1 and then SCU70[23] will be clear as 0b. 
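For reference (not part of the patch itself): the 0x00000033 written to WDT0C by ast_patch_ahb_2500() decodes against the bit fields listed in its comment as

    0x33 = 0b0011_0011
        [6:5] = 01 -> full-chip reset
        [4]   = 1  -> 1 MHz clock source
        [1]   = 1  -> WDT cleared and disabled after timeout
        [0]   = 1  -> WDT enable

and the 0x00004755 written to WDT08 is the usual Aspeed watchdog restart magic that reloads the counter. Similarly, SCU7C acts as a write-1-to-clear companion to the SCU70 strap register, which is why the code below clears strap bits by writing them to 0x1E6E207C rather than read-modify-writing SCU70 directly.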
+ */ + ast_moutdwm(ast, 0x1E6E2090, 0x20000000); + ast_moutdwm(ast, 0x1E6E2094, 0x00004000); + if (ast_mindwm(ast, 0x1E6E2070) & 0x00800000) { + ast_moutdwm(ast, 0x1E6E207C, 0x00800000); + mdelay(100); + ast_moutdwm(ast, 0x1E6E2070, 0x00800000); + } + /* Modify eSPI reset pin */ + temp = ast_mindwm(ast, 0x1E6E2070); + if (temp & 0x02000000) + ast_moutdwm(ast, 0x1E6E207C, 0x00004000); /* Slow down CPU/AHB CLK in VGA only mode */ temp = ast_read32(ast, 0x12008); temp |= 0x73; ast_write32(ast, 0x12008, temp); - /* Reset USB port to patch USB unknown device issue */ - ast_moutdwm(ast, 0x1e6e2090, 0x20000000); - temp = ast_mindwm(ast, 0x1e6e2094); - temp |= 0x00004000; - ast_moutdwm(ast, 0x1e6e2094, temp); - temp = ast_mindwm(ast, 0x1e6e2070); - if (temp & 0x00800000) { - ast_moutdwm(ast, 0x1e6e207c, 0x00800000); - mdelay(100); - ast_moutdwm(ast, 0x1e6e2070, 0x00800000); - } - if (!ast_dram_init_2500(ast)) drm_err(dev, "DRAM init failed !\n"); diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c index f09b6dd8754c..1656d27b78b6 100644 --- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c +++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c @@ -22,7 +22,6 @@ #include <drm/drm_fb_helper.h> #include <drm/drm_gem_cma_helper.h> #include <drm/drm_gem_framebuffer_helper.h> -#include <drm/drm_irq.h> #include <drm/drm_probe_helper.h> #include <drm/drm_vblank.h> @@ -557,6 +556,51 @@ static irqreturn_t atmel_hlcdc_dc_irq_handler(int irq, void *data) return IRQ_HANDLED; } +static void atmel_hlcdc_dc_irq_postinstall(struct drm_device *dev) +{ + struct atmel_hlcdc_dc *dc = dev->dev_private; + unsigned int cfg = 0; + int i; + + /* Enable interrupts on activated layers */ + for (i = 0; i < ATMEL_HLCDC_MAX_LAYERS; i++) { + if (dc->layers[i]) + cfg |= ATMEL_HLCDC_LAYER_STATUS(i); + } + + regmap_write(dc->hlcdc->regmap, ATMEL_HLCDC_IER, cfg); +} + +static void atmel_hlcdc_dc_irq_disable(struct drm_device *dev) +{ + struct atmel_hlcdc_dc *dc = dev->dev_private; + unsigned int isr; + + regmap_write(dc->hlcdc->regmap, ATMEL_HLCDC_IDR, 0xffffffff); + regmap_read(dc->hlcdc->regmap, ATMEL_HLCDC_ISR, &isr); +} + +static int atmel_hlcdc_dc_irq_install(struct drm_device *dev, unsigned int irq) +{ + int ret; + + atmel_hlcdc_dc_irq_disable(dev); + + ret = devm_request_irq(dev->dev, irq, atmel_hlcdc_dc_irq_handler, 0, + dev->driver->name, dev); + if (ret) + return ret; + + atmel_hlcdc_dc_irq_postinstall(dev); + + return 0; +} + +static void atmel_hlcdc_dc_irq_uninstall(struct drm_device *dev) +{ + atmel_hlcdc_dc_irq_disable(dev); +} + static const struct drm_mode_config_funcs mode_config_funcs = { .fb_create = drm_gem_fb_create, .atomic_check = drm_atomic_helper_check, @@ -647,7 +691,7 @@ static int atmel_hlcdc_dc_load(struct drm_device *dev) drm_mode_config_reset(dev); pm_runtime_get_sync(dev->dev); - ret = drm_irq_install(dev, dc->hlcdc->irq); + ret = atmel_hlcdc_dc_irq_install(dev, dc->hlcdc->irq); pm_runtime_put_sync(dev->dev); if (ret < 0) { dev_err(dev->dev, "failed to install IRQ handler\n"); @@ -676,7 +720,7 @@ static void atmel_hlcdc_dc_unload(struct drm_device *dev) drm_mode_config_cleanup(dev); pm_runtime_get_sync(dev->dev); - drm_irq_uninstall(dev); + atmel_hlcdc_dc_irq_uninstall(dev); pm_runtime_put_sync(dev->dev); dev->dev_private = NULL; @@ -685,40 +729,10 @@ static void atmel_hlcdc_dc_unload(struct drm_device *dev) clk_disable_unprepare(dc->hlcdc->periph_clk); } -static int atmel_hlcdc_dc_irq_postinstall(struct drm_device *dev) -{ - struct atmel_hlcdc_dc *dc = 
dev->dev_private; - unsigned int cfg = 0; - int i; - - /* Enable interrupts on activated layers */ - for (i = 0; i < ATMEL_HLCDC_MAX_LAYERS; i++) { - if (dc->layers[i]) - cfg |= ATMEL_HLCDC_LAYER_STATUS(i); - } - - regmap_write(dc->hlcdc->regmap, ATMEL_HLCDC_IER, cfg); - - return 0; -} - -static void atmel_hlcdc_dc_irq_uninstall(struct drm_device *dev) -{ - struct atmel_hlcdc_dc *dc = dev->dev_private; - unsigned int isr; - - regmap_write(dc->hlcdc->regmap, ATMEL_HLCDC_IDR, 0xffffffff); - regmap_read(dc->hlcdc->regmap, ATMEL_HLCDC_ISR, &isr); -} - DEFINE_DRM_GEM_CMA_FOPS(fops); static const struct drm_driver atmel_hlcdc_dc_driver = { .driver_features = DRIVER_GEM | DRIVER_MODESET | DRIVER_ATOMIC, - .irq_handler = atmel_hlcdc_dc_irq_handler, - .irq_preinstall = atmel_hlcdc_dc_irq_uninstall, - .irq_postinstall = atmel_hlcdc_dc_irq_postinstall, - .irq_uninstall = atmel_hlcdc_dc_irq_uninstall, DRM_GEM_CMA_DRIVER_OPS, .fops = &fops, .name = "atmel-hlcdc", diff --git a/drivers/gpu/drm/bochs/Kconfig b/drivers/gpu/drm/bochs/Kconfig deleted file mode 100644 index 7bcdf294fed8..000000000000 --- a/drivers/gpu/drm/bochs/Kconfig +++ /dev/null @@ -1,11 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -config DRM_BOCHS - tristate "DRM Support for bochs dispi vga interface (qemu stdvga)" - depends on DRM && PCI && MMU - select DRM_KMS_HELPER - select DRM_VRAM_HELPER - select DRM_TTM - select DRM_TTM_HELPER - help - Choose this option for qemu. - If M is selected the module will be called bochs-drm. diff --git a/drivers/gpu/drm/bochs/Makefile b/drivers/gpu/drm/bochs/Makefile deleted file mode 100644 index 55473371300f..000000000000 --- a/drivers/gpu/drm/bochs/Makefile +++ /dev/null @@ -1,4 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -bochs-drm-y := bochs_drv.o bochs_mm.o bochs_kms.o bochs_hw.o - -obj-$(CONFIG_DRM_BOCHS) += bochs-drm.o diff --git a/drivers/gpu/drm/bochs/bochs.h b/drivers/gpu/drm/bochs/bochs.h deleted file mode 100644 index e9645c612aff..000000000000 --- a/drivers/gpu/drm/bochs/bochs.h +++ /dev/null @@ -1,98 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - -#include <linux/io.h> -#include <linux/console.h> - -#include <drm/drm_crtc.h> -#include <drm/drm_crtc_helper.h> -#include <drm/drm_encoder.h> -#include <drm/drm_fb_helper.h> -#include <drm/drm_gem.h> -#include <drm/drm_gem_vram_helper.h> -#include <drm/drm_simple_kms_helper.h> - -/* ---------------------------------------------------------------------- */ - -#define VBE_DISPI_IOPORT_INDEX 0x01CE -#define VBE_DISPI_IOPORT_DATA 0x01CF - -#define VBE_DISPI_INDEX_ID 0x0 -#define VBE_DISPI_INDEX_XRES 0x1 -#define VBE_DISPI_INDEX_YRES 0x2 -#define VBE_DISPI_INDEX_BPP 0x3 -#define VBE_DISPI_INDEX_ENABLE 0x4 -#define VBE_DISPI_INDEX_BANK 0x5 -#define VBE_DISPI_INDEX_VIRT_WIDTH 0x6 -#define VBE_DISPI_INDEX_VIRT_HEIGHT 0x7 -#define VBE_DISPI_INDEX_X_OFFSET 0x8 -#define VBE_DISPI_INDEX_Y_OFFSET 0x9 -#define VBE_DISPI_INDEX_VIDEO_MEMORY_64K 0xa - -#define VBE_DISPI_ID0 0xB0C0 -#define VBE_DISPI_ID1 0xB0C1 -#define VBE_DISPI_ID2 0xB0C2 -#define VBE_DISPI_ID3 0xB0C3 -#define VBE_DISPI_ID4 0xB0C4 -#define VBE_DISPI_ID5 0xB0C5 - -#define VBE_DISPI_DISABLED 0x00 -#define VBE_DISPI_ENABLED 0x01 -#define VBE_DISPI_GETCAPS 0x02 -#define VBE_DISPI_8BIT_DAC 0x20 -#define VBE_DISPI_LFB_ENABLED 0x40 -#define VBE_DISPI_NOCLEARMEM 0x80 - -/* ---------------------------------------------------------------------- */ - -enum bochs_types { - BOCHS_QEMU_STDVGA, - BOCHS_UNKNOWN, -}; - -struct bochs_device { - /* hw */ - void __iomem *mmio; - int ioports; - void 
__iomem *fb_map; - unsigned long fb_base; - unsigned long fb_size; - unsigned long qext_size; - - /* mode */ - u16 xres; - u16 yres; - u16 yres_virtual; - u32 stride; - u32 bpp; - struct edid *edid; - - /* drm */ - struct drm_device *dev; - struct drm_simple_display_pipe pipe; - struct drm_connector connector; -}; - -/* ---------------------------------------------------------------------- */ - -/* bochs_hw.c */ -int bochs_hw_init(struct drm_device *dev); -void bochs_hw_fini(struct drm_device *dev); - -void bochs_hw_blank(struct bochs_device *bochs, bool blank); -void bochs_hw_setmode(struct bochs_device *bochs, - struct drm_display_mode *mode); -void bochs_hw_setformat(struct bochs_device *bochs, - const struct drm_format_info *format); -void bochs_hw_setbase(struct bochs_device *bochs, - int x, int y, int stride, u64 addr); -int bochs_hw_load_edid(struct bochs_device *bochs); - -/* bochs_mm.c */ -int bochs_mm_init(struct bochs_device *bochs); -void bochs_mm_fini(struct bochs_device *bochs); - -/* bochs_kms.c */ -int bochs_kms_init(struct bochs_device *bochs); - -/* bochs_fbdev.c */ -extern const struct drm_mode_config_funcs bochs_mode_funcs; diff --git a/drivers/gpu/drm/bochs/bochs_drv.c b/drivers/gpu/drm/bochs/bochs_drv.c deleted file mode 100644 index c828cadbabff..000000000000 --- a/drivers/gpu/drm/bochs/bochs_drv.c +++ /dev/null @@ -1,205 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - */ - -#include <linux/module.h> -#include <linux/pci.h> - -#include <drm/drm_drv.h> -#include <drm/drm_aperture.h> -#include <drm/drm_atomic_helper.h> -#include <drm/drm_managed.h> - -#include "bochs.h" - -static int bochs_modeset = -1; -module_param_named(modeset, bochs_modeset, int, 0444); -MODULE_PARM_DESC(modeset, "enable/disable kernel modesetting"); - -/* ---------------------------------------------------------------------- */ -/* drm interface */ - -static void bochs_unload(struct drm_device *dev) -{ - struct bochs_device *bochs = dev->dev_private; - - bochs_mm_fini(bochs); -} - -static int bochs_load(struct drm_device *dev) -{ - struct bochs_device *bochs; - int ret; - - bochs = drmm_kzalloc(dev, sizeof(*bochs), GFP_KERNEL); - if (bochs == NULL) - return -ENOMEM; - dev->dev_private = bochs; - bochs->dev = dev; - - ret = bochs_hw_init(dev); - if (ret) - goto err; - - ret = bochs_mm_init(bochs); - if (ret) - goto err; - - ret = bochs_kms_init(bochs); - if (ret) - goto err; - - return 0; - -err: - bochs_unload(dev); - return ret; -} - -DEFINE_DRM_GEM_FOPS(bochs_fops); - -static const struct drm_driver bochs_driver = { - .driver_features = DRIVER_GEM | DRIVER_MODESET | DRIVER_ATOMIC, - .fops = &bochs_fops, - .name = "bochs-drm", - .desc = "bochs dispi vga interface (qemu stdvga)", - .date = "20130925", - .major = 1, - .minor = 0, - DRM_GEM_VRAM_DRIVER, - .release = bochs_unload, -}; - -/* ---------------------------------------------------------------------- */ -/* pm interface */ - -#ifdef CONFIG_PM_SLEEP -static int bochs_pm_suspend(struct device *dev) -{ - struct drm_device *drm_dev = dev_get_drvdata(dev); - - return drm_mode_config_helper_suspend(drm_dev); -} - -static int bochs_pm_resume(struct device *dev) -{ - struct drm_device *drm_dev = dev_get_drvdata(dev); - - return drm_mode_config_helper_resume(drm_dev); -} -#endif - -static const struct dev_pm_ops bochs_pm_ops = { - SET_SYSTEM_SLEEP_PM_OPS(bochs_pm_suspend, - bochs_pm_resume) -}; - -/* ---------------------------------------------------------------------- */ -/* pci interface */ - -static int bochs_pci_probe(struct 
pci_dev *pdev, - const struct pci_device_id *ent) -{ - struct drm_device *dev; - unsigned long fbsize; - int ret; - - fbsize = pci_resource_len(pdev, 0); - if (fbsize < 4 * 1024 * 1024) { - DRM_ERROR("less than 4 MB video memory, ignoring device\n"); - return -ENOMEM; - } - - ret = drm_aperture_remove_conflicting_pci_framebuffers(pdev, "bochsdrmfb"); - if (ret) - return ret; - - dev = drm_dev_alloc(&bochs_driver, &pdev->dev); - if (IS_ERR(dev)) - return PTR_ERR(dev); - - ret = pci_enable_device(pdev); - if (ret) - goto err_free_dev; - - pci_set_drvdata(pdev, dev); - - ret = bochs_load(dev); - if (ret) - goto err_free_dev; - - ret = drm_dev_register(dev, 0); - if (ret) - goto err_unload; - - drm_fbdev_generic_setup(dev, 32); - return ret; - -err_unload: - bochs_unload(dev); -err_free_dev: - drm_dev_put(dev); - return ret; -} - -static void bochs_pci_remove(struct pci_dev *pdev) -{ - struct drm_device *dev = pci_get_drvdata(pdev); - - drm_dev_unplug(dev); - drm_atomic_helper_shutdown(dev); - bochs_hw_fini(dev); - drm_dev_put(dev); -} - -static const struct pci_device_id bochs_pci_tbl[] = { - { - .vendor = 0x1234, - .device = 0x1111, - .subvendor = PCI_SUBVENDOR_ID_REDHAT_QUMRANET, - .subdevice = PCI_SUBDEVICE_ID_QEMU, - .driver_data = BOCHS_QEMU_STDVGA, - }, - { - .vendor = 0x1234, - .device = 0x1111, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .driver_data = BOCHS_UNKNOWN, - }, - { /* end of list */ } -}; - -static struct pci_driver bochs_pci_driver = { - .name = "bochs-drm", - .id_table = bochs_pci_tbl, - .probe = bochs_pci_probe, - .remove = bochs_pci_remove, - .driver.pm = &bochs_pm_ops, -}; - -/* ---------------------------------------------------------------------- */ -/* module init/exit */ - -static int __init bochs_init(void) -{ - if (vgacon_text_force() && bochs_modeset == -1) - return -EINVAL; - - if (bochs_modeset == 0) - return -EINVAL; - - return pci_register_driver(&bochs_pci_driver); -} - -static void __exit bochs_exit(void) -{ - pci_unregister_driver(&bochs_pci_driver); -} - -module_init(bochs_init); -module_exit(bochs_exit); - -MODULE_DEVICE_TABLE(pci, bochs_pci_tbl); -MODULE_AUTHOR("Gerd Hoffmann <kraxel@redhat.com>"); -MODULE_LICENSE("GPL"); diff --git a/drivers/gpu/drm/bochs/bochs_hw.c b/drivers/gpu/drm/bochs/bochs_hw.c deleted file mode 100644 index 7d3426d8cc69..000000000000 --- a/drivers/gpu/drm/bochs/bochs_hw.c +++ /dev/null @@ -1,323 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - */ - -#include <linux/pci.h> - -#include <drm/drm_drv.h> -#include <drm/drm_fourcc.h> - -#include <video/vga.h> -#include "bochs.h" - -/* ---------------------------------------------------------------------- */ - -static void bochs_vga_writeb(struct bochs_device *bochs, u16 ioport, u8 val) -{ - if (WARN_ON(ioport < 0x3c0 || ioport > 0x3df)) - return; - - if (bochs->mmio) { - int offset = ioport - 0x3c0 + 0x400; - writeb(val, bochs->mmio + offset); - } else { - outb(val, ioport); - } -} - -static u8 bochs_vga_readb(struct bochs_device *bochs, u16 ioport) -{ - if (WARN_ON(ioport < 0x3c0 || ioport > 0x3df)) - return 0xff; - - if (bochs->mmio) { - int offset = ioport - 0x3c0 + 0x400; - return readb(bochs->mmio + offset); - } else { - return inb(ioport); - } -} - -static u16 bochs_dispi_read(struct bochs_device *bochs, u16 reg) -{ - u16 ret = 0; - - if (bochs->mmio) { - int offset = 0x500 + (reg << 1); - ret = readw(bochs->mmio + offset); - } else { - outw(reg, VBE_DISPI_IOPORT_INDEX); - ret = inw(VBE_DISPI_IOPORT_DATA); - } - return ret; -} - -static void 
bochs_dispi_write(struct bochs_device *bochs, u16 reg, u16 val) -{ - if (bochs->mmio) { - int offset = 0x500 + (reg << 1); - writew(val, bochs->mmio + offset); - } else { - outw(reg, VBE_DISPI_IOPORT_INDEX); - outw(val, VBE_DISPI_IOPORT_DATA); - } -} - -static void bochs_hw_set_big_endian(struct bochs_device *bochs) -{ - if (bochs->qext_size < 8) - return; - - writel(0xbebebebe, bochs->mmio + 0x604); -} - -static void bochs_hw_set_little_endian(struct bochs_device *bochs) -{ - if (bochs->qext_size < 8) - return; - - writel(0x1e1e1e1e, bochs->mmio + 0x604); -} - -#ifdef __BIG_ENDIAN -#define bochs_hw_set_native_endian(_b) bochs_hw_set_big_endian(_b) -#else -#define bochs_hw_set_native_endian(_b) bochs_hw_set_little_endian(_b) -#endif - -static int bochs_get_edid_block(void *data, u8 *buf, - unsigned int block, size_t len) -{ - struct bochs_device *bochs = data; - size_t i, start = block * EDID_LENGTH; - - if (start + len > 0x400 /* vga register offset */) - return -1; - - for (i = 0; i < len; i++) { - buf[i] = readb(bochs->mmio + start + i); - } - return 0; -} - -int bochs_hw_load_edid(struct bochs_device *bochs) -{ - u8 header[8]; - - if (!bochs->mmio) - return -1; - - /* check header to detect whenever edid support is enabled in qemu */ - bochs_get_edid_block(bochs, header, 0, ARRAY_SIZE(header)); - if (drm_edid_header_is_valid(header) != 8) - return -1; - - kfree(bochs->edid); - bochs->edid = drm_do_get_edid(&bochs->connector, - bochs_get_edid_block, bochs); - if (bochs->edid == NULL) - return -1; - - return 0; -} - -int bochs_hw_init(struct drm_device *dev) -{ - struct bochs_device *bochs = dev->dev_private; - struct pci_dev *pdev = to_pci_dev(dev->dev); - unsigned long addr, size, mem, ioaddr, iosize; - u16 id; - - if (pdev->resource[2].flags & IORESOURCE_MEM) { - /* mmio bar with vga and bochs registers present */ - if (pci_request_region(pdev, 2, "bochs-drm") != 0) { - DRM_ERROR("Cannot request mmio region\n"); - return -EBUSY; - } - ioaddr = pci_resource_start(pdev, 2); - iosize = pci_resource_len(pdev, 2); - bochs->mmio = ioremap(ioaddr, iosize); - if (bochs->mmio == NULL) { - DRM_ERROR("Cannot map mmio region\n"); - return -ENOMEM; - } - } else { - ioaddr = VBE_DISPI_IOPORT_INDEX; - iosize = 2; - if (!request_region(ioaddr, iosize, "bochs-drm")) { - DRM_ERROR("Cannot request ioports\n"); - return -EBUSY; - } - bochs->ioports = 1; - } - - id = bochs_dispi_read(bochs, VBE_DISPI_INDEX_ID); - mem = bochs_dispi_read(bochs, VBE_DISPI_INDEX_VIDEO_MEMORY_64K) - * 64 * 1024; - if ((id & 0xfff0) != VBE_DISPI_ID0) { - DRM_ERROR("ID mismatch\n"); - return -ENODEV; - } - - if ((pdev->resource[0].flags & IORESOURCE_MEM) == 0) - return -ENODEV; - addr = pci_resource_start(pdev, 0); - size = pci_resource_len(pdev, 0); - if (addr == 0) - return -ENODEV; - if (size != mem) { - DRM_ERROR("Size mismatch: pci=%ld, bochs=%ld\n", - size, mem); - size = min(size, mem); - } - - if (pci_request_region(pdev, 0, "bochs-drm") != 0) - DRM_WARN("Cannot request framebuffer, boot fb still active?\n"); - - bochs->fb_map = ioremap(addr, size); - if (bochs->fb_map == NULL) { - DRM_ERROR("Cannot map framebuffer\n"); - return -ENOMEM; - } - bochs->fb_base = addr; - bochs->fb_size = size; - - DRM_INFO("Found bochs VGA, ID 0x%x.\n", id); - DRM_INFO("Framebuffer size %ld kB @ 0x%lx, %s @ 0x%lx.\n", - size / 1024, addr, - bochs->ioports ? 
"ioports" : "mmio", - ioaddr); - - if (bochs->mmio && pdev->revision >= 2) { - bochs->qext_size = readl(bochs->mmio + 0x600); - if (bochs->qext_size < 4 || bochs->qext_size > iosize) { - bochs->qext_size = 0; - goto noext; - } - DRM_DEBUG("Found qemu ext regs, size %ld\n", - bochs->qext_size); - bochs_hw_set_native_endian(bochs); - } - -noext: - return 0; -} - -void bochs_hw_fini(struct drm_device *dev) -{ - struct bochs_device *bochs = dev->dev_private; - - /* TODO: shot down existing vram mappings */ - - if (bochs->mmio) - iounmap(bochs->mmio); - if (bochs->ioports) - release_region(VBE_DISPI_IOPORT_INDEX, 2); - if (bochs->fb_map) - iounmap(bochs->fb_map); - pci_release_regions(to_pci_dev(dev->dev)); - kfree(bochs->edid); -} - -void bochs_hw_blank(struct bochs_device *bochs, bool blank) -{ - DRM_DEBUG_DRIVER("hw_blank %d\n", blank); - /* discard ar_flip_flop */ - (void)bochs_vga_readb(bochs, VGA_IS1_RC); - /* blank or unblank; we need only update index and set 0x20 */ - bochs_vga_writeb(bochs, VGA_ATT_W, blank ? 0 : 0x20); -} - -void bochs_hw_setmode(struct bochs_device *bochs, - struct drm_display_mode *mode) -{ - int idx; - - if (!drm_dev_enter(bochs->dev, &idx)) - return; - - bochs->xres = mode->hdisplay; - bochs->yres = mode->vdisplay; - bochs->bpp = 32; - bochs->stride = mode->hdisplay * (bochs->bpp / 8); - bochs->yres_virtual = bochs->fb_size / bochs->stride; - - DRM_DEBUG_DRIVER("%dx%d @ %d bpp, vy %d\n", - bochs->xres, bochs->yres, bochs->bpp, - bochs->yres_virtual); - - bochs_hw_blank(bochs, false); - - bochs_dispi_write(bochs, VBE_DISPI_INDEX_ENABLE, 0); - bochs_dispi_write(bochs, VBE_DISPI_INDEX_BPP, bochs->bpp); - bochs_dispi_write(bochs, VBE_DISPI_INDEX_XRES, bochs->xres); - bochs_dispi_write(bochs, VBE_DISPI_INDEX_YRES, bochs->yres); - bochs_dispi_write(bochs, VBE_DISPI_INDEX_BANK, 0); - bochs_dispi_write(bochs, VBE_DISPI_INDEX_VIRT_WIDTH, bochs->xres); - bochs_dispi_write(bochs, VBE_DISPI_INDEX_VIRT_HEIGHT, - bochs->yres_virtual); - bochs_dispi_write(bochs, VBE_DISPI_INDEX_X_OFFSET, 0); - bochs_dispi_write(bochs, VBE_DISPI_INDEX_Y_OFFSET, 0); - - bochs_dispi_write(bochs, VBE_DISPI_INDEX_ENABLE, - VBE_DISPI_ENABLED | VBE_DISPI_LFB_ENABLED); - - drm_dev_exit(idx); -} - -void bochs_hw_setformat(struct bochs_device *bochs, - const struct drm_format_info *format) -{ - int idx; - - if (!drm_dev_enter(bochs->dev, &idx)) - return; - - DRM_DEBUG_DRIVER("format %c%c%c%c\n", - (format->format >> 0) & 0xff, - (format->format >> 8) & 0xff, - (format->format >> 16) & 0xff, - (format->format >> 24) & 0xff); - - switch (format->format) { - case DRM_FORMAT_XRGB8888: - bochs_hw_set_little_endian(bochs); - break; - case DRM_FORMAT_BGRX8888: - bochs_hw_set_big_endian(bochs); - break; - default: - /* should not happen */ - DRM_ERROR("%s: Huh? 
Got framebuffer format 0x%x", - __func__, format->format); - break; - } - - drm_dev_exit(idx); -} - -void bochs_hw_setbase(struct bochs_device *bochs, - int x, int y, int stride, u64 addr) -{ - unsigned long offset; - unsigned int vx, vy, vwidth, idx; - - if (!drm_dev_enter(bochs->dev, &idx)) - return; - - bochs->stride = stride; - offset = (unsigned long)addr + - y * bochs->stride + - x * (bochs->bpp / 8); - vy = offset / bochs->stride; - vx = (offset % bochs->stride) * 8 / bochs->bpp; - vwidth = stride * 8 / bochs->bpp; - - DRM_DEBUG_DRIVER("x %d, y %d, addr %llx -> offset %lx, vx %d, vy %d\n", - x, y, addr, offset, vx, vy); - bochs_dispi_write(bochs, VBE_DISPI_INDEX_VIRT_WIDTH, vwidth); - bochs_dispi_write(bochs, VBE_DISPI_INDEX_X_OFFSET, vx); - bochs_dispi_write(bochs, VBE_DISPI_INDEX_Y_OFFSET, vy); - - drm_dev_exit(idx); -} diff --git a/drivers/gpu/drm/bochs/bochs_kms.c b/drivers/gpu/drm/bochs/bochs_kms.c deleted file mode 100644 index 99410e77d51a..000000000000 --- a/drivers/gpu/drm/bochs/bochs_kms.c +++ /dev/null @@ -1,178 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - */ - -#include <linux/moduleparam.h> - -#include <drm/drm_atomic_helper.h> -#include <drm/drm_gem_framebuffer_helper.h> -#include <drm/drm_probe_helper.h> - -#include "bochs.h" - -static int defx = 1024; -static int defy = 768; - -module_param(defx, int, 0444); -module_param(defy, int, 0444); -MODULE_PARM_DESC(defx, "default x resolution"); -MODULE_PARM_DESC(defy, "default y resolution"); - -/* ---------------------------------------------------------------------- */ - -static const uint32_t bochs_formats[] = { - DRM_FORMAT_XRGB8888, - DRM_FORMAT_BGRX8888, -}; - -static void bochs_plane_update(struct bochs_device *bochs, - struct drm_plane_state *state) -{ - struct drm_gem_vram_object *gbo; - s64 gpu_addr; - - if (!state->fb || !bochs->stride) - return; - - gbo = drm_gem_vram_of_gem(state->fb->obj[0]); - gpu_addr = drm_gem_vram_offset(gbo); - if (WARN_ON_ONCE(gpu_addr < 0)) - return; /* Bug: we didn't pin the BO to VRAM in prepare_fb. 
*/ - - bochs_hw_setbase(bochs, - state->crtc_x, - state->crtc_y, - state->fb->pitches[0], - state->fb->offsets[0] + gpu_addr); - bochs_hw_setformat(bochs, state->fb->format); -} - -static void bochs_pipe_enable(struct drm_simple_display_pipe *pipe, - struct drm_crtc_state *crtc_state, - struct drm_plane_state *plane_state) -{ - struct bochs_device *bochs = pipe->crtc.dev->dev_private; - - bochs_hw_setmode(bochs, &crtc_state->mode); - bochs_plane_update(bochs, plane_state); -} - -static void bochs_pipe_disable(struct drm_simple_display_pipe *pipe) -{ - struct bochs_device *bochs = pipe->crtc.dev->dev_private; - - bochs_hw_blank(bochs, true); -} - -static void bochs_pipe_update(struct drm_simple_display_pipe *pipe, - struct drm_plane_state *old_state) -{ - struct bochs_device *bochs = pipe->crtc.dev->dev_private; - - bochs_plane_update(bochs, pipe->plane.state); -} - -static const struct drm_simple_display_pipe_funcs bochs_pipe_funcs = { - .enable = bochs_pipe_enable, - .disable = bochs_pipe_disable, - .update = bochs_pipe_update, - .prepare_fb = drm_gem_vram_simple_display_pipe_prepare_fb, - .cleanup_fb = drm_gem_vram_simple_display_pipe_cleanup_fb, -}; - -static int bochs_connector_get_modes(struct drm_connector *connector) -{ - struct bochs_device *bochs = - container_of(connector, struct bochs_device, connector); - int count = 0; - - if (bochs->edid) - count = drm_add_edid_modes(connector, bochs->edid); - - if (!count) { - count = drm_add_modes_noedid(connector, 8192, 8192); - drm_set_preferred_mode(connector, defx, defy); - } - return count; -} - -static const struct drm_connector_helper_funcs bochs_connector_connector_helper_funcs = { - .get_modes = bochs_connector_get_modes, -}; - -static const struct drm_connector_funcs bochs_connector_connector_funcs = { - .fill_modes = drm_helper_probe_single_connector_modes, - .destroy = drm_connector_cleanup, - .reset = drm_atomic_helper_connector_reset, - .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, - .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, -}; - -static void bochs_connector_init(struct drm_device *dev) -{ - struct bochs_device *bochs = dev->dev_private; - struct drm_connector *connector = &bochs->connector; - - drm_connector_init(dev, connector, &bochs_connector_connector_funcs, - DRM_MODE_CONNECTOR_VIRTUAL); - drm_connector_helper_add(connector, - &bochs_connector_connector_helper_funcs); - - bochs_hw_load_edid(bochs); - if (bochs->edid) { - DRM_INFO("Found EDID data blob.\n"); - drm_connector_attach_edid_property(connector); - drm_connector_update_edid_property(connector, bochs->edid); - } -} - -static struct drm_framebuffer * -bochs_gem_fb_create(struct drm_device *dev, struct drm_file *file, - const struct drm_mode_fb_cmd2 *mode_cmd) -{ - if (mode_cmd->pixel_format != DRM_FORMAT_XRGB8888 && - mode_cmd->pixel_format != DRM_FORMAT_BGRX8888) - return ERR_PTR(-EINVAL); - - return drm_gem_fb_create(dev, file, mode_cmd); -} - -const struct drm_mode_config_funcs bochs_mode_funcs = { - .fb_create = bochs_gem_fb_create, - .mode_valid = drm_vram_helper_mode_valid, - .atomic_check = drm_atomic_helper_check, - .atomic_commit = drm_atomic_helper_commit, -}; - -int bochs_kms_init(struct bochs_device *bochs) -{ - int ret; - - ret = drmm_mode_config_init(bochs->dev); - if (ret) - return ret; - - bochs->dev->mode_config.max_width = 8192; - bochs->dev->mode_config.max_height = 8192; - - bochs->dev->mode_config.fb_base = bochs->fb_base; - bochs->dev->mode_config.preferred_depth = 24; - 
bochs->dev->mode_config.prefer_shadow = 0; - bochs->dev->mode_config.prefer_shadow_fbdev = 1; - bochs->dev->mode_config.quirk_addfb_prefer_host_byte_order = true; - - bochs->dev->mode_config.funcs = &bochs_mode_funcs; - - bochs_connector_init(bochs->dev); - drm_simple_display_pipe_init(bochs->dev, - &bochs->pipe, - &bochs_pipe_funcs, - bochs_formats, - ARRAY_SIZE(bochs_formats), - NULL, - &bochs->connector); - - drm_mode_config_reset(bochs->dev); - - return 0; -} diff --git a/drivers/gpu/drm/bochs/bochs_mm.c b/drivers/gpu/drm/bochs/bochs_mm.c deleted file mode 100644 index 1b74f530b07c..000000000000 --- a/drivers/gpu/drm/bochs/bochs_mm.c +++ /dev/null @@ -1,24 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - */ - -#include "bochs.h" - -/* ---------------------------------------------------------------------- */ - -int bochs_mm_init(struct bochs_device *bochs) -{ - struct drm_vram_mm *vmm; - - vmm = drm_vram_helper_alloc_mm(bochs->dev, bochs->fb_base, - bochs->fb_size); - return PTR_ERR_OR_ZERO(vmm); -} - -void bochs_mm_fini(struct bochs_device *bochs) -{ - if (!bochs->dev->vram_mm) - return; - - drm_vram_helper_release_mm(bochs->dev); -} diff --git a/drivers/gpu/drm/bridge/Kconfig b/drivers/gpu/drm/bridge/Kconfig index 85b673613687..431b6e12a81f 100644 --- a/drivers/gpu/drm/bridge/Kconfig +++ b/drivers/gpu/drm/bridge/Kconfig @@ -303,6 +303,7 @@ config DRM_TI_SN65DSI86 select DRM_PANEL select DRM_MIPI_DSI select AUXILIARY_BUS + select DRM_DP_AUX_BUS help Texas Instruments SN65DSI86 DSI to eDP Bridge driver diff --git a/drivers/gpu/drm/bridge/adv7511/adv7533.c b/drivers/gpu/drm/bridge/adv7511/adv7533.c index aa19d5a40e31..59d718bde8c4 100644 --- a/drivers/gpu/drm/bridge/adv7511/adv7533.c +++ b/drivers/gpu/drm/bridge/adv7511/adv7533.c @@ -165,7 +165,7 @@ int adv7533_attach_dsi(struct adv7511 *adv) dsi->lanes = adv->num_dsi_lanes; dsi->format = MIPI_DSI_FMT_RGB888; dsi->mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_SYNC_PULSE | - MIPI_DSI_MODE_EOT_PACKET | MIPI_DSI_MODE_VIDEO_HSE; + MIPI_DSI_MODE_NO_EOT_PACKET | MIPI_DSI_MODE_VIDEO_HSE; ret = mipi_dsi_attach(dsi); if (ret < 0) { diff --git a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c index 550814ca2139..b7d2e4449cfa 100644 --- a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c +++ b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c @@ -1583,7 +1583,6 @@ static int analogix_dp_create_bridge(struct drm_device *drm_dev, struct analogix_dp_device *dp) { struct drm_bridge *bridge; - int ret; bridge = devm_kzalloc(drm_dev->dev, sizeof(*bridge), GFP_KERNEL); if (!bridge) { @@ -1596,13 +1595,7 @@ static int analogix_dp_create_bridge(struct drm_device *drm_dev, bridge->driver_private = dp; bridge->funcs = &analogix_dp_bridge_funcs; - ret = drm_bridge_attach(dp->encoder, bridge, NULL, 0); - if (ret) { - DRM_ERROR("failed to attach drm bridge\n"); - return -EINVAL; - } - - return 0; + return drm_bridge_attach(dp->encoder, bridge, NULL, 0); } static int analogix_dp_dt_parse_pdata(struct analogix_dp_device *dp) diff --git a/drivers/gpu/drm/bridge/analogix/anx7625.c b/drivers/gpu/drm/bridge/analogix/anx7625.c index 7519b7a0f29d..14d73fb1dd15 100644 --- a/drivers/gpu/drm/bridge/analogix/anx7625.c +++ b/drivers/gpu/drm/bridge/analogix/anx7625.c @@ -384,6 +384,25 @@ static int anx7625_odfc_config(struct anx7625_data *ctx, return ret; } +/* + * The MIPI source video data exist large variation (e.g. 
59Hz ~ 61Hz), + * anx7625 defined K ratio for matching MIPI input video clock and + * DP output video clock. Increase K value can match bigger video data + * variation. IVO panel has small variation than DP CTS spec, need + * decrease the K value. + */ +static int anx7625_set_k_value(struct anx7625_data *ctx) +{ + struct edid *edid = (struct edid *)ctx->slimport_edid_p.edid_raw_data; + + if (edid->mfg_id[0] == IVO_MID0 && edid->mfg_id[1] == IVO_MID1) + return anx7625_reg_write(ctx, ctx->i2c.rx_p1_client, + MIPI_DIGITAL_ADJ_1, 0x3B); + + return anx7625_reg_write(ctx, ctx->i2c.rx_p1_client, + MIPI_DIGITAL_ADJ_1, 0x3D); +} + static int anx7625_dsi_video_timing_config(struct anx7625_data *ctx) { struct device *dev = &ctx->client->dev; @@ -470,9 +489,8 @@ static int anx7625_dsi_video_timing_config(struct anx7625_data *ctx) MIPI_PLL_N_NUM_15_8, (n >> 8) & 0xff); ret |= anx7625_reg_write(ctx, ctx->i2c.rx_p1_client, MIPI_PLL_N_NUM_7_0, (n & 0xff)); - /* Diff */ - ret |= anx7625_reg_write(ctx, ctx->i2c.rx_p1_client, - MIPI_DIGITAL_ADJ_1, 0x3D); + + anx7625_set_k_value(ctx); ret |= anx7625_odfc_config(ctx, post_divider - 1); @@ -1307,7 +1325,7 @@ static int anx7625_attach_dsi(struct anx7625_data *ctx) dsi->format = MIPI_DSI_FMT_RGB888; dsi->mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_SYNC_PULSE | - MIPI_DSI_MODE_EOT_PACKET | + MIPI_DSI_MODE_NO_EOT_PACKET | MIPI_DSI_MODE_VIDEO_HSE; if (mipi_dsi_attach(dsi) < 0) { @@ -1359,11 +1377,8 @@ static int anx7625_bridge_attach(struct drm_bridge *bridge, err = drm_bridge_attach(bridge->encoder, ctx->pdata.panel_bridge, &ctx->bridge, flags); - if (err) { - DRM_DEV_ERROR(dev, - "Fail to attach panel bridge: %d\n", err); + if (err) return err; - } } ctx->bridge_attached = 1; @@ -1730,7 +1745,6 @@ static int __maybe_unused anx7625_suspend(struct device *dev) if (!pm_runtime_enabled(dev) || !pm_runtime_suspended(dev)) { anx7625_runtime_pm_suspend(dev); disable_irq(ctx->pdata.intp_irq); - flush_workqueue(ctx->workqueue); } return 0; @@ -1790,7 +1804,8 @@ static int anx7625_i2c_probe(struct i2c_client *client, platform->pdata.intp_irq = client->irq; if (platform->pdata.intp_irq) { INIT_WORK(&platform->work, anx7625_work_func); - platform->workqueue = create_workqueue("anx7625_work"); + platform->workqueue = alloc_workqueue("anx7625_work", + WQ_FREEZABLE | WQ_MEM_RECLAIM, 1); if (!platform->workqueue) { DRM_DEV_ERROR(dev, "fail to create work queue\n"); ret = -ENOMEM; @@ -1874,6 +1889,7 @@ static const struct of_device_id anx_match_table[] = { {.compatible = "analogix,anx7625",}, {}, }; +MODULE_DEVICE_TABLE(of, anx_match_table); static struct i2c_driver anx7625_driver = { .driver = { diff --git a/drivers/gpu/drm/bridge/analogix/anx7625.h b/drivers/gpu/drm/bridge/analogix/anx7625.h index 034c3840028f..6dcf64c703f9 100644 --- a/drivers/gpu/drm/bridge/analogix/anx7625.h +++ b/drivers/gpu/drm/bridge/analogix/anx7625.h @@ -210,7 +210,9 @@ #define MIPI_VIDEO_STABLE_CNT 0x0A #define MIPI_LANE_CTRL_10 0x0F -#define MIPI_DIGITAL_ADJ_1 0x1B +#define MIPI_DIGITAL_ADJ_1 0x1B +#define IVO_MID0 0x26 +#define IVO_MID1 0xCF #define MIPI_PLL_M_NUM_23_16 0x1E #define MIPI_PLL_M_NUM_15_8 0x1F diff --git a/drivers/gpu/drm/bridge/cdns-dsi.c b/drivers/gpu/drm/bridge/cdns-dsi.c index b31281f76117..e6e331071a00 100644 --- a/drivers/gpu/drm/bridge/cdns-dsi.c +++ b/drivers/gpu/drm/bridge/cdns-dsi.c @@ -829,7 +829,7 @@ static void cdns_dsi_bridge_enable(struct drm_bridge *bridge) tmp = DIV_ROUND_UP(dsi_cfg.htotal, nlanes) - DIV_ROUND_UP(dsi_cfg.hsa, nlanes); - if 
(!(output->dev->mode_flags & MIPI_DSI_MODE_EOT_PACKET)) + if (!(output->dev->mode_flags & MIPI_DSI_MODE_NO_EOT_PACKET)) tmp -= DIV_ROUND_UP(DSI_EOT_PKT_SIZE, nlanes); tx_byte_period = DIV_ROUND_DOWN_ULL((u64)NSEC_PER_SEC * 8, @@ -902,7 +902,7 @@ static void cdns_dsi_bridge_enable(struct drm_bridge *bridge) tmp = readl(dsi->regs + MCTL_MAIN_DATA_CTL); tmp &= ~(IF_VID_SELECT_MASK | HOST_EOT_GEN | IF_VID_MODE); - if (!(output->dev->mode_flags & MIPI_DSI_MODE_EOT_PACKET)) + if (!(output->dev->mode_flags & MIPI_DSI_MODE_NO_EOT_PACKET)) tmp |= HOST_EOT_GEN; if (output->dev->mode_flags & MIPI_DSI_MODE_VIDEO) diff --git a/drivers/gpu/drm/bridge/ite-it66121.c b/drivers/gpu/drm/bridge/ite-it66121.c index 7149ed40af83..2f2a09adb4bc 100644 --- a/drivers/gpu/drm/bridge/ite-it66121.c +++ b/drivers/gpu/drm/bridge/ite-it66121.c @@ -536,6 +536,8 @@ static int it66121_bridge_attach(struct drm_bridge *bridge, return -EINVAL; ret = drm_bridge_attach(bridge->encoder, ctx->next_bridge, bridge, flags); + if (ret) + return ret; ret = regmap_write_bits(ctx->regmap, IT66121_CLK_BANK_REG, IT66121_CLK_BANK_PWROFF_RCLK, 0); diff --git a/drivers/gpu/drm/bridge/lontium-lt8912b.c b/drivers/gpu/drm/bridge/lontium-lt8912b.c index 76c720b535fb..1b0c7eaf6c84 100644 --- a/drivers/gpu/drm/bridge/lontium-lt8912b.c +++ b/drivers/gpu/drm/bridge/lontium-lt8912b.c @@ -487,7 +487,7 @@ static int lt8912_attach_dsi(struct lt8912 *lt) dsi->mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_BURST | MIPI_DSI_MODE_LPM | - MIPI_DSI_MODE_EOT_PACKET; + MIPI_DSI_MODE_NO_EOT_PACKET; ret = mipi_dsi_attach(dsi); if (ret < 0) { diff --git a/drivers/gpu/drm/bridge/nwl-dsi.c b/drivers/gpu/drm/bridge/nwl-dsi.c index 873995f0a741..ed8ac5059cd2 100644 --- a/drivers/gpu/drm/bridge/nwl-dsi.c +++ b/drivers/gpu/drm/bridge/nwl-dsi.c @@ -48,12 +48,6 @@ enum transfer_direction { #define NWL_DSI_ENDPOINT_LCDIF 0 #define NWL_DSI_ENDPOINT_DCSS 1 -struct nwl_dsi_plat_clk_config { - const char *id; - struct clk *clk; - bool present; -}; - struct nwl_dsi_transfer { const struct mipi_dsi_msg *msg; struct mipi_dsi_packet packet; @@ -196,7 +190,7 @@ static u32 ps2bc(struct nwl_dsi *dsi, unsigned long long ps) u32 bpp = mipi_dsi_pixel_format_to_bpp(dsi->format); return DIV64_U64_ROUND_UP(ps * dsi->mode.clock * bpp, - dsi->lanes * 8 * NSEC_PER_SEC); + dsi->lanes * 8ULL * NSEC_PER_SEC); } /* diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c index e7c7c9b9c646..f08d0fded61f 100644 --- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c +++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c @@ -143,6 +143,7 @@ struct dw_hdmi_phy_data { struct dw_hdmi { struct drm_connector connector; struct drm_bridge bridge; + struct drm_bridge *next_bridge; unsigned int version; @@ -2775,7 +2776,8 @@ static int dw_hdmi_bridge_attach(struct drm_bridge *bridge, struct dw_hdmi *hdmi = bridge->driver_private; if (flags & DRM_BRIDGE_ATTACH_NO_CONNECTOR) - return 0; + return drm_bridge_attach(bridge->encoder, hdmi->next_bridge, + bridge, flags); return dw_hdmi_connector_create(hdmi); } @@ -3160,6 +3162,52 @@ static void dw_hdmi_init_hw(struct dw_hdmi *hdmi) /* ----------------------------------------------------------------------------- * Probe/remove API, used from platforms based on the DRM bridge API. 
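For reference (not part of the patch): the IVO_MID0/IVO_MID1 bytes checked in the anx7625 K-value hunk above are the standard packed 2-byte EDID PNP manufacturer ID, three 5-bit letters with 'A' encoded as 1. A minimal decode sketch, with an illustrative helper name:

#include <linux/types.h>

/* Decode the 2-byte EDID manufacturer ID into a 3-letter PNP string.
 * Bytes 0x26 0xCF, as matched by IVO_MID0/IVO_MID1, decode to "IVO". */
static void edid_mfg_id_to_pnp(const u8 mfg_id[2], char pnp[4])
{
	pnp[0] = '@' + ((mfg_id[0] >> 2) & 0x1f);
	pnp[1] = '@' + (((mfg_id[0] & 0x03) << 3) | (mfg_id[1] >> 5));
	pnp[2] = '@' + (mfg_id[1] & 0x1f);
	pnp[3] = '\0';
}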
*/ + +static int dw_hdmi_parse_dt(struct dw_hdmi *hdmi) +{ + struct device_node *endpoint; + struct device_node *remote; + + if (!hdmi->plat_data->output_port) + return 0; + + endpoint = of_graph_get_endpoint_by_regs(hdmi->dev->of_node, + hdmi->plat_data->output_port, + -1); + if (!endpoint) { + /* + * On platforms whose bindings don't make the output port + * mandatory (such as Rockchip) the plat_data->output_port + * field isn't set, so it's safe to make this a fatal error. + */ + dev_err(hdmi->dev, "Missing endpoint in port@%u\n", + hdmi->plat_data->output_port); + return -ENODEV; + } + + remote = of_graph_get_remote_port_parent(endpoint); + of_node_put(endpoint); + if (!remote) { + dev_err(hdmi->dev, "Endpoint in port@%u unconnected\n", + hdmi->plat_data->output_port); + return -ENODEV; + } + + if (!of_device_is_available(remote)) { + dev_err(hdmi->dev, "port@%u remote device is disabled\n", + hdmi->plat_data->output_port); + of_node_put(remote); + return -ENODEV; + } + + hdmi->next_bridge = of_drm_find_bridge(remote); + of_node_put(remote); + if (!hdmi->next_bridge) + return -EPROBE_DEFER; + + return 0; +} + struct dw_hdmi *dw_hdmi_probe(struct platform_device *pdev, const struct dw_hdmi_plat_data *plat_data) { @@ -3196,6 +3244,10 @@ struct dw_hdmi *dw_hdmi_probe(struct platform_device *pdev, mutex_init(&hdmi->cec_notifier_mutex); spin_lock_init(&hdmi->audio_lock); + ret = dw_hdmi_parse_dt(hdmi); + if (ret < 0) + return ERR_PTR(ret); + ddc_node = of_parse_phandle(np, "ddc-i2c-bus", 0); if (ddc_node) { hdmi->ddc = of_get_i2c_adapter_by_node(ddc_node); @@ -3474,7 +3526,6 @@ struct dw_hdmi *dw_hdmi_bind(struct platform_device *pdev, ret = drm_bridge_attach(encoder, &hdmi->bridge, NULL, 0); if (ret) { dw_hdmi_remove(hdmi); - DRM_ERROR("Failed to initialize bridge with drm\n"); return ERR_PTR(ret); } diff --git a/drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi.c b/drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi.c index 6b268f9445b3..e44e18a0112a 100644 --- a/drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi.c +++ b/drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi.c @@ -246,6 +246,7 @@ struct dw_mipi_dsi { struct clk *pclk; + bool device_found; unsigned int lane_mbps; /* per lane */ u32 channel; u32 lanes; @@ -309,13 +310,37 @@ static inline u32 dsi_read(struct dw_mipi_dsi *dsi, u32 reg) return readl(dsi->base + reg); } +static int dw_mipi_dsi_panel_or_bridge(struct dw_mipi_dsi *dsi, + struct device_node *node) +{ + struct drm_bridge *bridge; + struct drm_panel *panel; + int ret; + + ret = drm_of_find_panel_or_bridge(node, 1, 0, &panel, &bridge); + if (ret) + return ret; + + if (panel) { + bridge = drm_panel_bridge_add_typed(panel, + DRM_MODE_CONNECTOR_DSI); + if (IS_ERR(bridge)) + return PTR_ERR(bridge); + } + + dsi->panel_bridge = bridge; + + if (!dsi->panel_bridge) + return -EPROBE_DEFER; + + return 0; +} + static int dw_mipi_dsi_host_attach(struct mipi_dsi_host *host, struct mipi_dsi_device *device) { struct dw_mipi_dsi *dsi = host_to_dsi(host); const struct dw_mipi_dsi_plat_data *pdata = dsi->plat_data; - struct drm_bridge *bridge; - struct drm_panel *panel; int ret; if (device->lanes > dsi->plat_data->max_data_lanes) { @@ -329,22 +354,14 @@ static int dw_mipi_dsi_host_attach(struct mipi_dsi_host *host, dsi->format = device->format; dsi->mode_flags = device->mode_flags; - ret = drm_of_find_panel_or_bridge(host->dev->of_node, 1, 0, - &panel, &bridge); - if (ret) - return ret; + if (!dsi->device_found) { + ret = dw_mipi_dsi_panel_or_bridge(dsi, host->dev->of_node); + if (ret) + return ret; - if (panel) { - 
bridge = drm_panel_bridge_add_typed(panel, - DRM_MODE_CONNECTOR_DSI); - if (IS_ERR(bridge)) - return PTR_ERR(bridge); + dsi->device_found = true; } - dsi->panel_bridge = bridge; - - drm_bridge_add(&dsi->bridge); - if (pdata->host_ops && pdata->host_ops->attach) { ret = pdata->host_ops->attach(pdata->priv_data, device); if (ret < 0) @@ -999,6 +1016,16 @@ static int dw_mipi_dsi_bridge_attach(struct drm_bridge *bridge, /* Set the encoder type as caller does not know it */ bridge->encoder->encoder_type = DRM_MODE_ENCODER_DSI; + if (!dsi->device_found) { + int ret; + + ret = dw_mipi_dsi_panel_or_bridge(dsi, dsi->dev->of_node); + if (ret) + return ret; + + dsi->device_found = true; + } + /* Attach the panel-bridge to the dsi bridge */ return drm_bridge_attach(bridge->encoder, dsi->panel_bridge, bridge, flags); @@ -1181,6 +1208,7 @@ __dw_mipi_dsi_probe(struct platform_device *pdev, #ifdef CONFIG_OF dsi->bridge.of_node = pdev->dev.of_node; #endif + drm_bridge_add(&dsi->bridge); return dsi; } @@ -1229,15 +1257,7 @@ EXPORT_SYMBOL_GPL(dw_mipi_dsi_remove); */ int dw_mipi_dsi_bind(struct dw_mipi_dsi *dsi, struct drm_encoder *encoder) { - int ret; - - ret = drm_bridge_attach(encoder, &dsi->bridge, NULL, 0); - if (ret) { - DRM_ERROR("Failed to initialize bridge with drm\n"); - return ret; - } - - return ret; + return drm_bridge_attach(encoder, &dsi->bridge, NULL, 0); } EXPORT_SYMBOL_GPL(dw_mipi_dsi_bind); diff --git a/drivers/gpu/drm/bridge/tc358768.c b/drivers/gpu/drm/bridge/tc358768.c index 8ed8302d6bbb..a3db532bbdd1 100644 --- a/drivers/gpu/drm/bridge/tc358768.c +++ b/drivers/gpu/drm/bridge/tc358768.c @@ -291,7 +291,7 @@ static int tc358768_calc_pll(struct tc358768_priv *priv, const struct drm_display_mode *mode, bool verify_only) { - const u32 frs_limits[] = { + static const u32 frs_limits[] = { 1000000000, 500000000, 250000000, @@ -825,7 +825,7 @@ static void tc358768_bridge_pre_enable(struct drm_bridge *bridge) if (!(dsi_dev->mode_flags & MIPI_DSI_CLOCK_NON_CONTINUOUS)) val |= TC358768_DSI_CONTROL_HSCKMD; - if (dsi_dev->mode_flags & MIPI_DSI_MODE_EOT_PACKET) + if (dsi_dev->mode_flags & MIPI_DSI_MODE_NO_EOT_PACKET) val |= TC358768_DSI_CONTROL_EOTDIS; tc358768_write(priv, TC358768_DSI_CONFW, val); diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi83.c b/drivers/gpu/drm/bridge/ti-sn65dsi83.c index 750f2172ef08..a32f70bc68ea 100644 --- a/drivers/gpu/drm/bridge/ti-sn65dsi83.c +++ b/drivers/gpu/drm/bridge/ti-sn65dsi83.c @@ -137,7 +137,6 @@ enum sn65dsi83_model { struct sn65dsi83 { struct drm_bridge bridge; - struct drm_display_mode mode; struct device *dev; struct regmap *regmap; struct device_node *host_node; @@ -147,8 +146,6 @@ struct sn65dsi83 { int dsi_lanes; bool lvds_dual_link; bool lvds_dual_link_even_odd_swap; - bool lvds_format_24bpp; - bool lvds_format_jeida; }; static const struct regmap_range sn65dsi83_readable_ranges[] = { @@ -291,7 +288,8 @@ err_dsi_attach: return ret; } -static void sn65dsi83_pre_enable(struct drm_bridge *bridge) +static void sn65dsi83_atomic_pre_enable(struct drm_bridge *bridge, + struct drm_bridge_state *old_bridge_state) { struct sn65dsi83 *ctx = bridge_to_sn65dsi83(bridge); @@ -306,7 +304,8 @@ static void sn65dsi83_pre_enable(struct drm_bridge *bridge) usleep_range(1000, 1100); } -static u8 sn65dsi83_get_lvds_range(struct sn65dsi83 *ctx) +static u8 sn65dsi83_get_lvds_range(struct sn65dsi83 *ctx, + const struct drm_display_mode *mode) { /* * The encoding of the LVDS_CLK_RANGE is as follows: @@ -322,7 +321,7 @@ static u8 sn65dsi83_get_lvds_range(struct sn65dsi83 *ctx) * the 
clock to 25..154 MHz, the range calculation can be simplified * as follows: */ - int mode_clock = ctx->mode.clock; + int mode_clock = mode->clock; if (ctx->lvds_dual_link) mode_clock /= 2; @@ -330,7 +329,8 @@ static u8 sn65dsi83_get_lvds_range(struct sn65dsi83 *ctx) return (mode_clock - 12500) / 25000; } -static u8 sn65dsi83_get_dsi_range(struct sn65dsi83 *ctx) +static u8 sn65dsi83_get_dsi_range(struct sn65dsi83 *ctx, + const struct drm_display_mode *mode) { /* * The encoding of the CHA_DSI_CLK_RANGE is as follows: @@ -346,7 +346,7 @@ static u8 sn65dsi83_get_dsi_range(struct sn65dsi83 *ctx) * DSI_CLK = mode clock * bpp / dsi_data_lanes / 2 * the 2 is there because the bus is DDR. */ - return DIV_ROUND_UP(clamp((unsigned int)ctx->mode.clock * + return DIV_ROUND_UP(clamp((unsigned int)mode->clock * mipi_dsi_pixel_format_to_bpp(ctx->dsi->format) / ctx->dsi_lanes / 2, 40000U, 500000U), 5000U); } @@ -364,23 +364,73 @@ static u8 sn65dsi83_get_dsi_div(struct sn65dsi83 *ctx) return dsi_div - 1; } -static void sn65dsi83_enable(struct drm_bridge *bridge) +static void sn65dsi83_atomic_enable(struct drm_bridge *bridge, + struct drm_bridge_state *old_bridge_state) { struct sn65dsi83 *ctx = bridge_to_sn65dsi83(bridge); + struct drm_atomic_state *state = old_bridge_state->base.state; + const struct drm_bridge_state *bridge_state; + const struct drm_crtc_state *crtc_state; + const struct drm_display_mode *mode; + struct drm_connector *connector; + struct drm_crtc *crtc; + bool lvds_format_24bpp; + bool lvds_format_jeida; unsigned int pval; + __le16 le16val; u16 val; int ret; + /* Get the LVDS format from the bridge state. */ + bridge_state = drm_atomic_get_new_bridge_state(state, bridge); + + switch (bridge_state->output_bus_cfg.format) { + case MEDIA_BUS_FMT_RGB666_1X7X3_SPWG: + lvds_format_24bpp = false; + lvds_format_jeida = true; + break; + case MEDIA_BUS_FMT_RGB888_1X7X4_JEIDA: + lvds_format_24bpp = true; + lvds_format_jeida = true; + break; + case MEDIA_BUS_FMT_RGB888_1X7X4_SPWG: + lvds_format_24bpp = true; + lvds_format_jeida = false; + break; + default: + /* + * Some bridges still don't set the correct + * LVDS bus pixel format, use SPWG24 default + * format until those are fixed. + */ + lvds_format_24bpp = true; + lvds_format_jeida = false; + dev_warn(ctx->dev, + "Unsupported LVDS bus format 0x%04x, please check output bridge driver. Falling back to SPWG24.\n", + bridge_state->output_bus_cfg.format); + break; + } + + /* + * Retrieve the CRTC adjusted mode. This requires a little dance to go + * from the bridge to the encoder, to the connector and to the CRTC. + */ + connector = drm_atomic_get_new_connector_for_encoder(state, + bridge->encoder); + crtc = drm_atomic_get_new_connector_state(state, connector)->crtc; + crtc_state = drm_atomic_get_new_crtc_state(state, crtc); + mode = &crtc_state->adjusted_mode; + /* Clear reset, disable PLL */ regmap_write(ctx->regmap, REG_RC_RESET, 0x00); regmap_write(ctx->regmap, REG_RC_PLL_EN, 0x00); /* Reference clock derived from DSI link clock. 
*/ regmap_write(ctx->regmap, REG_RC_LVDS_PLL, - REG_RC_LVDS_PLL_LVDS_CLK_RANGE(sn65dsi83_get_lvds_range(ctx)) | + REG_RC_LVDS_PLL_LVDS_CLK_RANGE(sn65dsi83_get_lvds_range(ctx, mode)) | REG_RC_LVDS_PLL_HS_CLK_SRC_DPHY); regmap_write(ctx->regmap, REG_DSI_CLK, - REG_DSI_CLK_CHA_DSI_CLK_RANGE(sn65dsi83_get_dsi_range(ctx))); + REG_DSI_CLK_CHA_DSI_CLK_RANGE(sn65dsi83_get_dsi_range(ctx, mode))); regmap_write(ctx->regmap, REG_RC_DSI_CLK, REG_RC_DSI_CLK_DSI_CLK_DIVIDER(sn65dsi83_get_dsi_div(ctx))); @@ -394,20 +444,20 @@ static void sn65dsi83_enable(struct drm_bridge *bridge) regmap_write(ctx->regmap, REG_DSI_EQ, 0x00); /* Set up sync signal polarity. */ - val = (ctx->mode.flags & DRM_MODE_FLAG_NHSYNC ? + val = (mode->flags & DRM_MODE_FLAG_NHSYNC ? REG_LVDS_FMT_HS_NEG_POLARITY : 0) | - (ctx->mode.flags & DRM_MODE_FLAG_NVSYNC ? + (mode->flags & DRM_MODE_FLAG_NVSYNC ? REG_LVDS_FMT_VS_NEG_POLARITY : 0); /* Set up bits-per-pixel, 18bpp or 24bpp. */ - if (ctx->lvds_format_24bpp) { + if (lvds_format_24bpp) { val |= REG_LVDS_FMT_CHA_24BPP_MODE; if (ctx->lvds_dual_link) val |= REG_LVDS_FMT_CHB_24BPP_MODE; } /* Set up LVDS format, JEIDA/Format 1 or SPWG/Format 2 */ - if (ctx->lvds_format_jeida) { + if (lvds_format_jeida) { val |= REG_LVDS_FMT_CHA_24BPP_FORMAT1; if (ctx->lvds_dual_link) val |= REG_LVDS_FMT_CHB_24BPP_FORMAT1; @@ -426,29 +476,29 @@ static void sn65dsi83_enable(struct drm_bridge *bridge) REG_LVDS_LANE_CHB_LVDS_TERM); regmap_write(ctx->regmap, REG_LVDS_CM, 0x00); - val = cpu_to_le16(ctx->mode.hdisplay); + le16val = cpu_to_le16(mode->hdisplay); regmap_bulk_write(ctx->regmap, REG_VID_CHA_ACTIVE_LINE_LENGTH_LOW, - &val, 2); - val = cpu_to_le16(ctx->mode.vdisplay); + &le16val, 2); + le16val = cpu_to_le16(mode->vdisplay); regmap_bulk_write(ctx->regmap, REG_VID_CHA_VERTICAL_DISPLAY_SIZE_LOW, - &val, 2); + &le16val, 2); /* 32 + 1 pixel clock to ensure proper operation */ - val = cpu_to_le16(32 + 1); - regmap_bulk_write(ctx->regmap, REG_VID_CHA_SYNC_DELAY_LOW, &val, 2); - val = cpu_to_le16(ctx->mode.hsync_end - ctx->mode.hsync_start); + le16val = cpu_to_le16(32 + 1); + regmap_bulk_write(ctx->regmap, REG_VID_CHA_SYNC_DELAY_LOW, &le16val, 2); + le16val = cpu_to_le16(mode->hsync_end - mode->hsync_start); regmap_bulk_write(ctx->regmap, REG_VID_CHA_HSYNC_PULSE_WIDTH_LOW, - &val, 2); - val = cpu_to_le16(ctx->mode.vsync_end - ctx->mode.vsync_start); + &le16val, 2); + le16val = cpu_to_le16(mode->vsync_end - mode->vsync_start); regmap_bulk_write(ctx->regmap, REG_VID_CHA_VSYNC_PULSE_WIDTH_LOW, - &val, 2); + &le16val, 2); regmap_write(ctx->regmap, REG_VID_CHA_HORIZONTAL_BACK_PORCH, - ctx->mode.htotal - ctx->mode.hsync_end); + mode->htotal - mode->hsync_end); regmap_write(ctx->regmap, REG_VID_CHA_VERTICAL_BACK_PORCH, - ctx->mode.vtotal - ctx->mode.vsync_end); + mode->vtotal - mode->vsync_end); regmap_write(ctx->regmap, REG_VID_CHA_HORIZONTAL_FRONT_PORCH, - ctx->mode.hsync_start - ctx->mode.hdisplay); + mode->hsync_start - mode->hdisplay); regmap_write(ctx->regmap, REG_VID_CHA_VERTICAL_FRONT_PORCH, - ctx->mode.vsync_start - ctx->mode.vdisplay); + mode->vsync_start - mode->vdisplay); regmap_write(ctx->regmap, REG_VID_CHA_TEST_PATTERN, 0x00); /* Enable PLL */ @@ -472,7 +522,8 @@ static void sn65dsi83_enable(struct drm_bridge *bridge) regmap_write(ctx->regmap, REG_IRQ_STAT, pval); } -static void sn65dsi83_disable(struct drm_bridge *bridge) +static void sn65dsi83_atomic_disable(struct drm_bridge *bridge, + struct drm_bridge_state *old_bridge_state) { struct sn65dsi83 *ctx = bridge_to_sn65dsi83(bridge); @@ -481,7 +532,8 @@ 
static void sn65dsi83_disable(struct drm_bridge *bridge) regmap_write(ctx->regmap, REG_RC_PLL_EN, 0x00); } -static void sn65dsi83_post_disable(struct drm_bridge *bridge) +static void sn65dsi83_atomic_post_disable(struct drm_bridge *bridge, + struct drm_bridge_state *old_bridge_state) { struct sn65dsi83 *ctx = bridge_to_sn65dsi83(bridge); @@ -503,70 +555,44 @@ sn65dsi83_mode_valid(struct drm_bridge *bridge, return MODE_OK; } -static void sn65dsi83_mode_set(struct drm_bridge *bridge, - const struct drm_display_mode *mode, - const struct drm_display_mode *adj) -{ - struct sn65dsi83 *ctx = bridge_to_sn65dsi83(bridge); +#define MAX_INPUT_SEL_FORMATS 1 - ctx->mode = *adj; -} - -static bool sn65dsi83_mode_fixup(struct drm_bridge *bridge, - const struct drm_display_mode *mode, - struct drm_display_mode *adj) +static u32 * +sn65dsi83_atomic_get_input_bus_fmts(struct drm_bridge *bridge, + struct drm_bridge_state *bridge_state, + struct drm_crtc_state *crtc_state, + struct drm_connector_state *conn_state, + u32 output_fmt, + unsigned int *num_input_fmts) { - struct sn65dsi83 *ctx = bridge_to_sn65dsi83(bridge); - u32 input_bus_format = MEDIA_BUS_FMT_RGB888_1X24; - struct drm_encoder *encoder = bridge->encoder; - struct drm_device *ddev = encoder->dev; - struct drm_connector *connector; + u32 *input_fmts; - /* The DSI format is always RGB888_1X24 */ - list_for_each_entry(connector, &ddev->mode_config.connector_list, head) { - switch (connector->display_info.bus_formats[0]) { - case MEDIA_BUS_FMT_RGB666_1X7X3_SPWG: - ctx->lvds_format_24bpp = false; - ctx->lvds_format_jeida = true; - break; - case MEDIA_BUS_FMT_RGB888_1X7X4_JEIDA: - ctx->lvds_format_24bpp = true; - ctx->lvds_format_jeida = true; - break; - case MEDIA_BUS_FMT_RGB888_1X7X4_SPWG: - ctx->lvds_format_24bpp = true; - ctx->lvds_format_jeida = false; - break; - default: - /* - * Some bridges still don't set the correct - * LVDS bus pixel format, use SPWG24 default - * format until those are fixed. - */ - ctx->lvds_format_24bpp = true; - ctx->lvds_format_jeida = false; - dev_warn(ctx->dev, - "Unsupported LVDS bus format 0x%04x, please check output bridge driver. 
Falling back to SPWG24.\n", - connector->display_info.bus_formats[0]); - break; - } + *num_input_fmts = 0; - drm_display_info_set_bus_formats(&connector->display_info, - &input_bus_format, 1); - } + input_fmts = kcalloc(MAX_INPUT_SEL_FORMATS, sizeof(*input_fmts), + GFP_KERNEL); + if (!input_fmts) + return NULL; + + /* This is the DSI-end bus format */ + input_fmts[0] = MEDIA_BUS_FMT_RGB888_1X24; + *num_input_fmts = 1; - return true; + return input_fmts; } static const struct drm_bridge_funcs sn65dsi83_funcs = { - .attach = sn65dsi83_attach, - .pre_enable = sn65dsi83_pre_enable, - .enable = sn65dsi83_enable, - .disable = sn65dsi83_disable, - .post_disable = sn65dsi83_post_disable, - .mode_valid = sn65dsi83_mode_valid, - .mode_set = sn65dsi83_mode_set, - .mode_fixup = sn65dsi83_mode_fixup, + .attach = sn65dsi83_attach, + .atomic_pre_enable = sn65dsi83_atomic_pre_enable, + .atomic_enable = sn65dsi83_atomic_enable, + .atomic_disable = sn65dsi83_atomic_disable, + .atomic_post_disable = sn65dsi83_atomic_post_disable, + .mode_valid = sn65dsi83_mode_valid, + + .atomic_duplicate_state = drm_atomic_helper_bridge_duplicate_state, + .atomic_destroy_state = drm_atomic_helper_bridge_destroy_state, + .atomic_reset = drm_atomic_helper_bridge_reset, + .atomic_get_input_bus_fmts = sn65dsi83_atomic_get_input_bus_fmts, }; static int sn65dsi83_parse_dt(struct sn65dsi83 *ctx, enum sn65dsi83_model model) diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi86.c b/drivers/gpu/drm/bridge/ti-sn65dsi86.c index 45a2969afb2b..41d48a393e7f 100644 --- a/drivers/gpu/drm/bridge/ti-sn65dsi86.c +++ b/drivers/gpu/drm/bridge/ti-sn65dsi86.c @@ -23,6 +23,7 @@ #include <drm/drm_atomic.h> #include <drm/drm_atomic_helper.h> #include <drm/drm_bridge.h> +#include <drm/drm_dp_aux_bus.h> #include <drm/drm_dp_helper.h> #include <drm/drm_mipi_dsi.h> #include <drm/drm_of.h> @@ -116,6 +117,7 @@ * struct ti_sn65dsi86 - Platform data for ti-sn65dsi86 driver. * @bridge_aux: AUX-bus sub device for MIPI-to-eDP bridge functionality. * @gpio_aux: AUX-bus sub device for GPIO controller functionality. + * @aux_aux: AUX-bus sub device for eDP AUX channel functionality. * * @dev: Pointer to the top level (i2c) device. * @regmap: Regmap for accessing i2c. @@ -124,9 +126,8 @@ * @connector: Our connector. * @host_node: Remote DSI node. * @dsi: Our MIPI DSI source. - * @edid: Detected EDID of eDP panel. * @refclk: Our reference clock. - * @panel: Our panel. + * @next_bridge: The bridge on the eDP side. * @enable_gpio: The GPIO we toggle to enable the bridge. * @supplies: Data for bulk enabling/disabling our regulators. * @dp_lanes: Count of dp_lanes we're using. 
@@ -148,17 +149,17 @@ struct ti_sn65dsi86 { struct auxiliary_device bridge_aux; struct auxiliary_device gpio_aux; + struct auxiliary_device aux_aux; struct device *dev; struct regmap *regmap; struct drm_dp_aux aux; struct drm_bridge bridge; struct drm_connector connector; - struct edid *edid; struct device_node *host_node; struct mipi_dsi_device *dsi; struct clk *refclk; - struct drm_panel *panel; + struct drm_bridge *next_bridge; struct gpio_desc *enable_gpio; struct regulator_bulk_data supplies[SN_REGULATOR_SUPPLY_NUM]; int dp_lanes; @@ -306,6 +307,9 @@ static int __maybe_unused ti_sn65dsi86_resume(struct device *dev) return ret; } + /* td2: min 100 us after regulators before enabling the GPIO */ + usleep_range(100, 110); + gpiod_set_value(pdata->enable_gpio, 1); /* @@ -393,35 +397,222 @@ static void ti_sn65dsi86_debugfs_init(struct ti_sn65dsi86 *pdata) debugfs_create_file("status", 0600, debugfs, pdata, &status_fops); } -/* Connector funcs */ -static struct ti_sn65dsi86 * -connector_to_ti_sn65dsi86(struct drm_connector *connector) +/* ----------------------------------------------------------------------------- + * Auxiliary Devices (*not* AUX) + */ + +static void ti_sn65dsi86_uninit_aux(void *data) { - return container_of(connector, struct ti_sn65dsi86, connector); + auxiliary_device_uninit(data); } -static int ti_sn_bridge_connector_get_modes(struct drm_connector *connector) +static void ti_sn65dsi86_delete_aux(void *data) { - struct ti_sn65dsi86 *pdata = connector_to_ti_sn65dsi86(connector); - struct edid *edid = pdata->edid; - int num, ret; + auxiliary_device_delete(data); +} - if (!edid) { - pm_runtime_get_sync(pdata->dev); - edid = pdata->edid = drm_get_edid(connector, &pdata->aux.ddc); - pm_runtime_put_autosuspend(pdata->dev); +/* + * AUX bus docs say that a non-NULL release is mandatory, but it makes no + * sense for the model used here where all of the aux devices are allocated + * in the single shared structure. We'll use this noop as a workaround. 
+ */ +static void ti_sn65dsi86_noop(struct device *dev) {} + +static int ti_sn65dsi86_add_aux_device(struct ti_sn65dsi86 *pdata, + struct auxiliary_device *aux, + const char *name) +{ + struct device *dev = pdata->dev; + int ret; + + aux->name = name; + aux->dev.parent = dev; + aux->dev.release = ti_sn65dsi86_noop; + device_set_of_node_from_dev(&aux->dev, dev); + ret = auxiliary_device_init(aux); + if (ret) + return ret; + ret = devm_add_action_or_reset(dev, ti_sn65dsi86_uninit_aux, aux); + if (ret) + return ret; + + ret = auxiliary_device_add(aux); + if (ret) + return ret; + ret = devm_add_action_or_reset(dev, ti_sn65dsi86_delete_aux, aux); + + return ret; +} + +/* ----------------------------------------------------------------------------- + * AUX Adapter + */ + +static struct ti_sn65dsi86 *aux_to_ti_sn65dsi86(struct drm_dp_aux *aux) +{ + return container_of(aux, struct ti_sn65dsi86, aux); +} + +static ssize_t ti_sn_aux_transfer(struct drm_dp_aux *aux, + struct drm_dp_aux_msg *msg) +{ + struct ti_sn65dsi86 *pdata = aux_to_ti_sn65dsi86(aux); + u32 request = msg->request & ~(DP_AUX_I2C_MOT | DP_AUX_I2C_WRITE_STATUS_UPDATE); + u32 request_val = AUX_CMD_REQ(msg->request); + u8 *buf = msg->buffer; + unsigned int len = msg->size; + unsigned int val; + int ret; + u8 addr_len[SN_AUX_LENGTH_REG + 1 - SN_AUX_ADDR_19_16_REG]; + + if (len > SN_AUX_MAX_PAYLOAD_BYTES) + return -EINVAL; + + pm_runtime_get_sync(pdata->dev); + mutex_lock(&pdata->comms_mutex); + + /* + * If someone tries to do a DDC over AUX transaction before pre_enable() + * on a device without a dedicated reference clock then we just can't + * do it. Fail right away. This prevents non-refclk users from reading + * the EDID before enabling the panel but such is life. + */ + if (!pdata->comms_enabled) { + ret = -EIO; + goto exit; + } + + switch (request) { + case DP_AUX_NATIVE_WRITE: + case DP_AUX_I2C_WRITE: + case DP_AUX_NATIVE_READ: + case DP_AUX_I2C_READ: + regmap_write(pdata->regmap, SN_AUX_CMD_REG, request_val); + /* Assume it's good */ + msg->reply = 0; + break; + default: + ret = -EINVAL; + goto exit; + } + + BUILD_BUG_ON(sizeof(addr_len) != sizeof(__be32)); + put_unaligned_be32((msg->address & SN_AUX_ADDR_MASK) << 8 | len, + addr_len); + regmap_bulk_write(pdata->regmap, SN_AUX_ADDR_19_16_REG, addr_len, + ARRAY_SIZE(addr_len)); + + if (request == DP_AUX_NATIVE_WRITE || request == DP_AUX_I2C_WRITE) + regmap_bulk_write(pdata->regmap, SN_AUX_WDATA_REG(0), buf, len); + + /* Clear old status bits before start so we don't get confused */ + regmap_write(pdata->regmap, SN_AUX_CMD_STATUS_REG, + AUX_IRQ_STATUS_NAT_I2C_FAIL | + AUX_IRQ_STATUS_AUX_RPLY_TOUT | + AUX_IRQ_STATUS_AUX_SHORT); + + regmap_write(pdata->regmap, SN_AUX_CMD_REG, request_val | AUX_CMD_SEND); + + /* Zero delay loop because i2c transactions are slow already */ + ret = regmap_read_poll_timeout(pdata->regmap, SN_AUX_CMD_REG, val, + !(val & AUX_CMD_SEND), 0, 50 * 1000); + if (ret) + goto exit; + + ret = regmap_read(pdata->regmap, SN_AUX_CMD_STATUS_REG, &val); + if (ret) + goto exit; + + if (val & AUX_IRQ_STATUS_AUX_RPLY_TOUT) { + /* + * The hardware tried the message seven times per the DP spec + * but it hit a timeout. We ignore defers here because they're + * handled in hardware. 
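/*
 * Illustrative sketch (not from this series): once drm_dp_aux_init() installs
 * ti_sn_aux_transfer() as the transfer hook, callers simply go through the
 * stock DPCD helpers. example_read_sink_rev() is a made-up name; only
 * drm_dp_dpcd_readb() and DP_DPCD_REV come from the existing DP helper API.
 */
static int example_read_sink_rev(struct ti_sn65dsi86 *pdata)
{
	u8 rev;
	ssize_t ret;

	/* One-byte native AUX read of DPCD register 0x000 (DP_DPCD_REV). */
	ret = drm_dp_dpcd_readb(&pdata->aux, DP_DPCD_REV, &rev);
	if (ret < 0)
		return ret;

	dev_info(pdata->dev, "sink DPCD revision %#x\n", rev);
	return 0;
}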
+ */ + ret = -ETIMEDOUT; + goto exit; } - if (edid && drm_edid_is_valid(edid)) { - ret = drm_connector_update_edid_property(connector, edid); - if (!ret) { - num = drm_add_edid_modes(connector, edid); - if (num) - return num; + if (val & AUX_IRQ_STATUS_AUX_SHORT) { + ret = regmap_read(pdata->regmap, SN_AUX_LENGTH_REG, &len); + if (ret) + goto exit; + } else if (val & AUX_IRQ_STATUS_NAT_I2C_FAIL) { + switch (request) { + case DP_AUX_I2C_WRITE: + case DP_AUX_I2C_READ: + msg->reply |= DP_AUX_I2C_REPLY_NACK; + break; + case DP_AUX_NATIVE_READ: + case DP_AUX_NATIVE_WRITE: + msg->reply |= DP_AUX_NATIVE_REPLY_NACK; + break; } + len = 0; + goto exit; } - return drm_panel_get_modes(pdata->panel, connector); + if (request != DP_AUX_NATIVE_WRITE && request != DP_AUX_I2C_WRITE && len != 0) + ret = regmap_bulk_read(pdata->regmap, SN_AUX_RDATA_REG(0), buf, len); + +exit: + mutex_unlock(&pdata->comms_mutex); + pm_runtime_mark_last_busy(pdata->dev); + pm_runtime_put_autosuspend(pdata->dev); + + if (ret) + return ret; + return len; +} + +static int ti_sn_aux_probe(struct auxiliary_device *adev, + const struct auxiliary_device_id *id) +{ + struct ti_sn65dsi86 *pdata = dev_get_drvdata(adev->dev.parent); + int ret; + + pdata->aux.name = "ti-sn65dsi86-aux"; + pdata->aux.dev = &adev->dev; + pdata->aux.transfer = ti_sn_aux_transfer; + drm_dp_aux_init(&pdata->aux); + + ret = devm_of_dp_aux_populate_ep_devices(&pdata->aux); + if (ret) + return ret; + + /* + * The eDP to MIPI bridge parts don't work until the AUX channel is + * setup so we don't add it in the main driver probe, we add it now. + */ + return ti_sn65dsi86_add_aux_device(pdata, &pdata->bridge_aux, "bridge"); +} + +static const struct auxiliary_device_id ti_sn_aux_id_table[] = { + { .name = "ti_sn65dsi86.aux", }, + {}, +}; + +static struct auxiliary_driver ti_sn_aux_driver = { + .name = "aux", + .probe = ti_sn_aux_probe, + .id_table = ti_sn_aux_id_table, +}; + +/* ----------------------------------------------------------------------------- + * DRM Connector Operations + */ + +static struct ti_sn65dsi86 * +connector_to_ti_sn65dsi86(struct drm_connector *connector) +{ + return container_of(connector, struct ti_sn65dsi86, connector); +} + +static int ti_sn_bridge_connector_get_modes(struct drm_connector *connector) +{ + struct ti_sn65dsi86 *pdata = connector_to_ti_sn65dsi86(connector); + + return drm_bridge_get_modes(pdata->next_bridge, connector); } static enum drm_mode_status @@ -448,23 +639,32 @@ static const struct drm_connector_funcs ti_sn_bridge_connector_funcs = { .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, }; -static struct ti_sn65dsi86 *bridge_to_ti_sn65dsi86(struct drm_bridge *bridge) +static int ti_sn_bridge_connector_init(struct ti_sn65dsi86 *pdata) { - return container_of(bridge, struct ti_sn65dsi86, bridge); -} + int ret; -static int ti_sn65dsi86_parse_regulators(struct ti_sn65dsi86 *pdata) -{ - unsigned int i; - const char * const ti_sn_bridge_supply_names[] = { - "vcca", "vcc", "vccio", "vpll", - }; + ret = drm_connector_init(pdata->bridge.dev, &pdata->connector, + &ti_sn_bridge_connector_funcs, + DRM_MODE_CONNECTOR_eDP); + if (ret) { + DRM_ERROR("Failed to initialize connector with drm\n"); + return ret; + } - for (i = 0; i < SN_REGULATOR_SUPPLY_NUM; i++) - pdata->supplies[i].supply = ti_sn_bridge_supply_names[i]; + drm_connector_helper_add(&pdata->connector, + &ti_sn_bridge_connector_helper_funcs); + drm_connector_attach_encoder(&pdata->connector, pdata->bridge.encoder); - return devm_regulator_bulk_get(pdata->dev, 
SN_REGULATOR_SUPPLY_NUM, - pdata->supplies); + return 0; +} + +/*------------------------------------------------------------------------------ + * DRM Bridge + */ + +static struct ti_sn65dsi86 *bridge_to_ti_sn65dsi86(struct drm_bridge *bridge) +{ + return container_of(bridge, struct ti_sn65dsi86, bridge); } static int ti_sn_bridge_attach(struct drm_bridge *bridge, @@ -491,17 +691,9 @@ static int ti_sn_bridge_attach(struct drm_bridge *bridge, return ret; } - ret = drm_connector_init(bridge->dev, &pdata->connector, - &ti_sn_bridge_connector_funcs, - DRM_MODE_CONNECTOR_eDP); - if (ret) { - DRM_ERROR("Failed to initialize connector with drm\n"); + ret = ti_sn_bridge_connector_init(pdata); + if (ret < 0) goto err_conn_init; - } - - drm_connector_helper_add(&pdata->connector, - &ti_sn_bridge_connector_helper_funcs); - drm_connector_attach_encoder(&pdata->connector, bridge->encoder); /* * TODO: ideally finding host resource and dsi dev registration needs @@ -547,8 +739,19 @@ static int ti_sn_bridge_attach(struct drm_bridge *bridge, } pdata->dsi = dsi; + /* We never want the next bridge to *also* create a connector: */ + flags |= DRM_BRIDGE_ATTACH_NO_CONNECTOR; + + /* Attach the next bridge */ + ret = drm_bridge_attach(bridge->encoder, pdata->next_bridge, + &pdata->bridge, flags); + if (ret < 0) + goto err_dsi_detach; + return 0; +err_dsi_detach: + mipi_dsi_detach(dsi); err_dsi_attach: mipi_dsi_device_unregister(dsi); err_dsi_host: @@ -567,14 +770,8 @@ static void ti_sn_bridge_disable(struct drm_bridge *bridge) { struct ti_sn65dsi86 *pdata = bridge_to_ti_sn65dsi86(bridge); - drm_panel_disable(pdata->panel); - /* disable video stream */ regmap_update_bits(pdata->regmap, SN_ENH_FRAME_REG, VSTREAM_ENABLE, 0); - /* semi auto link training mode OFF */ - regmap_write(pdata->regmap, SN_ML_TX_MODE_REG, 0); - /* disable DP PLL */ - regmap_write(pdata->regmap, SN_PLL_ENABLE_REG, 0); } static void ti_sn_bridge_set_dsi_rate(struct ti_sn65dsi86 *pdata) @@ -633,9 +830,9 @@ static int ti_sn_bridge_calc_min_dp_rate_idx(struct ti_sn65dsi86 *pdata) return i; } -static void ti_sn_bridge_read_valid_rates(struct ti_sn65dsi86 *pdata, - bool rate_valid[]) +static unsigned int ti_sn_bridge_read_valid_rates(struct ti_sn65dsi86 *pdata) { + unsigned int valid_rates = 0; unsigned int rate_per_200khz; unsigned int rate_mhz; u8 dpcd_val; @@ -675,13 +872,13 @@ static void ti_sn_bridge_read_valid_rates(struct ti_sn65dsi86 *pdata, j < ARRAY_SIZE(ti_sn_bridge_dp_rate_lut); j++) { if (ti_sn_bridge_dp_rate_lut[j] == rate_mhz) - rate_valid[j] = true; + valid_rates |= BIT(j); } } for (i = 0; i < ARRAY_SIZE(ti_sn_bridge_dp_rate_lut); i++) { - if (rate_valid[i]) - return; + if (valid_rates & BIT(i)) + return valid_rates; } DRM_DEV_ERROR(pdata->dev, "No matching eDP rates in table; falling back\n"); @@ -703,15 +900,17 @@ static void ti_sn_bridge_read_valid_rates(struct ti_sn65dsi86 *pdata, (int)dpcd_val); fallthrough; case DP_LINK_BW_5_4: - rate_valid[7] = 1; + valid_rates |= BIT(7); fallthrough; case DP_LINK_BW_2_7: - rate_valid[4] = 1; + valid_rates |= BIT(4); fallthrough; case DP_LINK_BW_1_62: - rate_valid[1] = 1; + valid_rates |= BIT(1); break; } + + return valid_rates; } static void ti_sn_bridge_set_video_timings(struct ti_sn65dsi86 *pdata) @@ -829,8 +1028,8 @@ exit: static void ti_sn_bridge_enable(struct drm_bridge *bridge) { struct ti_sn65dsi86 *pdata = bridge_to_ti_sn65dsi86(bridge); - bool rate_valid[ARRAY_SIZE(ti_sn_bridge_dp_rate_lut)] = { }; const char *last_err_str = "No supported DP rate"; + unsigned int valid_rates; int 
dp_rate_idx; unsigned int val; int ret = -EINVAL; @@ -869,13 +1068,13 @@ static void ti_sn_bridge_enable(struct drm_bridge *bridge) regmap_update_bits(pdata->regmap, SN_SSC_CONFIG_REG, DP_NUM_LANES_MASK, val); - ti_sn_bridge_read_valid_rates(pdata, rate_valid); + valid_rates = ti_sn_bridge_read_valid_rates(pdata); /* Train until we run out of rates */ for (dp_rate_idx = ti_sn_bridge_calc_min_dp_rate_idx(pdata); dp_rate_idx < ARRAY_SIZE(ti_sn_bridge_dp_rate_lut); dp_rate_idx++) { - if (!rate_valid[dp_rate_idx]) + if (!(valid_rates & BIT(dp_rate_idx))) continue; ret = ti_sn_link_training(pdata, dp_rate_idx, &last_err_str); @@ -893,8 +1092,6 @@ static void ti_sn_bridge_enable(struct drm_bridge *bridge) /* enable video stream */ regmap_update_bits(pdata->regmap, SN_ENH_FRAME_REG, VSTREAM_ENABLE, VSTREAM_ENABLE); - - drm_panel_enable(pdata->panel); } static void ti_sn_bridge_pre_enable(struct drm_bridge *bridge) @@ -906,14 +1103,20 @@ static void ti_sn_bridge_pre_enable(struct drm_bridge *bridge) if (!pdata->refclk) ti_sn65dsi86_enable_comms(pdata); - drm_panel_prepare(pdata->panel); + /* td7: min 100 us after enable before DSI data */ + usleep_range(100, 110); } static void ti_sn_bridge_post_disable(struct drm_bridge *bridge) { struct ti_sn65dsi86 *pdata = bridge_to_ti_sn65dsi86(bridge); - drm_panel_unprepare(pdata->panel); + /* semi auto link training mode OFF */ + regmap_write(pdata->regmap, SN_ML_TX_MODE_REG, 0); + /* Num lanes to 0 as per power sequencing in data sheet */ + regmap_update_bits(pdata->regmap, SN_SSC_CONFIG_REG, DP_NUM_LANES_MASK, 0); + /* disable DP PLL */ + regmap_write(pdata->regmap, SN_PLL_ENABLE_REG, 0); if (!pdata->refclk) ti_sn65dsi86_disable_comms(pdata); @@ -930,137 +1133,135 @@ static const struct drm_bridge_funcs ti_sn_bridge_funcs = { .post_disable = ti_sn_bridge_post_disable, }; -static struct ti_sn65dsi86 *aux_to_ti_sn65dsi86(struct drm_dp_aux *aux) -{ - return container_of(aux, struct ti_sn65dsi86, aux); -} - -static ssize_t ti_sn_aux_transfer(struct drm_dp_aux *aux, - struct drm_dp_aux_msg *msg) +static void ti_sn_bridge_parse_lanes(struct ti_sn65dsi86 *pdata, + struct device_node *np) { - struct ti_sn65dsi86 *pdata = aux_to_ti_sn65dsi86(aux); - u32 request = msg->request & ~(DP_AUX_I2C_MOT | DP_AUX_I2C_WRITE_STATUS_UPDATE); - u32 request_val = AUX_CMD_REQ(msg->request); - u8 *buf = msg->buffer; - unsigned int len = msg->size; - unsigned int val; - int ret; - u8 addr_len[SN_AUX_LENGTH_REG + 1 - SN_AUX_ADDR_19_16_REG]; - - if (len > SN_AUX_MAX_PAYLOAD_BYTES) - return -EINVAL; - - pm_runtime_get_sync(pdata->dev); - mutex_lock(&pdata->comms_mutex); + u32 lane_assignments[SN_MAX_DP_LANES] = { 0, 1, 2, 3 }; + u32 lane_polarities[SN_MAX_DP_LANES] = { }; + struct device_node *endpoint; + u8 ln_assign = 0; + u8 ln_polrs = 0; + int dp_lanes; + int i; /* - * If someone tries to do a DDC over AUX transaction before pre_enable() - * on a device without a dedicated reference clock then we just can't - * do it. Fail right away. This prevents non-refclk users from reading - * the EDID before enabling the panel but such is life. + * Read config from the device tree about lane remapping and lane + * polarities. These are optional and we assume identity map and + * normal polarity if nothing is specified. It's OK to specify just + * data-lanes but not lane-polarities but not vice versa. + * + * Error checking is light (we just make sure we don't crash or + * buffer overrun) and we assume dts is well formed and specifying + * mappings that the hardware supports. 
*/ - if (!pdata->comms_enabled) { - ret = -EIO; - goto exit; + endpoint = of_graph_get_endpoint_by_regs(np, 1, -1); + dp_lanes = of_property_count_u32_elems(endpoint, "data-lanes"); + if (dp_lanes > 0 && dp_lanes <= SN_MAX_DP_LANES) { + of_property_read_u32_array(endpoint, "data-lanes", + lane_assignments, dp_lanes); + of_property_read_u32_array(endpoint, "lane-polarities", + lane_polarities, dp_lanes); + } else { + dp_lanes = SN_MAX_DP_LANES; } + of_node_put(endpoint); - switch (request) { - case DP_AUX_NATIVE_WRITE: - case DP_AUX_I2C_WRITE: - case DP_AUX_NATIVE_READ: - case DP_AUX_I2C_READ: - regmap_write(pdata->regmap, SN_AUX_CMD_REG, request_val); - /* Assume it's good */ - msg->reply = 0; - break; - default: - ret = -EINVAL; - goto exit; + /* + * Convert into register format. Loop over all lanes even if + * data-lanes had fewer elements so that we nicely initialize + * the LN_ASSIGN register. + */ + for (i = SN_MAX_DP_LANES - 1; i >= 0; i--) { + ln_assign = ln_assign << LN_ASSIGN_WIDTH | lane_assignments[i]; + ln_polrs = ln_polrs << 1 | lane_polarities[i]; } - BUILD_BUG_ON(sizeof(addr_len) != sizeof(__be32)); - put_unaligned_be32((msg->address & SN_AUX_ADDR_MASK) << 8 | len, - addr_len); - regmap_bulk_write(pdata->regmap, SN_AUX_ADDR_19_16_REG, addr_len, - ARRAY_SIZE(addr_len)); + /* Stash in our struct for when we power on */ + pdata->dp_lanes = dp_lanes; + pdata->ln_assign = ln_assign; + pdata->ln_polrs = ln_polrs; +} - if (request == DP_AUX_NATIVE_WRITE || request == DP_AUX_I2C_WRITE) - regmap_bulk_write(pdata->regmap, SN_AUX_WDATA_REG(0), buf, len); +static int ti_sn_bridge_parse_dsi_host(struct ti_sn65dsi86 *pdata) +{ + struct device_node *np = pdata->dev->of_node; - /* Clear old status bits before start so we don't get confused */ - regmap_write(pdata->regmap, SN_AUX_CMD_STATUS_REG, - AUX_IRQ_STATUS_NAT_I2C_FAIL | - AUX_IRQ_STATUS_AUX_RPLY_TOUT | - AUX_IRQ_STATUS_AUX_SHORT); + pdata->host_node = of_graph_get_remote_node(np, 0, 0); - regmap_write(pdata->regmap, SN_AUX_CMD_REG, request_val | AUX_CMD_SEND); + if (!pdata->host_node) { + DRM_ERROR("remote dsi host node not found\n"); + return -ENODEV; + } - /* Zero delay loop because i2c transactions are slow already */ - ret = regmap_read_poll_timeout(pdata->regmap, SN_AUX_CMD_REG, val, - !(val & AUX_CMD_SEND), 0, 50 * 1000); - if (ret) - goto exit; + return 0; +} - ret = regmap_read(pdata->regmap, SN_AUX_CMD_STATUS_REG, &val); - if (ret) - goto exit; +static int ti_sn_bridge_probe(struct auxiliary_device *adev, + const struct auxiliary_device_id *id) +{ + struct ti_sn65dsi86 *pdata = dev_get_drvdata(adev->dev.parent); + struct device_node *np = pdata->dev->of_node; + struct drm_panel *panel; + int ret; - if (val & AUX_IRQ_STATUS_AUX_RPLY_TOUT) { - /* - * The hardware tried the message seven times per the DP spec - * but it hit a timeout. We ignore defers here because they're - * handled in hardware. 
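/*
 * Illustrative sketch (not from this series): the packing loop in
 * ti_sn_bridge_parse_lanes() above shifts LN_ASSIGN_WIDTH (2) bits per lane,
 * lowest lane last, so lane 0 ends up in the least significant bit pair.
 * example_pack_ln_assign() is a made-up helper showing only the arithmetic.
 */
static u8 example_pack_ln_assign(const u32 lane_assignments[4])
{
	u8 ln_assign = 0;
	int i;

	for (i = 3; i >= 0; i--)
		ln_assign = ln_assign << 2 | lane_assignments[i];

	/* Identity map {0, 1, 2, 3} gives 0xe4; a full swap {3, 2, 1, 0} gives 0x1b. */
	return ln_assign;
}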
- */ - ret = -ETIMEDOUT; - goto exit; - } + ret = drm_of_find_panel_or_bridge(np, 1, 0, &panel, NULL); + if (ret) + return dev_err_probe(&adev->dev, ret, + "could not find any panel node\n"); - if (val & AUX_IRQ_STATUS_AUX_SHORT) { - ret = regmap_read(pdata->regmap, SN_AUX_LENGTH_REG, &len); - if (ret) - goto exit; - } else if (val & AUX_IRQ_STATUS_NAT_I2C_FAIL) { - switch (request) { - case DP_AUX_I2C_WRITE: - case DP_AUX_I2C_READ: - msg->reply |= DP_AUX_I2C_REPLY_NACK; - break; - case DP_AUX_NATIVE_READ: - case DP_AUX_NATIVE_WRITE: - msg->reply |= DP_AUX_NATIVE_REPLY_NACK; - break; - } - len = 0; - goto exit; + pdata->next_bridge = devm_drm_panel_bridge_add(pdata->dev, panel); + if (IS_ERR(pdata->next_bridge)) { + DRM_ERROR("failed to create panel bridge\n"); + return PTR_ERR(pdata->next_bridge); } - if (request != DP_AUX_NATIVE_WRITE && request != DP_AUX_I2C_WRITE && len != 0) - ret = regmap_bulk_read(pdata->regmap, SN_AUX_RDATA_REG(0), buf, len); - -exit: - mutex_unlock(&pdata->comms_mutex); - pm_runtime_mark_last_busy(pdata->dev); - pm_runtime_put_autosuspend(pdata->dev); + ti_sn_bridge_parse_lanes(pdata, np); + ret = ti_sn_bridge_parse_dsi_host(pdata); if (ret) return ret; - return len; + + pdata->bridge.funcs = &ti_sn_bridge_funcs; + pdata->bridge.of_node = np; + + drm_bridge_add(&pdata->bridge); + + return 0; } -static int ti_sn_bridge_parse_dsi_host(struct ti_sn65dsi86 *pdata) +static void ti_sn_bridge_remove(struct auxiliary_device *adev) { - struct device_node *np = pdata->dev->of_node; + struct ti_sn65dsi86 *pdata = dev_get_drvdata(adev->dev.parent); - pdata->host_node = of_graph_get_remote_node(np, 0, 0); + if (!pdata) + return; - if (!pdata->host_node) { - DRM_ERROR("remote dsi host node not found\n"); - return -ENODEV; + if (pdata->dsi) { + mipi_dsi_detach(pdata->dsi); + mipi_dsi_device_unregister(pdata->dsi); } - return 0; + drm_bridge_remove(&pdata->bridge); + + of_node_put(pdata->host_node); } +static const struct auxiliary_device_id ti_sn_bridge_id_table[] = { + { .name = "ti_sn65dsi86.bridge", }, + {}, +}; + +static struct auxiliary_driver ti_sn_bridge_driver = { + .name = "bridge", + .probe = ti_sn_bridge_probe, + .remove = ti_sn_bridge_remove, + .id_table = ti_sn_bridge_id_table, +}; + +/* ----------------------------------------------------------------------------- + * GPIO Controller + */ + #if defined(CONFIG_OF_GPIO) static int tn_sn_bridge_of_xlate(struct gpio_chip *chip, @@ -1265,171 +1466,27 @@ static inline void ti_sn_gpio_unregister(void) {} #endif -static void ti_sn_bridge_parse_lanes(struct ti_sn65dsi86 *pdata, - struct device_node *np) -{ - u32 lane_assignments[SN_MAX_DP_LANES] = { 0, 1, 2, 3 }; - u32 lane_polarities[SN_MAX_DP_LANES] = { }; - struct device_node *endpoint; - u8 ln_assign = 0; - u8 ln_polrs = 0; - int dp_lanes; - int i; - - /* - * Read config from the device tree about lane remapping and lane - * polarities. These are optional and we assume identity map and - * normal polarity if nothing is specified. It's OK to specify just - * data-lanes but not lane-polarities but not vice versa. - * - * Error checking is light (we just make sure we don't crash or - * buffer overrun) and we assume dts is well formed and specifying - * mappings that the hardware supports. 
- */ - endpoint = of_graph_get_endpoint_by_regs(np, 1, -1); - dp_lanes = of_property_count_u32_elems(endpoint, "data-lanes"); - if (dp_lanes > 0 && dp_lanes <= SN_MAX_DP_LANES) { - of_property_read_u32_array(endpoint, "data-lanes", - lane_assignments, dp_lanes); - of_property_read_u32_array(endpoint, "lane-polarities", - lane_polarities, dp_lanes); - } else { - dp_lanes = SN_MAX_DP_LANES; - } - of_node_put(endpoint); - - /* - * Convert into register format. Loop over all lanes even if - * data-lanes had fewer elements so that we nicely initialize - * the LN_ASSIGN register. - */ - for (i = SN_MAX_DP_LANES - 1; i >= 0; i--) { - ln_assign = ln_assign << LN_ASSIGN_WIDTH | lane_assignments[i]; - ln_polrs = ln_polrs << 1 | lane_polarities[i]; - } - - /* Stash in our struct for when we power on */ - pdata->dp_lanes = dp_lanes; - pdata->ln_assign = ln_assign; - pdata->ln_polrs = ln_polrs; -} - -static int ti_sn_bridge_probe(struct auxiliary_device *adev, - const struct auxiliary_device_id *id) -{ - struct ti_sn65dsi86 *pdata = dev_get_drvdata(adev->dev.parent); - struct device_node *np = pdata->dev->of_node; - int ret; - - ret = drm_of_find_panel_or_bridge(np, 1, 0, &pdata->panel, NULL); - if (ret) { - DRM_ERROR("could not find any panel node\n"); - return ret; - } - - ti_sn_bridge_parse_lanes(pdata, np); - - ret = ti_sn_bridge_parse_dsi_host(pdata); - if (ret) - return ret; - - pdata->aux.name = "ti-sn65dsi86-aux"; - pdata->aux.dev = pdata->dev; - pdata->aux.transfer = ti_sn_aux_transfer; - drm_dp_aux_init(&pdata->aux); - - pdata->bridge.funcs = &ti_sn_bridge_funcs; - pdata->bridge.of_node = np; - - drm_bridge_add(&pdata->bridge); - - return 0; -} - -static void ti_sn_bridge_remove(struct auxiliary_device *adev) -{ - struct ti_sn65dsi86 *pdata = dev_get_drvdata(adev->dev.parent); - - if (!pdata) - return; - - if (pdata->dsi) { - mipi_dsi_detach(pdata->dsi); - mipi_dsi_device_unregister(pdata->dsi); - } - - kfree(pdata->edid); - - drm_bridge_remove(&pdata->bridge); - - of_node_put(pdata->host_node); -} - -static const struct auxiliary_device_id ti_sn_bridge_id_table[] = { - { .name = "ti_sn65dsi86.bridge", }, - {}, -}; - -static struct auxiliary_driver ti_sn_bridge_driver = { - .name = "bridge", - .probe = ti_sn_bridge_probe, - .remove = ti_sn_bridge_remove, - .id_table = ti_sn_bridge_id_table, -}; +/* ----------------------------------------------------------------------------- + * Probe & Remove + */ static void ti_sn65dsi86_runtime_disable(void *data) { pm_runtime_disable(data); } -static void ti_sn65dsi86_uninit_aux(void *data) -{ - auxiliary_device_uninit(data); -} - -static void ti_sn65dsi86_delete_aux(void *data) -{ - auxiliary_device_delete(data); -} - -/* - * AUX bus docs say that a non-NULL release is mandatory, but it makes no - * sense for the model used here where all of the aux devices are allocated - * in the single shared structure. We'll use this noop as a workaround. - */ -static void ti_sn65dsi86_noop(struct device *dev) {} - -static int ti_sn65dsi86_add_aux_device(struct ti_sn65dsi86 *pdata, - struct auxiliary_device *aux, - const char *name) +static int ti_sn65dsi86_parse_regulators(struct ti_sn65dsi86 *pdata) { - struct device *dev = pdata->dev; - int ret; - - /* - * NOTE: It would be nice to set the "of_node" of our children to be - * the same "of_node"" that the top-level component has. That doesn't - * work, though, since pinctrl will try (and fail) to reserve the - * pins again. 
Until that gets sorted out the children will just need - * to look at the of_node of the main device. - */ - - aux->name = name; - aux->dev.parent = dev; - aux->dev.release = ti_sn65dsi86_noop; - ret = auxiliary_device_init(aux); - if (ret) - return ret; - ret = devm_add_action_or_reset(dev, ti_sn65dsi86_uninit_aux, aux); - if (ret) - return ret; + unsigned int i; + const char * const ti_sn_bridge_supply_names[] = { + "vcca", "vcc", "vccio", "vpll", + }; - ret = auxiliary_device_add(aux); - if (ret) - return ret; - ret = devm_add_action_or_reset(dev, ti_sn65dsi86_delete_aux, aux); + for (i = 0; i < SN_REGULATOR_SUPPLY_NUM; i++) + pdata->supplies[i].supply = ti_sn_bridge_supply_names[i]; - return ret; + return devm_regulator_bulk_get(pdata->dev, SN_REGULATOR_SUPPLY_NUM, + pdata->supplies); } static int ti_sn65dsi86_probe(struct i2c_client *client, @@ -1454,27 +1511,24 @@ static int ti_sn65dsi86_probe(struct i2c_client *client, pdata->regmap = devm_regmap_init_i2c(client, &ti_sn65dsi86_regmap_config); - if (IS_ERR(pdata->regmap)) { - DRM_ERROR("regmap i2c init failed\n"); - return PTR_ERR(pdata->regmap); - } + if (IS_ERR(pdata->regmap)) + return dev_err_probe(dev, PTR_ERR(pdata->regmap), + "regmap i2c init failed\n"); - pdata->enable_gpio = devm_gpiod_get(dev, "enable", GPIOD_OUT_LOW); - if (IS_ERR(pdata->enable_gpio)) { - DRM_ERROR("failed to get enable gpio from DT\n"); - ret = PTR_ERR(pdata->enable_gpio); - return ret; - } + pdata->enable_gpio = devm_gpiod_get_optional(dev, "enable", + GPIOD_OUT_LOW); + if (IS_ERR(pdata->enable_gpio)) + return dev_err_probe(dev, PTR_ERR(pdata->enable_gpio), + "failed to get enable gpio from DT\n"); ret = ti_sn65dsi86_parse_regulators(pdata); - if (ret) { - DRM_ERROR("failed to parse regulators\n"); - return ret; - } + if (ret) + return dev_err_probe(dev, ret, "failed to parse regulators\n"); pdata->refclk = devm_clk_get_optional(dev, "refclk"); if (IS_ERR(pdata->refclk)) - return PTR_ERR(pdata->refclk); + return dev_err_probe(dev, PTR_ERR(pdata->refclk), + "failed to get reference clock\n"); pm_runtime_enable(dev); ret = devm_add_action_or_reset(dev, ti_sn65dsi86_runtime_disable, dev); @@ -1490,10 +1544,11 @@ static int ti_sn65dsi86_probe(struct i2c_client *client, * motiviation here is to solve the chicken-and-egg problem of probe * ordering. The bridge wants the panel to be there when it probes. * The panel wants its HPD GPIO (provided by sn65dsi86 on some boards) - * when it probes. There will soon be other devices (DDC I2C bus, PWM) - * that have the same problem. Having sub-devices allows the some sub - * devices to finish probing even if others return -EPROBE_DEFER and - * gets us around the problems. + * when it probes. The panel and maybe backlight might want the DDC + * bus. Soon the PWM provided by the bridge chip will have the same + * problem. Having sub-devices allows the some sub devices to finish + * probing even if others return -EPROBE_DEFER and gets us around the + * problems. */ if (IS_ENABLED(CONFIG_OF_GPIO)) { @@ -1502,7 +1557,13 @@ static int ti_sn65dsi86_probe(struct i2c_client *client, return ret; } - return ti_sn65dsi86_add_aux_device(pdata, &pdata->bridge_aux, "bridge"); + /* + * NOTE: At the end of the AUX channel probe we'll add the aux device + * for the bridge. This is because the bridge can't be used until the + * AUX channel is there and this is a very simple solution to the + * dependency problem. 
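/*
 * Illustrative sketch (not from this series): the probe rework above leans on
 * dev_err_probe(), which logs the failure, records the deferral reason and
 * returns the error code in one step while staying quiet for -EPROBE_DEFER.
 * example_get_refclk() is a made-up helper showing the pattern.
 */
static int example_get_refclk(struct device *dev, struct clk **clk)
{
	*clk = devm_clk_get_optional(dev, "refclk");
	if (IS_ERR(*clk))
		return dev_err_probe(dev, PTR_ERR(*clk),
				     "failed to get reference clock\n");

	return 0;
}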
+ */ + return ti_sn65dsi86_add_aux_device(pdata, &pdata->aux_aux, "aux"); } static struct i2c_device_id ti_sn65dsi86_id[] = { @@ -1539,12 +1600,18 @@ static int __init ti_sn65dsi86_init(void) if (ret) goto err_main_was_registered; - ret = auxiliary_driver_register(&ti_sn_bridge_driver); + ret = auxiliary_driver_register(&ti_sn_aux_driver); if (ret) goto err_gpio_was_registered; + ret = auxiliary_driver_register(&ti_sn_bridge_driver); + if (ret) + goto err_aux_was_registered; + return 0; +err_aux_was_registered: + auxiliary_driver_unregister(&ti_sn_aux_driver); err_gpio_was_registered: ti_sn_gpio_unregister(); err_main_was_registered: @@ -1557,6 +1624,7 @@ module_init(ti_sn65dsi86_init); static void __exit ti_sn65dsi86_exit(void) { auxiliary_driver_unregister(&ti_sn_bridge_driver); + auxiliary_driver_unregister(&ti_sn_aux_driver); ti_sn_gpio_unregister(); i2c_del_driver(&ti_sn65dsi86_driver); } diff --git a/drivers/gpu/drm/drm_aperture.c b/drivers/gpu/drm/drm_aperture.c index 9335d9d6cf9a..74bd4a76b253 100644 --- a/drivers/gpu/drm/drm_aperture.c +++ b/drivers/gpu/drm/drm_aperture.c @@ -33,6 +33,10 @@ * * .. code-block:: c * + * static const struct drm_driver example_driver = { + * ... + * }; + * * static int remove_conflicting_framebuffers(struct pci_dev *pdev) * { * bool primary = false; @@ -46,7 +50,7 @@ * #endif * * return drm_aperture_remove_conflicting_framebuffers(base, size, primary, - * "example driver"); + * &example_driver); * } * * static int probe(struct pci_dev *pdev) @@ -74,7 +78,7 @@ * * Drivers that are susceptible to being removed by other drivers, such as * generic EFI or VESA drivers, have to register themselves as owners of their - * given framebuffer memory. Ownership of the framebuffer memory is achived + * given framebuffer memory. Ownership of the framebuffer memory is achieved * by calling devm_aperture_acquire_from_firmware(). On success, the driver * is the owner of the framebuffer range. The function fails if the * framebuffer is already by another driver. See below for an example. @@ -274,7 +278,7 @@ static void drm_aperture_detach_drivers(resource_size_t base, resource_size_t si * @base: the aperture's base address in physical memory * @size: aperture size in bytes * @primary: also kick vga16fb if present - * @name: requesting driver name + * @req_driver: requesting DRM driver * * This function removes graphics device drivers which use memory range described by * @base and @size. @@ -283,7 +287,7 @@ static void drm_aperture_detach_drivers(resource_size_t base, resource_size_t si * 0 on success, or a negative errno code otherwise */ int drm_aperture_remove_conflicting_framebuffers(resource_size_t base, resource_size_t size, - bool primary, const char *name) + bool primary, const struct drm_driver *req_driver) { #if IS_REACHABLE(CONFIG_FB) struct apertures_struct *a; @@ -296,7 +300,7 @@ int drm_aperture_remove_conflicting_framebuffers(resource_size_t base, resource_ a->ranges[0].base = base; a->ranges[0].size = size; - ret = remove_conflicting_framebuffers(a, name, primary); + ret = remove_conflicting_framebuffers(a, req_driver->name, primary); kfree(a); if (ret) @@ -312,7 +316,7 @@ EXPORT_SYMBOL(drm_aperture_remove_conflicting_framebuffers); /** * drm_aperture_remove_conflicting_pci_framebuffers - remove existing framebuffers for PCI devices * @pdev: PCI device - * @name: requesting driver name + * @req_driver: requesting DRM driver * * This function removes graphics device drivers using memory range configured * for any of @pdev's memory bars. 
The function assumes that PCI device with @@ -321,7 +325,8 @@ EXPORT_SYMBOL(drm_aperture_remove_conflicting_framebuffers); * Returns: * 0 on success, or a negative errno code otherwise */ -int drm_aperture_remove_conflicting_pci_framebuffers(struct pci_dev *pdev, const char *name) +int drm_aperture_remove_conflicting_pci_framebuffers(struct pci_dev *pdev, + const struct drm_driver *req_driver) { resource_size_t base, size; int bar, ret = 0; @@ -339,7 +344,7 @@ int drm_aperture_remove_conflicting_pci_framebuffers(struct pci_dev *pdev, const * otherwise the vga fbdev driver falls over. */ #if IS_REACHABLE(CONFIG_FB) - ret = remove_conflicting_pci_framebuffers(pdev, name); + ret = remove_conflicting_pci_framebuffers(pdev, req_driver->name); #endif if (ret == 0) ret = vga_remove_vgacon(pdev); diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index d820423fac32..ff1416cd609a 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -660,7 +660,7 @@ static int drm_atomic_plane_check(const struct drm_plane_state *old_plane_state, return -ENOSPC; } - clips = drm_plane_get_damage_clips(new_plane_state); + clips = __drm_plane_get_damage_clips(new_plane_state); num_clips = drm_plane_get_damage_clips_count(new_plane_state); /* Make sure damage clips are valid and inside the fb. */ @@ -723,7 +723,7 @@ static void drm_atomic_plane_print_state(struct drm_printer *p, * clocks, scaler units, bandwidth and fifo limits shared among a group of * planes or CRTCs, and so on) it makes sense to model these as independent * objects. Drivers then need to do similar state tracking and commit ordering for - * such private (since not exposed to userpace) objects as the atomic core and + * such private (since not exposed to userspace) objects as the atomic core and * helpers already provide for connectors, planes and CRTCs. * * To make this easier on drivers the atomic core provides some support to track diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index bc3487964fb5..2c0c6ec92820 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -35,6 +35,7 @@ #include <drm/drm_damage_helper.h> #include <drm/drm_device.h> #include <drm/drm_drv.h> +#include <drm/drm_gem_atomic_helper.h> #include <drm/drm_plane_helper.h> #include <drm/drm_print.h> #include <drm/drm_self_refresh_helper.h> @@ -633,7 +634,7 @@ drm_atomic_helper_check_modeset(struct drm_device *dev, * connectors and a NULL mode. * * The other way around is true as well. enable != 0 - * iff connectors are attached and a mode is set. + * implies that connectors are attached and a mode is set. */ new_crtc_state->mode_changed = true; new_crtc_state->connectors_changed = true; @@ -1685,7 +1686,7 @@ static void commit_work(struct work_struct *work) } /** - * drm_atomic_helper_async_check - check if state can be commited asynchronously + * drm_atomic_helper_async_check - check if state can be committed asynchronously * @dev: DRM device * @state: the driver state object * @@ -1694,7 +1695,7 @@ static void commit_work(struct work_struct *work) * but just do in-place changes on the current state. * * It will return 0 if the commit can happen in an asynchronous fashion or error - * if not. Note that error just mean it can't be commited asynchronously, if it + * if not. Note that error just mean it can't be committed asynchronously, if it * fails the commit should be treated like a normal synchronous commit. 
*/ int drm_atomic_helper_async_check(struct drm_device *dev, @@ -2405,6 +2406,15 @@ int drm_atomic_helper_prepare_planes(struct drm_device *dev, ret = funcs->prepare_fb(plane, new_plane_state); if (ret) goto fail; + } else { + WARN_ON_ONCE(funcs->cleanup_fb); + + if (!drm_core_check_feature(dev, DRIVER_GEM)) + continue; + + ret = drm_gem_plane_helper_prepare_fb(plane, new_plane_state); + if (ret) + goto fail; } } @@ -2573,7 +2583,7 @@ EXPORT_SYMBOL(drm_atomic_helper_commit_planes); * * This function can only be savely used when planes are not allowed to move * between different CRTCs because this function doesn't handle inter-CRTC - * depencies. Callers need to ensure that either no such depencies exist, + * dependencies. Callers need to ensure that either no such dependencies exist, * resolve them through ordering of commit calls or through some other means. */ void @@ -2710,7 +2720,7 @@ EXPORT_SYMBOL(drm_atomic_helper_cleanup_planes); /** * drm_atomic_helper_swap_state - store atomic state into current sw state * @state: atomic state - * @stall: stall for preceeding commits + * @stall: stall for preceding commits * * This function stores the atomic state into the current state pointers in all * driver objects. It should be called after all failing steps have been done diff --git a/drivers/gpu/drm/drm_atomic_uapi.c b/drivers/gpu/drm/drm_atomic_uapi.c index 7e48d40600ff..909f31833181 100644 --- a/drivers/gpu/drm/drm_atomic_uapi.c +++ b/drivers/gpu/drm/drm_atomic_uapi.c @@ -48,7 +48,7 @@ * in all its forms: The monster ATOMIC IOCTL itself, code for GET_PROPERTY and * SET_PROPERTY IOCTLs. Plus interface functions for compatibility helpers and * drivers which have special needs to construct their own atomic updates, e.g. - * for load detect or similiar. + * for load detect or similar. */ /** @@ -753,7 +753,7 @@ static int drm_atomic_connector_set_property(struct drm_connector *connector, * restore the state it wants on VT switch. So if the userspace * tries to change the link_status from GOOD to BAD, driver * silently rejects it and returns a 0. This prevents userspace - * from accidently breaking the display when it restores the + * from accidentally breaking the display when it restores the * state. */ if (state->link_status != DRM_LINK_STATUS_GOOD) @@ -1064,7 +1064,7 @@ int drm_atomic_set_property(struct drm_atomic_state *state, * DOC: explicit fencing properties * * Explicit fencing allows userspace to control the buffer synchronization - * between devices. A Fence or a group of fences are transfered to/from + * between devices. A Fence or a group of fences are transferred to/from * userspace using Sync File fds and there are two DRM properties for that. * IN_FENCE_FD on each DRM Plane to send fences to the kernel and * OUT_FENCE_PTR on each DRM CRTC to receive fences from the kernel. diff --git a/drivers/gpu/drm/drm_auth.c b/drivers/gpu/drm/drm_auth.c index b59b26a71ad5..60a6b21474b1 100644 --- a/drivers/gpu/drm/drm_auth.c +++ b/drivers/gpu/drm/drm_auth.c @@ -52,7 +52,7 @@ * * In addition only one &drm_master can be the current master for a &drm_device. * It can be switched through the DROP_MASTER and SET_MASTER IOCTL, or - * implicitly through closing/openeing the primary device node. See also + * implicitly through closing/opening the primary device node. See also * drm_is_current_master(). * * Clients can authenticate against the current master (if it matches their own) @@ -61,6 +61,36 @@ * trusted clients. 
*/ +static bool drm_is_current_master_locked(struct drm_file *fpriv) +{ + lockdep_assert_once(lockdep_is_held(&fpriv->master_lookup_lock) || + lockdep_is_held(&fpriv->minor->dev->master_mutex)); + + return fpriv->is_master && drm_lease_owner(fpriv->master) == fpriv->minor->dev->master; +} + +/** + * drm_is_current_master - checks whether @priv is the current master + * @fpriv: DRM file private + * + * Checks whether @fpriv is current master on its device. This decides whether a + * client is allowed to run DRM_MASTER IOCTLs. + * + * Most of the modern IOCTL which require DRM_MASTER are for kernel modesetting + * - the current master is assumed to own the non-shareable display hardware. + */ +bool drm_is_current_master(struct drm_file *fpriv) +{ + bool ret; + + spin_lock(&fpriv->master_lookup_lock); + ret = drm_is_current_master_locked(fpriv); + spin_unlock(&fpriv->master_lookup_lock); + + return ret; +} +EXPORT_SYMBOL(drm_is_current_master); + int drm_getmagic(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct drm_auth *auth = data; @@ -135,16 +165,18 @@ static void drm_set_master(struct drm_device *dev, struct drm_file *fpriv, static int drm_new_set_master(struct drm_device *dev, struct drm_file *fpriv) { struct drm_master *old_master; + struct drm_master *new_master; lockdep_assert_held_once(&dev->master_mutex); WARN_ON(fpriv->is_master); old_master = fpriv->master; - fpriv->master = drm_master_create(dev); - if (!fpriv->master) { - fpriv->master = old_master; + new_master = drm_master_create(dev); + if (!new_master) return -ENOMEM; - } + spin_lock(&fpriv->master_lookup_lock); + fpriv->master = new_master; + spin_unlock(&fpriv->master_lookup_lock); fpriv->is_master = 1; fpriv->authenticated = 1; @@ -223,7 +255,7 @@ int drm_setmaster_ioctl(struct drm_device *dev, void *data, if (ret) goto out_unlock; - if (drm_is_current_master(file_priv)) + if (drm_is_current_master_locked(file_priv)) goto out_unlock; if (dev->master) { @@ -272,7 +304,7 @@ int drm_dropmaster_ioctl(struct drm_device *dev, void *data, if (ret) goto out_unlock; - if (!drm_is_current_master(file_priv)) { + if (!drm_is_current_master_locked(file_priv)) { ret = -EINVAL; goto out_unlock; } @@ -303,10 +335,13 @@ int drm_master_open(struct drm_file *file_priv) * any master object for render clients */ mutex_lock(&dev->master_mutex); - if (!dev->master) + if (!dev->master) { ret = drm_new_set_master(dev, file_priv); - else + } else { + spin_lock(&file_priv->master_lookup_lock); file_priv->master = drm_master_get(dev->master); + spin_unlock(&file_priv->master_lookup_lock); + } mutex_unlock(&dev->master_mutex); return ret; @@ -322,7 +357,7 @@ void drm_master_release(struct drm_file *file_priv) if (file_priv->magic) idr_remove(&file_priv->master->magic_map, file_priv->magic); - if (!drm_is_current_master(file_priv)) + if (!drm_is_current_master_locked(file_priv)) goto out; drm_legacy_lock_master_cleanup(dev, master); @@ -344,22 +379,6 @@ out: } /** - * drm_is_current_master - checks whether @priv is the current master - * @fpriv: DRM file private - * - * Checks whether @fpriv is current master on its device. This decides whether a - * client is allowed to run DRM_MASTER IOCTLs. - * - * Most of the modern IOCTL which require DRM_MASTER are for kernel modesetting - * - the current master is assumed to own the non-shareable display hardware. 
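/*
 * Illustrative sketch (not from this series): with master_lookup_lock now
 * guarding the &drm_file.master pointer, code running outside of
 * dev->master_mutex should take its own reference through
 * drm_file_get_master() (added further down in this patch) instead of
 * dereferencing file_priv->master directly. example_with_master() is a
 * made-up caller showing the pattern.
 */
static void example_with_master(struct drm_file *file_priv)
{
	struct drm_master *master = drm_file_get_master(file_priv);

	if (!master)
		return;

	/* ... safely use the master object here ... */

	drm_master_put(&master);
}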
- */ -bool drm_is_current_master(struct drm_file *fpriv) -{ - return fpriv->is_master && drm_lease_owner(fpriv->master) == fpriv->minor->dev->master; -} -EXPORT_SYMBOL(drm_is_current_master); - -/** * drm_master_get - reference a master pointer * @master: &struct drm_master * @@ -372,6 +391,31 @@ struct drm_master *drm_master_get(struct drm_master *master) } EXPORT_SYMBOL(drm_master_get); +/** + * drm_file_get_master - reference &drm_file.master of @file_priv + * @file_priv: DRM file private + * + * Increments the reference count of @file_priv's &drm_file.master and returns + * the &drm_file.master. If @file_priv has no &drm_file.master, returns NULL. + * + * Master pointers returned from this function should be unreferenced using + * drm_master_put(). + */ +struct drm_master *drm_file_get_master(struct drm_file *file_priv) +{ + struct drm_master *master = NULL; + + spin_lock(&file_priv->master_lookup_lock); + if (!file_priv->master) + goto unlock; + master = drm_master_get(file_priv->master); + +unlock: + spin_unlock(&file_priv->master_lookup_lock); + return master; +} +EXPORT_SYMBOL(drm_file_get_master); + static void drm_master_destroy(struct kref *kref) { struct drm_master *master = container_of(kref, struct drm_master, refcount); diff --git a/drivers/gpu/drm/drm_bridge.c b/drivers/gpu/drm/drm_bridge.c index 044acd07c153..a8ed66751c2d 100644 --- a/drivers/gpu/drm/drm_bridge.c +++ b/drivers/gpu/drm/drm_bridge.c @@ -28,6 +28,7 @@ #include <drm/drm_atomic_state_helper.h> #include <drm/drm_bridge.h> #include <drm/drm_encoder.h> +#include <drm/drm_print.h> #include "drm_crtc_internal.h" @@ -225,6 +226,15 @@ err_reset_bridge: bridge->dev = NULL; bridge->encoder = NULL; list_del(&bridge->chain_node); + +#ifdef CONFIG_OF + DRM_ERROR("failed to attach bridge %pOF to encoder %s: %d\n", + bridge->of_node, encoder->name, ret); +#else + DRM_ERROR("failed to attach bridge to encoder %s: %d\n", + encoder->name, ret); +#endif + return ret; } EXPORT_SYMBOL(drm_bridge_attach); @@ -972,7 +982,7 @@ drm_atomic_bridge_propagate_bus_flags(struct drm_bridge *bridge, bridge_state->output_bus_cfg.flags = output_flags; /* - * Propage the output flags to the input end of the bridge. Again, it's + * Propagate the output flags to the input end of the bridge. Again, it's * not necessarily what all bridges want, but that's what most of them * do, and by doing that by default we avoid forcing drivers to * duplicate the "dummy propagation" logic. diff --git a/drivers/gpu/drm/drm_bufs.c b/drivers/gpu/drm/drm_bufs.c index ae8e4d76209c..fcca21e8efac 100644 --- a/drivers/gpu/drm/drm_bufs.c +++ b/drivers/gpu/drm/drm_bufs.c @@ -1502,7 +1502,7 @@ int drm_legacy_freebufs(struct drm_device *dev, void *data, * * Maps the AGP, SG or PCI buffer region with vm_mmap(), and copies information * about each buffer into user space. For PCI buffers, it calls vm_mmap() with - * offset equal to 0, which drm_mmap() interpretes as PCI buffers and calls + * offset equal to 0, which drm_mmap() interprets as PCI buffers and calls * drm_mmap_dma(). 
*/ int __drm_legacy_mapbufs(struct drm_device *dev, void *data, int *p, diff --git a/drivers/gpu/drm/drm_cache.c b/drivers/gpu/drm/drm_cache.c index 546599f19a93..30cc59fe6ef7 100644 --- a/drivers/gpu/drm/drm_cache.c +++ b/drivers/gpu/drm/drm_cache.c @@ -170,7 +170,7 @@ drm_clflush_virt_range(void *addr, unsigned long length) for (; addr < end; addr += size) clflushopt(addr); clflushopt(end - 1); /* force serialisation */ - mb(); /*Ensure that evry data cache line entry is flushed*/ + mb(); /*Ensure that every data cache line entry is flushed*/ return; } diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c index da39e7ff6965..2ba257b1ae20 100644 --- a/drivers/gpu/drm/drm_connector.c +++ b/drivers/gpu/drm/drm_connector.c @@ -2414,6 +2414,7 @@ int drm_mode_getconnector(struct drm_device *dev, void *data, struct drm_mode_modeinfo u_mode; struct drm_mode_modeinfo __user *mode_ptr; uint32_t __user *encoder_ptr; + bool is_current_master; if (!drm_core_check_feature(dev, DRIVER_MODESET)) return -EOPNOTSUPP; @@ -2444,9 +2445,11 @@ int drm_mode_getconnector(struct drm_device *dev, void *data, out_resp->connector_type = connector->connector_type; out_resp->connector_type_id = connector->connector_type_id; + is_current_master = drm_is_current_master(file_priv); + mutex_lock(&dev->mode_config.mutex); if (out_resp->count_modes == 0) { - if (drm_is_current_master(file_priv)) + if (is_current_master) connector->funcs->fill_modes(connector, dev->mode_config.max_width, dev->mode_config.max_height); diff --git a/drivers/gpu/drm/drm_crtc_internal.h b/drivers/gpu/drm/drm_crtc_internal.h index 1ca51addb589..edb772947cb4 100644 --- a/drivers/gpu/drm/drm_crtc_internal.h +++ b/drivers/gpu/drm/drm_crtc_internal.h @@ -262,6 +262,8 @@ int drm_plane_register_all(struct drm_device *dev); void drm_plane_unregister_all(struct drm_device *dev); int drm_plane_check_pixel_format(struct drm_plane *plane, u32 format, u64 modifier); +struct drm_mode_rect * +__drm_plane_get_damage_clips(const struct drm_plane_state *state); /* drm_bridge.c */ void drm_bridge_detach(struct drm_bridge *bridge); diff --git a/drivers/gpu/drm/drm_damage_helper.c b/drivers/gpu/drm/drm_damage_helper.c index 3a4126dc2520..8eeff0c7bdd4 100644 --- a/drivers/gpu/drm/drm_damage_helper.c +++ b/drivers/gpu/drm/drm_damage_helper.c @@ -34,44 +34,6 @@ #include <drm/drm_damage_helper.h> #include <drm/drm_device.h> -/** - * DOC: overview - * - * FB_DAMAGE_CLIPS is an optional plane property which provides a means to - * specify a list of damage rectangles on a plane in framebuffer coordinates of - * the framebuffer attached to the plane. In current context damage is the area - * of plane framebuffer that has changed since last plane update (also called - * page-flip), irrespective of whether currently attached framebuffer is same as - * framebuffer attached during last plane update or not. - * - * FB_DAMAGE_CLIPS is a hint to kernel which could be helpful for some drivers - * to optimize internally especially for virtual devices where each framebuffer - * change needs to be transmitted over network, usb, etc. - * - * Since FB_DAMAGE_CLIPS is a hint so it is an optional property. User-space can - * ignore damage clips property and in that case driver will do a full plane - * update. In case damage clips are provided then it is guaranteed that the area - * inside damage clips will be updated to plane. For efficiency driver can do - * full update or can update more than specified in damage clips. 
Since driver - * is free to read more, user-space must always render the entire visible - * framebuffer. Otherwise there can be corruptions. Also, if a user-space - * provides damage clips which doesn't encompass the actual damage to - * framebuffer (since last plane update) can result in incorrect rendering. - * - * FB_DAMAGE_CLIPS is a blob property with the layout of blob data is simply an - * array of &drm_mode_rect. Unlike plane &drm_plane_state.src coordinates, - * damage clips are not in 16.16 fixed point. Similar to plane src in - * framebuffer, damage clips cannot be negative. In damage clip, x1/y1 are - * inclusive and x2/y2 are exclusive. While kernel does not error for overlapped - * damage clips, it is strongly discouraged. - * - * Drivers that are interested in damage interface for plane should enable - * FB_DAMAGE_CLIPS property by calling drm_plane_enable_fb_damage_clips(). - * Drivers implementing damage can use drm_atomic_helper_damage_iter_init() and - * drm_atomic_helper_damage_iter_next() helper iterator function to get damage - * rectangles clipped to &drm_plane_state.src. - */ - static void convert_clip_rect_to_rect(const struct drm_clip_rect *src, struct drm_mode_rect *dest, uint32_t num_clips, uint32_t src_inc) @@ -88,22 +50,6 @@ static void convert_clip_rect_to_rect(const struct drm_clip_rect *src, } /** - * drm_plane_enable_fb_damage_clips - Enables plane fb damage clips property. - * @plane: Plane on which to enable damage clips property. - * - * This function lets driver to enable the damage clips property on a plane. - */ -void drm_plane_enable_fb_damage_clips(struct drm_plane *plane) -{ - struct drm_device *dev = plane->dev; - struct drm_mode_config *config = &dev->mode_config; - - drm_object_attach_property(&plane->base, config->prop_fb_damage_clips, - 0); -} -EXPORT_SYMBOL(drm_plane_enable_fb_damage_clips); - -/** * drm_atomic_helper_check_plane_damage - Verify plane damage on atomic_check. * @state: The driver state object. * @plane_state: Plane state for which to verify damage. @@ -170,7 +116,7 @@ int drm_atomic_helper_dirtyfb(struct drm_framebuffer *fb, int ret = 0; /* - * When called from ioctl, we are interruptable, but not when called + * When called from ioctl, we are interruptible, but not when called * internally (ie. defio worker) */ drm_modeset_acquire_init(&ctx, @@ -282,7 +228,7 @@ drm_atomic_helper_damage_iter_init(struct drm_atomic_helper_damage_iter *iter, if (!state || !state->crtc || !state->fb || !state->visible) return; - iter->clips = drm_helper_get_plane_damage_clips(state); + iter->clips = (struct drm_rect *)drm_plane_get_damage_clips(state); iter->num_clips = drm_plane_get_damage_clips_count(state); /* Round down for x1/y1 and round up for x2/y2 to catch all pixels */ diff --git a/drivers/gpu/drm/drm_debugfs.c b/drivers/gpu/drm/drm_debugfs.c index 3d7182001004..b0a826489488 100644 --- a/drivers/gpu/drm/drm_debugfs.c +++ b/drivers/gpu/drm/drm_debugfs.c @@ -91,6 +91,7 @@ static int drm_clients_info(struct seq_file *m, void *data) mutex_lock(&dev->filelist_mutex); list_for_each_entry_reverse(priv, &dev->filelist, lhead) { struct task_struct *task; + bool is_current_master = drm_is_current_master(priv); rcu_read_lock(); /* locks pid_task()->comm */ task = pid_task(priv->pid, PIDTYPE_PID); @@ -99,7 +100,7 @@ static int drm_clients_info(struct seq_file *m, void *data) task ? task->comm : "<unknown>", pid_vnr(priv->pid), priv->minor->index, - drm_is_current_master(priv) ? 'y' : 'n', + is_current_master ? 'y' : 'n', priv->authenticated ? 
'y' : 'n', from_kuid_munged(seq_user_ns(m), uid), priv->magic); diff --git a/drivers/gpu/drm/drm_debugfs_crc.c b/drivers/gpu/drm/drm_debugfs_crc.c index 3dd70d813f69..bbc3bc4ba844 100644 --- a/drivers/gpu/drm/drm_debugfs_crc.c +++ b/drivers/gpu/drm/drm_debugfs_crc.c @@ -46,10 +46,10 @@ * it reached a given hardware component (a CRC sampling "source"). * * Userspace can control generation of CRCs in a given CRTC by writing to the - * file dri/0/crtc-N/crc/control in debugfs, with N being the index of the CRTC. - * Accepted values are source names (which are driver-specific) and the "auto" - * keyword, which will let the driver select a default source of frame CRCs - * for this CRTC. + * file dri/0/crtc-N/crc/control in debugfs, with N being the :ref:`index of + * the CRTC<crtc_index>`. Accepted values are source names (which are + * driver-specific) and the "auto" keyword, which will let the driver select a + * default source of frame CRCs for this CRTC. * * Once frame CRC generation is enabled, userspace can capture them by reading * the dri/0/crtc-N/crc/data file. Each line in that file contains the frame diff --git a/drivers/gpu/drm/drm_dp_aux_bus.c b/drivers/gpu/drm/drm_dp_aux_bus.c new file mode 100644 index 000000000000..298ea7a49591 --- /dev/null +++ b/drivers/gpu/drm/drm_dp_aux_bus.c @@ -0,0 +1,323 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright 2021 Google Inc. + * + * The DP AUX bus is used for devices that are connected over a DisplayPort + * AUX bus. The devices on the far side of the bus are referred to as + * endpoints in this code. + * + * Commonly there is only one device connected to the DP AUX bus: a panel. + * Though historically panels (even DP panels) have been modeled as simple + * platform devices, putting them under the DP AUX bus allows the panel driver + * to perform transactions on that bus. + */ + +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/of_device.h> +#include <linux/pm_domain.h> +#include <linux/pm_runtime.h> + +#include <drm/drm_dp_aux_bus.h> +#include <drm/drm_dp_helper.h> + +/** + * dp_aux_ep_match() - The match function for the dp_aux_bus. + * @dev: The device to match. + * @drv: The driver to try to match against. + * + * At the moment, we just match on device tree. + * + * Return: True if this driver matches this device; false otherwise. + */ +static int dp_aux_ep_match(struct device *dev, struct device_driver *drv) +{ + return !!of_match_device(drv->of_match_table, dev); +} + +/** + * dp_aux_ep_probe() - The probe function for the dp_aux_bus. + * @dev: The device to probe. + * + * Calls through to the endpoint driver probe. + * + * Return: 0 if no error or negative error code. + */ +static int dp_aux_ep_probe(struct device *dev) +{ + struct dp_aux_ep_driver *aux_ep_drv = to_dp_aux_ep_drv(dev->driver); + struct dp_aux_ep_device *aux_ep = to_dp_aux_ep_dev(dev); + int ret; + + ret = dev_pm_domain_attach(dev, true); + if (ret) + return dev_err_probe(dev, ret, "Failed to attach to PM Domain\n"); + + ret = aux_ep_drv->probe(aux_ep); + if (ret) + dev_pm_domain_detach(dev, true); + + return ret; +} + +/** + * dp_aux_ep_remove() - The remove function for the dp_aux_bus. + * @dev: The device to remove. + * + * Calls through to the endpoint driver remove. 
+ * + */ +static void dp_aux_ep_remove(struct device *dev) +{ + struct dp_aux_ep_driver *aux_ep_drv = to_dp_aux_ep_drv(dev->driver); + struct dp_aux_ep_device *aux_ep = to_dp_aux_ep_dev(dev); + + if (aux_ep_drv->remove) + aux_ep_drv->remove(aux_ep); + dev_pm_domain_detach(dev, true); +} + +/** + * dp_aux_ep_shutdown() - The shutdown function for the dp_aux_bus. + * @dev: The device to shutdown. + * + * Calls through to the endpoint driver shutdown. + */ +static void dp_aux_ep_shutdown(struct device *dev) +{ + struct dp_aux_ep_driver *aux_ep_drv; + + if (!dev->driver) + return; + + aux_ep_drv = to_dp_aux_ep_drv(dev->driver); + if (aux_ep_drv->shutdown) + aux_ep_drv->shutdown(to_dp_aux_ep_dev(dev)); +} + +static struct bus_type dp_aux_bus_type = { + .name = "dp-aux", + .match = dp_aux_ep_match, + .probe = dp_aux_ep_probe, + .remove = dp_aux_ep_remove, + .shutdown = dp_aux_ep_shutdown, +}; + +static ssize_t modalias_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return of_device_modalias(dev, buf, PAGE_SIZE); +} +static DEVICE_ATTR_RO(modalias); + +static struct attribute *dp_aux_ep_dev_attrs[] = { + &dev_attr_modalias.attr, + NULL, +}; +ATTRIBUTE_GROUPS(dp_aux_ep_dev); + +/** + * dp_aux_ep_dev_release() - Free memory for the dp_aux_ep device + * @dev: The device to free. + * + * Return: 0 if no error or negative error code. + */ +static void dp_aux_ep_dev_release(struct device *dev) +{ + kfree(to_dp_aux_ep_dev(dev)); +} + +static struct device_type dp_aux_device_type_type = { + .groups = dp_aux_ep_dev_groups, + .uevent = of_device_uevent_modalias, + .release = dp_aux_ep_dev_release, +}; + +/** + * of_dp_aux_ep_destroy() - Destroy an DP AUX endpoint device + * @dev: The device to destroy. + * @data: Not used + * + * This is just used as a callback by of_dp_aux_depopulate_ep_devices() and + * is called for _all_ of the child devices of the device providing the AUX bus. + * We'll only act on those that are of type "dp_aux_bus_type". + * + * This function is effectively an inverse of what's in the loop + * in of_dp_aux_populate_ep_devices(). + * + * Return: 0 if no error or negative error code. + */ +static int of_dp_aux_ep_destroy(struct device *dev, void *data) +{ + struct device_node *np = dev->of_node; + + if (dev->bus != &dp_aux_bus_type) + return 0; + + if (!of_node_check_flag(np, OF_POPULATED)) + return 0; + + of_node_clear_flag(np, OF_POPULATED); + of_node_put(np); + + device_unregister(dev); + + return 0; +} + +/** + * of_dp_aux_depopulate_ep_devices() - Undo of_dp_aux_populate_ep_devices + * @aux: The AUX channel whose devices we want to depopulate + * + * This will destroy all devices that were created + * by of_dp_aux_populate_ep_devices(). + */ +void of_dp_aux_depopulate_ep_devices(struct drm_dp_aux *aux) +{ + device_for_each_child_reverse(aux->dev, NULL, of_dp_aux_ep_destroy); +} +EXPORT_SYMBOL_GPL(of_dp_aux_depopulate_ep_devices); + +/** + * of_dp_aux_populate_ep_devices() - Populate the endpoint devices on the DP AUX + * @aux: The AUX channel whose devices we want to populate. It is required that + * drm_dp_aux_init() has already been called for this AUX channel. + * + * This will populate all the devices under the "aux-bus" node of the device + * providing the AUX channel (AKA aux->dev). + * + * When this function finishes, it is _possible_ (but not guaranteed) that + * our sub-devices will have finished probing. 
It should be noted that if our + * sub-devices return -EPROBE_DEFER that we will not return any error codes + * ourselves but our sub-devices will _not_ have actually probed successfully + * yet. There may be other cases (maybe added in the future?) where sub-devices + * won't have been probed yet when this function returns, so it's best not to + * rely on that. + * + * If this function succeeds you should later make sure you call + * of_dp_aux_depopulate_ep_devices() to undo it, or just use the devm version + * of this function. + * + * Return: 0 if no error or negative error code. + */ +int of_dp_aux_populate_ep_devices(struct drm_dp_aux *aux) +{ + struct device_node *bus, *np; + struct dp_aux_ep_device *aux_ep; + int ret; + + /* drm_dp_aux_init() should have been called already; warn if not */ + WARN_ON_ONCE(!aux->ddc.algo); + + if (!aux->dev->of_node) + return 0; + + bus = of_get_child_by_name(aux->dev->of_node, "aux-bus"); + if (!bus) + return 0; + + for_each_available_child_of_node(bus, np) { + if (of_node_test_and_set_flag(np, OF_POPULATED)) + continue; + + aux_ep = kzalloc(sizeof(*aux_ep), GFP_KERNEL); + if (!aux_ep) + continue; + aux_ep->aux = aux; + + aux_ep->dev.parent = aux->dev; + aux_ep->dev.bus = &dp_aux_bus_type; + aux_ep->dev.type = &dp_aux_device_type_type; + aux_ep->dev.of_node = of_node_get(np); + dev_set_name(&aux_ep->dev, "aux-%s", dev_name(aux->dev)); + + ret = device_register(&aux_ep->dev); + if (ret) { + dev_err(aux->dev, "Failed to create AUX EP for %pOF: %d\n", np, ret); + of_node_clear_flag(np, OF_POPULATED); + of_node_put(np); + + /* + * As per docs of device_register(), call this instead + * of kfree() directly for error cases. + */ + put_device(&aux_ep->dev); + + /* + * Following in the footsteps of of_i2c_register_devices(), + * we won't fail the whole function here--we'll just + * continue registering any other devices we find. + */ + } + } + + of_node_put(bus); + + return 0; +} + +static void of_dp_aux_depopulate_ep_devices_void(void *data) +{ + of_dp_aux_depopulate_ep_devices(data); +} + +/** + * devm_of_dp_aux_populate_ep_devices() - devm wrapper for of_dp_aux_populate_ep_devices() + * @aux: The AUX channel whose devices we want to populate + * + * Handles freeing w/ devm on the device "aux->dev". + * + * Return: 0 if no error or negative error code. 
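A host controller or bridge driver that provides the AUX channel would typically call the devm variant documented here from its probe path, after drm_dp_aux_init(). The following is a minimal, hypothetical sketch; the my_bridge structure and the commented-out transfer hook are invented, and only drm_dp_aux_init() and devm_of_dp_aux_populate_ep_devices() are taken from this patch.

/* Hypothetical host-side usage sketch (not part of the patch). */
struct my_bridge {
        struct drm_dp_aux aux;
};

static int my_bridge_probe(struct platform_device *pdev)
{
        struct my_bridge *bridge;
        int ret;

        bridge = devm_kzalloc(&pdev->dev, sizeof(*bridge), GFP_KERNEL);
        if (!bridge)
                return -ENOMEM;

        bridge->aux.name = "my-bridge-aux";
        bridge->aux.dev = &pdev->dev;   /* device whose DT node carries the "aux-bus" child */
        /* bridge->aux.transfer = my_bridge_aux_transfer; (driver-specific AUX transfer) */
        drm_dp_aux_init(&bridge->aux);  /* must happen before populating the bus */

        /*
         * Creates one device per enabled child of the "aux-bus" node; the devm
         * action depopulates them again when the bridge driver is unbound.
         */
        ret = devm_of_dp_aux_populate_ep_devices(&bridge->aux);
        if (ret)
                return ret;

        return 0;
}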
+ */ +int devm_of_dp_aux_populate_ep_devices(struct drm_dp_aux *aux) +{ + int ret; + + ret = of_dp_aux_populate_ep_devices(aux); + if (ret) + return ret; + + return devm_add_action_or_reset(aux->dev, + of_dp_aux_depopulate_ep_devices_void, + aux); +} +EXPORT_SYMBOL_GPL(devm_of_dp_aux_populate_ep_devices); + +int __dp_aux_dp_driver_register(struct dp_aux_ep_driver *drv, struct module *owner) +{ + drv->driver.owner = owner; + drv->driver.bus = &dp_aux_bus_type; + + return driver_register(&drv->driver); +} +EXPORT_SYMBOL_GPL(__dp_aux_dp_driver_register); + +void dp_aux_dp_driver_unregister(struct dp_aux_ep_driver *drv) +{ + driver_unregister(&drv->driver); +} +EXPORT_SYMBOL_GPL(dp_aux_dp_driver_unregister); + +static int __init dp_aux_bus_init(void) +{ + int ret; + + ret = bus_register(&dp_aux_bus_type); + if (ret) + return ret; + + return 0; +} + +static void __exit dp_aux_bus_exit(void) +{ + bus_unregister(&dp_aux_bus_type); +} + +subsys_initcall(dp_aux_bus_init); +module_exit(dp_aux_bus_exit); + +MODULE_AUTHOR("Douglas Anderson <dianders@chromium.org>"); +MODULE_DESCRIPTION("DRM DisplayPort AUX bus"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/gpu/drm/drm_dp_helper.c b/drivers/gpu/drm/drm_dp_helper.c index 55b53df6ce34..6d0f2c447f3b 100644 --- a/drivers/gpu/drm/drm_dp_helper.c +++ b/drivers/gpu/drm/drm_dp_helper.c @@ -33,9 +33,17 @@ #include <drm/drm_print.h> #include <drm/drm_vblank.h> #include <drm/drm_dp_mst_helper.h> +#include <drm/drm_panel.h> #include "drm_crtc_helper_internal.h" +struct dp_aux_backlight { + struct backlight_device *base; + struct drm_dp_aux *aux; + struct drm_edp_backlight_info info; + bool enabled; +}; + /** * DOC: dp helpers * @@ -764,7 +772,7 @@ int drm_dp_downstream_max_tmds_clock(const u8 dpcd[DP_RECEIVER_CAP_SIZE], * It's left up to the driver to check the * DP dual mode adapter's max TMDS clock. * - * Unfortunatley it looks like branch devices + * Unfortunately it looks like branch devices * may not fordward that the DP dual mode i2c * access so we just usually get i2c nak :( */ @@ -1357,7 +1365,7 @@ static int drm_dp_i2c_msg_duration(const struct drm_dp_aux_msg *msg, } /* - * Deterine how many retries should be attempted to successfully transfer + * Determine how many retries should be attempted to successfully transfer * the specified message, based on the estimated durations of the * i2c and AUX transfers. */ @@ -1410,7 +1418,7 @@ static int drm_dp_i2c_do_msg(struct drm_dp_aux *aux, struct drm_dp_aux_msg *msg) /* * While timeouts can be errors, they're usually normal * behavior (for instance, when a driver tries to - * communicate with a non-existant DisplayPort device). + * communicate with a non-existent DisplayPort device). * Avoid spamming the kernel log with timeout errors. */ if (ret == -ETIMEDOUT) @@ -3115,3 +3123,459 @@ int drm_dp_pcon_convert_rgb_to_ycbcr(struct drm_dp_aux *aux, u8 color_spc) return 0; } EXPORT_SYMBOL(drm_dp_pcon_convert_rgb_to_ycbcr); + +/** + * drm_edp_backlight_set_level() - Set the backlight level of an eDP panel via AUX + * @aux: The DP AUX channel to use + * @bl: Backlight capability info from drm_edp_backlight_init() + * @level: The brightness level to set + * + * Sets the brightness level of an eDP panel's backlight. Note that the panel's backlight must + * already have been enabled by the driver by calling drm_edp_backlight_enable(). 
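On the endpoint side, a driver for a device sitting on the AUX bus registers a struct dp_aux_ep_driver through the helper above. A hypothetical skeleton follows; the compatible string, driver name and probe body are made up, while the dp_aux_ep_driver/dp_aux_ep_device types and __dp_aux_dp_driver_register()/dp_aux_dp_driver_unregister() come from this patch.

static int my_panel_probe(struct dp_aux_ep_device *aux_ep)
{
        /*
         * aux_ep->aux is the host's DP AUX channel; the endpoint driver can
         * issue DPCD transactions on it with drm_dp_dpcd_read()/..._write().
         */
        dev_info(&aux_ep->dev, "endpoint probed\n");
        return 0;
}

static const struct of_device_id my_panel_of_match[] = {
        { .compatible = "vendor,hypothetical-edp-panel" },
        { /* sentinel */ }
};
MODULE_DEVICE_TABLE(of, my_panel_of_match);

static struct dp_aux_ep_driver my_panel_driver = {
        .probe = my_panel_probe,
        .driver = {
                .name = "my-edp-panel",
                .of_match_table = my_panel_of_match,
        },
};

static int __init my_panel_init(void)
{
        return __dp_aux_dp_driver_register(&my_panel_driver, THIS_MODULE);
}
module_init(my_panel_init);

static void __exit my_panel_exit(void)
{
        dp_aux_dp_driver_unregister(&my_panel_driver);
}
module_exit(my_panel_exit);

MODULE_LICENSE("GPL");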
+ * + * Returns: %0 on success, negative error code on failure + */ +int drm_edp_backlight_set_level(struct drm_dp_aux *aux, const struct drm_edp_backlight_info *bl, + u16 level) +{ + int ret; + u8 buf[2] = { 0 }; + + if (bl->lsb_reg_used) { + buf[0] = (level & 0xff00) >> 8; + buf[1] = (level & 0x00ff); + } else { + buf[0] = level; + } + + ret = drm_dp_dpcd_write(aux, DP_EDP_BACKLIGHT_BRIGHTNESS_MSB, buf, sizeof(buf)); + if (ret != sizeof(buf)) { + drm_err(aux->drm_dev, + "%s: Failed to write aux backlight level: %d\n", + aux->name, ret); + return ret < 0 ? ret : -EIO; + } + + return 0; +} +EXPORT_SYMBOL(drm_edp_backlight_set_level); + +static int +drm_edp_backlight_set_enable(struct drm_dp_aux *aux, const struct drm_edp_backlight_info *bl, + bool enable) +{ + int ret; + u8 buf; + + /* The panel uses something other then DPCD for enabling its backlight */ + if (!bl->aux_enable) + return 0; + + ret = drm_dp_dpcd_readb(aux, DP_EDP_DISPLAY_CONTROL_REGISTER, &buf); + if (ret != 1) { + drm_err(aux->drm_dev, "%s: Failed to read eDP display control register: %d\n", + aux->name, ret); + return ret < 0 ? ret : -EIO; + } + if (enable) + buf |= DP_EDP_BACKLIGHT_ENABLE; + else + buf &= ~DP_EDP_BACKLIGHT_ENABLE; + + ret = drm_dp_dpcd_writeb(aux, DP_EDP_DISPLAY_CONTROL_REGISTER, buf); + if (ret != 1) { + drm_err(aux->drm_dev, "%s: Failed to write eDP display control register: %d\n", + aux->name, ret); + return ret < 0 ? ret : -EIO; + } + + return 0; +} + +/** + * drm_edp_backlight_enable() - Enable an eDP panel's backlight using DPCD + * @aux: The DP AUX channel to use + * @bl: Backlight capability info from drm_edp_backlight_init() + * @level: The initial backlight level to set via AUX, if there is one + * + * This function handles enabling DPCD backlight controls on a panel over DPCD, while additionally + * restoring any important backlight state such as the given backlight level, the brightness byte + * count, backlight frequency, etc. + * + * Note that certain panels, while supporting brightness level controls over DPCD, may not support + * having their backlights enabled via the standard %DP_EDP_DISPLAY_CONTROL_REGISTER. On such panels + * &drm_edp_backlight_info.aux_enable will be set to %false, this function will skip the step of + * programming the %DP_EDP_DISPLAY_CONTROL_REGISTER, and the driver must perform the required + * implementation specific step for enabling the backlight after calling this function. + * + * Returns: %0 on success, negative error code on failure. + */ +int drm_edp_backlight_enable(struct drm_dp_aux *aux, const struct drm_edp_backlight_info *bl, + const u16 level) +{ + int ret; + u8 dpcd_buf, new_dpcd_buf; + + ret = drm_dp_dpcd_readb(aux, DP_EDP_BACKLIGHT_MODE_SET_REGISTER, &dpcd_buf); + if (ret != 1) { + drm_dbg_kms(aux->drm_dev, + "%s: Failed to read backlight mode: %d\n", aux->name, ret); + return ret < 0 ? 
ret : -EIO; + } + + new_dpcd_buf = dpcd_buf; + + if ((dpcd_buf & DP_EDP_BACKLIGHT_CONTROL_MODE_MASK) != DP_EDP_BACKLIGHT_CONTROL_MODE_DPCD) { + new_dpcd_buf &= ~DP_EDP_BACKLIGHT_CONTROL_MODE_MASK; + new_dpcd_buf |= DP_EDP_BACKLIGHT_CONTROL_MODE_DPCD; + + if (bl->pwmgen_bit_count) { + ret = drm_dp_dpcd_writeb(aux, DP_EDP_PWMGEN_BIT_COUNT, bl->pwmgen_bit_count); + if (ret != 1) + drm_dbg_kms(aux->drm_dev, "%s: Failed to write aux pwmgen bit count: %d\n", + aux->name, ret); + } + } + + if (bl->pwm_freq_pre_divider) { + ret = drm_dp_dpcd_writeb(aux, DP_EDP_BACKLIGHT_FREQ_SET, bl->pwm_freq_pre_divider); + if (ret != 1) + drm_dbg_kms(aux->drm_dev, + "%s: Failed to write aux backlight frequency: %d\n", + aux->name, ret); + else + new_dpcd_buf |= DP_EDP_BACKLIGHT_FREQ_AUX_SET_ENABLE; + } + + if (new_dpcd_buf != dpcd_buf) { + ret = drm_dp_dpcd_writeb(aux, DP_EDP_BACKLIGHT_MODE_SET_REGISTER, new_dpcd_buf); + if (ret != 1) { + drm_dbg_kms(aux->drm_dev, "%s: Failed to write aux backlight mode: %d\n", + aux->name, ret); + return ret < 0 ? ret : -EIO; + } + } + + ret = drm_edp_backlight_set_level(aux, bl, level); + if (ret < 0) + return ret; + ret = drm_edp_backlight_set_enable(aux, bl, true); + if (ret < 0) + return ret; + + return 0; +} +EXPORT_SYMBOL(drm_edp_backlight_enable); + +/** + * drm_edp_backlight_disable() - Disable an eDP backlight using DPCD, if supported + * @aux: The DP AUX channel to use + * @bl: Backlight capability info from drm_edp_backlight_init() + * + * This function handles disabling DPCD backlight controls on a panel over AUX. Note that some + * panels have backlights that are enabled/disabled by other means, despite having their brightness + * values controlled through DPCD. On such panels &drm_edp_backlight_info.aux_enable will be set to + * %false, this function will become a no-op (and we will skip updating + * %DP_EDP_DISPLAY_CONTROL_REGISTER), and the driver must take care to perform it's own + * implementation specific step for disabling the backlight. + * + * Returns: %0 on success or no-op, negative error code on failure. + */ +int drm_edp_backlight_disable(struct drm_dp_aux *aux, const struct drm_edp_backlight_info *bl) +{ + int ret; + + ret = drm_edp_backlight_set_enable(aux, bl, false); + if (ret < 0) + return ret; + + return 0; +} +EXPORT_SYMBOL(drm_edp_backlight_disable); + +static inline int +drm_edp_backlight_probe_max(struct drm_dp_aux *aux, struct drm_edp_backlight_info *bl, + u16 driver_pwm_freq_hz, const u8 edp_dpcd[EDP_DISPLAY_CTL_CAP_SIZE]) +{ + int fxp, fxp_min, fxp_max, fxp_actual, f = 1; + int ret; + u8 pn, pn_min, pn_max; + + ret = drm_dp_dpcd_readb(aux, DP_EDP_PWMGEN_BIT_COUNT, &pn); + if (ret != 1) { + drm_dbg_kms(aux->drm_dev, "%s: Failed to read pwmgen bit count cap: %d\n", + aux->name, ret); + return -ENODEV; + } + + pn &= DP_EDP_PWMGEN_BIT_COUNT_MASK; + bl->max = (1 << pn) - 1; + if (!driver_pwm_freq_hz) + return 0; + + /* + * Set PWM Frequency divider to match desired frequency provided by the driver. + * The PWM Frequency is calculated as 27Mhz / (F x P). + * - Where F = PWM Frequency Pre-Divider value programmed by field 7:0 of the + * EDP_BACKLIGHT_FREQ_SET register (DPCD Address 00728h) + * - Where P = 2^Pn, where Pn is the value programmed by field 4:0 of the + * EDP_PWMGEN_BIT_COUNT register (DPCD Address 00724h) + */ + + /* Find desired value of (F x P) + * Note that, if F x P is out of supported range, the maximum value or minimum value will + * applied automatically. So no need to check that. 
+ */ + fxp = DIV_ROUND_CLOSEST(1000 * DP_EDP_BACKLIGHT_FREQ_BASE_KHZ, driver_pwm_freq_hz); + + /* Use highest possible value of Pn for more granularity of brightness adjustment while + * satisfying the conditions below. + * - Pn is in the range of Pn_min and Pn_max + * - F is in the range of 1 and 255 + * - FxP is within 25% of desired value. + * Note: 25% is arbitrary value and may need some tweak. + */ + ret = drm_dp_dpcd_readb(aux, DP_EDP_PWMGEN_BIT_COUNT_CAP_MIN, &pn_min); + if (ret != 1) { + drm_dbg_kms(aux->drm_dev, "%s: Failed to read pwmgen bit count cap min: %d\n", + aux->name, ret); + return 0; + } + ret = drm_dp_dpcd_readb(aux, DP_EDP_PWMGEN_BIT_COUNT_CAP_MAX, &pn_max); + if (ret != 1) { + drm_dbg_kms(aux->drm_dev, "%s: Failed to read pwmgen bit count cap max: %d\n", + aux->name, ret); + return 0; + } + pn_min &= DP_EDP_PWMGEN_BIT_COUNT_MASK; + pn_max &= DP_EDP_PWMGEN_BIT_COUNT_MASK; + + /* Ensure frequency is within 25% of desired value */ + fxp_min = DIV_ROUND_CLOSEST(fxp * 3, 4); + fxp_max = DIV_ROUND_CLOSEST(fxp * 5, 4); + if (fxp_min < (1 << pn_min) || (255 << pn_max) < fxp_max) { + drm_dbg_kms(aux->drm_dev, + "%s: Driver defined backlight frequency (%d) out of range\n", + aux->name, driver_pwm_freq_hz); + return 0; + } + + for (pn = pn_max; pn >= pn_min; pn--) { + f = clamp(DIV_ROUND_CLOSEST(fxp, 1 << pn), 1, 255); + fxp_actual = f << pn; + if (fxp_min <= fxp_actual && fxp_actual <= fxp_max) + break; + } + + ret = drm_dp_dpcd_writeb(aux, DP_EDP_PWMGEN_BIT_COUNT, pn); + if (ret != 1) { + drm_dbg_kms(aux->drm_dev, "%s: Failed to write aux pwmgen bit count: %d\n", + aux->name, ret); + return 0; + } + bl->pwmgen_bit_count = pn; + bl->max = (1 << pn) - 1; + + if (edp_dpcd[2] & DP_EDP_BACKLIGHT_FREQ_AUX_SET_CAP) { + bl->pwm_freq_pre_divider = f; + drm_dbg_kms(aux->drm_dev, "%s: Using backlight frequency from driver (%dHz)\n", + aux->name, driver_pwm_freq_hz); + } + + return 0; +} + +static inline int +drm_edp_backlight_probe_level(struct drm_dp_aux *aux, struct drm_edp_backlight_info *bl, + u8 *current_mode) +{ + int ret; + u8 buf[2]; + u8 mode_reg; + + ret = drm_dp_dpcd_readb(aux, DP_EDP_BACKLIGHT_MODE_SET_REGISTER, &mode_reg); + if (ret != 1) { + drm_dbg_kms(aux->drm_dev, "%s: Failed to read backlight mode: %d\n", + aux->name, ret); + return ret < 0 ? ret : -EIO; + } + + *current_mode = (mode_reg & DP_EDP_BACKLIGHT_CONTROL_MODE_MASK); + if (*current_mode == DP_EDP_BACKLIGHT_CONTROL_MODE_DPCD) { + int size = 1 + bl->lsb_reg_used; + + ret = drm_dp_dpcd_read(aux, DP_EDP_BACKLIGHT_BRIGHTNESS_MSB, buf, size); + if (ret != size) { + drm_dbg_kms(aux->drm_dev, "%s: Failed to read backlight level: %d\n", + aux->name, ret); + return ret < 0 ? ret : -EIO; + } + + if (bl->lsb_reg_used) + return (buf[0] << 8) | buf[1]; + else + return buf[0]; + } + + /* + * If we're not in DPCD control mode yet, the programmed brightness value is meaningless and + * the driver should assume max brightness + */ + return bl->max; +} + +/** + * drm_edp_backlight_init() - Probe a display panel's TCON using the standard VESA eDP backlight + * interface. 
+ * @aux: The DP aux device to use for probing + * @bl: The &drm_edp_backlight_info struct to fill out with information on the backlight + * @driver_pwm_freq_hz: Optional PWM frequency from the driver in hz + * @edp_dpcd: A cached copy of the eDP DPCD + * @current_level: Where to store the probed brightness level + * @current_mode: Where to store the currently set backlight control mode + * + * Initializes a &drm_edp_backlight_info struct by probing @aux for it's backlight capabilities, + * along with also probing the current and maximum supported brightness levels. + * + * If @driver_pwm_freq_hz is non-zero, this will be used as the backlight frequency. Otherwise, the + * default frequency from the panel is used. + * + * Returns: %0 on success, negative error code on failure. + */ +int +drm_edp_backlight_init(struct drm_dp_aux *aux, struct drm_edp_backlight_info *bl, + u16 driver_pwm_freq_hz, const u8 edp_dpcd[EDP_DISPLAY_CTL_CAP_SIZE], + u16 *current_level, u8 *current_mode) +{ + int ret; + + if (edp_dpcd[1] & DP_EDP_BACKLIGHT_AUX_ENABLE_CAP) + bl->aux_enable = true; + if (edp_dpcd[2] & DP_EDP_BACKLIGHT_BRIGHTNESS_BYTE_COUNT) + bl->lsb_reg_used = true; + + ret = drm_edp_backlight_probe_max(aux, bl, driver_pwm_freq_hz, edp_dpcd); + if (ret < 0) + return ret; + + ret = drm_edp_backlight_probe_level(aux, bl, current_mode); + if (ret < 0) + return ret; + *current_level = ret; + + drm_dbg_kms(aux->drm_dev, + "%s: Found backlight level=%d/%d pwm_freq_pre_divider=%d mode=%x\n", + aux->name, *current_level, bl->max, bl->pwm_freq_pre_divider, *current_mode); + drm_dbg_kms(aux->drm_dev, + "%s: Backlight caps: pwmgen_bit_count=%d lsb_reg_used=%d aux_enable=%d\n", + aux->name, bl->pwmgen_bit_count, bl->lsb_reg_used, bl->aux_enable); + return 0; +} +EXPORT_SYMBOL(drm_edp_backlight_init); + +#if IS_BUILTIN(CONFIG_BACKLIGHT_CLASS_DEVICE) || \ + (IS_MODULE(CONFIG_DRM_KMS_HELPER) && IS_MODULE(CONFIG_BACKLIGHT_CLASS_DEVICE)) + +static int dp_aux_backlight_update_status(struct backlight_device *bd) +{ + struct dp_aux_backlight *bl = bl_get_data(bd); + u16 brightness = backlight_get_brightness(bd); + int ret = 0; + + if (!backlight_is_blank(bd)) { + if (!bl->enabled) { + drm_edp_backlight_enable(bl->aux, &bl->info, brightness); + bl->enabled = true; + return 0; + } + ret = drm_edp_backlight_set_level(bl->aux, &bl->info, brightness); + } else { + if (bl->enabled) { + drm_edp_backlight_disable(bl->aux, &bl->info); + bl->enabled = false; + } + } + + return ret; +} + +static const struct backlight_ops dp_aux_bl_ops = { + .update_status = dp_aux_backlight_update_status, +}; + +/** + * drm_panel_dp_aux_backlight - create and use DP AUX backlight + * @panel: DRM panel + * @aux: The DP AUX channel to use + * + * Use this function to create and handle backlight if your panel + * supports backlight control over DP AUX channel using DPCD + * registers as per VESA's standard backlight control interface. + * + * When the panel is enabled backlight will be enabled after a + * successful call to &drm_panel_funcs.enable() + * + * When the panel is disabled backlight will be disabled before the + * call to &drm_panel_funcs.disable(). + * + * A typical implementation for a panel driver supporting backlight + * control over DP AUX will call this function at probe time. + * Backlight will then be handled transparently without requiring + * any intervention from the driver. + * + * drm_panel_dp_aux_backlight() must be called after the call to drm_panel_init(). + * + * Return: 0 on success or a negative error code on failure. 
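Building on the endpoint-driver sketch earlier, a panel driver that wants the transparent backlight handling described in this kernel-doc would call the helper from probe, after drm_panel_init(). A hypothetical sketch; my_edp_panel and my_edp_panel_funcs are invented, and drm_panel_init(), drm_panel_add() and drm_panel_dp_aux_backlight() are the assumed DRM panel entry points (the last one added by this patch).

struct my_edp_panel {
        struct drm_panel panel;
};

static int my_edp_panel_probe(struct dp_aux_ep_device *aux_ep)
{
        struct my_edp_panel *p;
        int ret;

        p = devm_kzalloc(&aux_ep->dev, sizeof(*p), GFP_KERNEL);
        if (!p)
                return -ENOMEM;

        drm_panel_init(&p->panel, &aux_ep->dev, &my_edp_panel_funcs, /* assumed panel funcs */
                       DRM_MODE_CONNECTOR_eDP);

        /* Registers a backlight device backed by the VESA DPCD interface. */
        ret = drm_panel_dp_aux_backlight(&p->panel, aux_ep->aux);
        if (ret)
                return ret;

        drm_panel_add(&p->panel);
        return 0;
}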
+ */ +int drm_panel_dp_aux_backlight(struct drm_panel *panel, struct drm_dp_aux *aux) +{ + struct dp_aux_backlight *bl; + struct backlight_properties props = { 0 }; + u16 current_level; + u8 current_mode; + u8 edp_dpcd[EDP_DISPLAY_CTL_CAP_SIZE]; + int ret; + + if (!panel || !panel->dev || !aux) + return -EINVAL; + + ret = drm_dp_dpcd_read(aux, DP_EDP_DPCD_REV, edp_dpcd, + EDP_DISPLAY_CTL_CAP_SIZE); + if (ret < 0) + return ret; + + if (!drm_edp_backlight_supported(edp_dpcd)) { + DRM_DEV_INFO(panel->dev, "DP AUX backlight is not supported\n"); + return 0; + } + + bl = devm_kzalloc(panel->dev, sizeof(*bl), GFP_KERNEL); + if (!bl) + return -ENOMEM; + + bl->aux = aux; + + ret = drm_edp_backlight_init(aux, &bl->info, 0, edp_dpcd, + &current_level, &current_mode); + if (ret < 0) + return ret; + + props.type = BACKLIGHT_RAW; + props.brightness = current_level; + props.max_brightness = bl->info.max; + + bl->base = devm_backlight_device_register(panel->dev, "dp_aux_backlight", + panel->dev, bl, + &dp_aux_bl_ops, &props); + if (IS_ERR(bl->base)) + return PTR_ERR(bl->base); + + backlight_disable(bl->base); + + panel->backlight = bl->base; + + return 0; +} +EXPORT_SYMBOL(drm_panel_dp_aux_backlight); + +#endif diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index ad0795afc21c..86d13d6bc463 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -2872,11 +2872,13 @@ static int process_single_tx_qlock(struct drm_dp_mst_topology_mgr *mgr, idx += tosend + 1; ret = drm_dp_send_sideband_msg(mgr, up, chunk, idx); - if (unlikely(ret) && drm_debug_enabled(DRM_UT_DP)) { - struct drm_printer p = drm_debug_printer(DBG_PREFIX); + if (ret) { + if (drm_debug_enabled(DRM_UT_DP)) { + struct drm_printer p = drm_debug_printer(DBG_PREFIX); - drm_printf(&p, "sideband msg failed to send\n"); - drm_dp_mst_dump_sideband_msg_tx(&p, txmsg); + drm_printf(&p, "sideband msg failed to send\n"); + drm_dp_mst_dump_sideband_msg_tx(&p, txmsg); + } return ret; } diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c index 8804ec7d3215..7a5097467ba5 100644 --- a/drivers/gpu/drm/drm_drv.c +++ b/drivers/gpu/drm/drm_drv.c @@ -249,7 +249,7 @@ void drm_minor_release(struct drm_minor *minor) * Finally when everything is up and running and ready for userspace the device * instance can be published using drm_dev_register(). * - * There is also deprecated support for initalizing device instances using + * There is also deprecated support for initializing device instances using * bus-specific helpers and the &drm_driver.load callback. But due to * backwards-compatibility needs the device instance have to be published too * early, which requires unpretty global locking to make safe and is therefore @@ -379,7 +379,7 @@ void drm_minor_release(struct drm_minor *minor) * shortcoming however, drm_dev_unplug() marks the drm_device as unplugged before * drm_atomic_helper_shutdown() is called. This means that if the disable code * paths are protected, they will not run on regular driver module unload, - * possibily leaving the hardware enabled. + * possibly leaving the hardware enabled.
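Drivers that manage their own backlight device instead of going through drm_panel_dp_aux_backlight() can call the drm_edp_backlight_*() helpers shown earlier directly. A hypothetical probe-time sequence; the my_connector structure is invented, while the helper calls and DPCD symbols are the ones introduced above.

struct my_connector {
        struct drm_dp_aux *aux;
        struct drm_edp_backlight_info bl_info;
};

static int my_connector_setup_backlight(struct my_connector *conn)
{
        struct drm_dp_aux *aux = conn->aux;
        u8 edp_dpcd[EDP_DISPLAY_CTL_CAP_SIZE];
        u16 current_level;
        u8 current_mode;
        int ret;

        ret = drm_dp_dpcd_read(aux, DP_EDP_DPCD_REV, edp_dpcd, sizeof(edp_dpcd));
        if (ret < 0)
                return ret;

        if (!drm_edp_backlight_supported(edp_dpcd))
                return -ENODEV;

        /* Passing 0 keeps the panel's default PWM frequency. */
        ret = drm_edp_backlight_init(aux, &conn->bl_info, 0, edp_dpcd,
                                     &current_level, &current_mode);
        if (ret < 0)
                return ret;

        /* Re-enable at the brightness the panel was last programmed with. */
        return drm_edp_backlight_enable(aux, &conn->bl_info, current_level);
}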
*/ /** diff --git a/drivers/gpu/drm/drm_dsc.c b/drivers/gpu/drm/drm_dsc.c index ff602f7ec65b..46a3c1b62463 100644 --- a/drivers/gpu/drm/drm_dsc.c +++ b/drivers/gpu/drm/drm_dsc.c @@ -98,7 +98,7 @@ void drm_dsc_pps_payload_pack(struct drm_dsc_picture_parameter_set *pps_payload, { int i; - /* Protect against someone accidently changing struct size */ + /* Protect against someone accidentally changing struct size */ BUILD_BUG_ON(sizeof(*pps_payload) != DP_SDP_PPS_HEADER_PAYLOAD_BYTES_MINUS_1 + 1); diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 81d5f2524246..6325877c5fd6 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -1919,7 +1919,7 @@ EXPORT_SYMBOL(drm_add_override_edid_modes); * level, drivers must make all reasonable efforts to expose it as an I2C * adapter and use drm_get_edid() instead of abusing this function. * - * The EDID may be overridden using debugfs override_edid or firmare EDID + * The EDID may be overridden using debugfs override_edid or firmware EDID * (drm_load_edid_firmware() and drm.edid_firmware parameter), in this priority * order. Having either of them bypasses actual EDID reads. * @@ -5906,7 +5906,7 @@ drm_hdmi_vendor_infoframe_from_display_mode(struct hdmi_vendor_infoframe *frame, * (ie.vic==0 and s3d_struct==0) we will still send it if we * know that the sink can handle it. This is based on a * suggestion in HDMI 2.0 Appendix F. Apparently some sinks - * have trouble realizing that they shuld switch from 3D to 2D + * have trouble realizing that they should switch from 3D to 2D * mode if the source simply stops sending the infoframe when * it wants to switch from 3D to 2D. */ diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index d77a24507d30..3ab078321045 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -565,7 +565,7 @@ struct fb_info *drm_fb_helper_alloc_fbi(struct drm_fb_helper *fb_helper) goto err_release; /* - * TODO: We really should be smarter here and alloc an apperture + * TODO: We really should be smarter here and alloc an aperture * for each IORESOURCE_MEM resource helper->dev->dev has and also * init the ranges of the appertures based on the resources. * Note some drivers currently count on there being only 1 empty diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c index d4f0bac6f8f8..ed25168619fc 100644 --- a/drivers/gpu/drm/drm_file.c +++ b/drivers/gpu/drm/drm_file.c @@ -176,6 +176,7 @@ struct drm_file *drm_file_alloc(struct drm_minor *minor) init_waitqueue_head(&file->event_wait); file->event_space = 4096; /* set aside 4k for event buffer */ + spin_lock_init(&file->master_lookup_lock); mutex_init(&file->event_read_lock); if (drm_core_check_feature(dev, DRIVER_GEM)) @@ -404,7 +405,7 @@ static int drm_open_helper(struct file *filp, struct drm_minor *minor) * * RETURNS: * - * 0 on success or negative errno value on falure. + * 0 on success or negative errno value on failure. */ int drm_open(struct inode *inode, struct file *filp) { @@ -547,7 +548,7 @@ EXPORT_SYMBOL(drm_release_noglobal); * @offset: offset to read * * This function must be used by drivers as their &file_operations.read - * method iff they use DRM events for asynchronous signalling to userspace. + * method if they use DRM events for asynchronous signalling to userspace. * Since events are used by the KMS API for vblank and page flip completion this * means all modern display drivers must use it. 
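The drm_file.c hunks above touch the kernel-doc for drm_read() and drm_poll(), which drivers that deliver DRM events (vblank, page flip) install as their file_operations callbacks. A sketch of the usual table, using only standard DRM callbacks (the driver name is hypothetical); many GEM drivers obtain an equivalent table from the DEFINE_DRM_GEM_FOPS() convenience macro instead of spelling it out.

static const struct file_operations my_drm_fops = {
        .owner          = THIS_MODULE,
        .open           = drm_open,
        .release        = drm_release,
        .unlocked_ioctl = drm_ioctl,
        .compat_ioctl   = drm_compat_ioctl,
        .poll           = drm_poll,     /* event wakeups (vblank, page flip) */
        .read           = drm_read,     /* event delivery to userspace */
        .llseek         = noop_llseek,
        .mmap           = drm_gem_mmap,
};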
* @@ -640,7 +641,7 @@ EXPORT_SYMBOL(drm_read); * @wait: poll waiter table * * This function must be used by drivers as their &file_operations.read method - * iff they use DRM events for asynchronous signalling to userspace. Since + * if they use DRM events for asynchronous signalling to userspace. Since * events are used by the KMS API for vblank and page flip completion this means * all modern display drivers must use it. * diff --git a/drivers/gpu/drm/drm_format_helper.c b/drivers/gpu/drm/drm_format_helper.c index 0e885cd34107..5231104b1498 100644 --- a/drivers/gpu/drm/drm_format_helper.c +++ b/drivers/gpu/drm/drm_format_helper.c @@ -412,7 +412,7 @@ EXPORT_SYMBOL(drm_fb_blit_rect_dstclip); * of the display and the framebuffer mismatch, the copy function will * attempt to convert between them. * - * See drm_fb_blit_rect_dstclip() for more inforamtion. + * See drm_fb_blit_rect_dstclip() for more information. * * Returns: * 0 on success, or a negative error code otherwise. diff --git a/drivers/gpu/drm/drm_framebuffer.c b/drivers/gpu/drm/drm_framebuffer.c index 4d01464b6f95..07f5abc875e9 100644 --- a/drivers/gpu/drm/drm_framebuffer.c +++ b/drivers/gpu/drm/drm_framebuffer.c @@ -404,6 +404,9 @@ static void drm_mode_rmfb_work_fn(struct work_struct *w) struct drm_framebuffer *fb = list_first_entry(&arg->fbs, typeof(*fb), filp_head); + drm_dbg_kms(fb->dev, + "Removing [FB:%d] from all active usage due to RMFB ioctl\n", + fb->base.id); list_del_init(&fb->filp_head); drm_framebuffer_remove(fb); } @@ -981,6 +984,10 @@ retry: if (plane->state->fb != fb) continue; + drm_dbg_kms(dev, + "Disabling [PLANE:%d:%s] because [FB:%d] is removed\n", + plane->base.id, plane->name, fb->base.id); + plane_state = drm_atomic_get_plane_state(state, plane); if (IS_ERR(plane_state)) { ret = PTR_ERR(plane_state); @@ -990,6 +997,11 @@ retry: if (disable_crtcs && plane_state->crtc->primary == plane) { struct drm_crtc_state *crtc_state; + drm_dbg_kms(dev, + "Disabling [CRTC:%d:%s] because [FB:%d] is removed\n", + plane_state->crtc->base.id, + plane_state->crtc->name, fb->base.id); + crtc_state = drm_atomic_get_existing_crtc_state(state, plane_state->crtc); ret = drm_atomic_add_affected_connectors(state, plane_state->crtc); @@ -1052,6 +1064,10 @@ static void legacy_remove_fb(struct drm_framebuffer *fb) /* remove from any CRTC */ drm_for_each_crtc(crtc, dev) { if (crtc->primary->fb == fb) { + drm_dbg_kms(dev, + "Disabling [CRTC:%d:%s] because [FB:%d] is removed\n", + crtc->base.id, crtc->name, fb->base.id); + /* should turn off the crtc */ if (drm_crtc_force_disable(crtc)) DRM_ERROR("failed to reset crtc %p when fb was deleted\n", crtc); @@ -1059,8 +1075,12 @@ static void legacy_remove_fb(struct drm_framebuffer *fb) } drm_for_each_plane(plane, dev) { - if (plane->fb == fb) + if (plane->fb == fb) { + drm_dbg_kms(dev, + "Disabling [PLANE:%d:%s] because [FB:%d] is removed\n", + plane->base.id, plane->name, fb->base.id); drm_plane_force_disable(plane); + } } drm_modeset_unlock_all(dev); } @@ -1090,7 +1110,7 @@ void drm_framebuffer_remove(struct drm_framebuffer *fb) /* * drm ABI mandates that we remove any deleted framebuffers from active - * useage. But since most sane clients only remove framebuffers they no + * usage. But since most sane clients only remove framebuffers they no * longer need, try to optimize this away. 
* * Since we're holding a reference ourselves, observing a refcount of 1 diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index d62fb1a3c916..09c820045859 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -901,7 +901,7 @@ err: } /** - * drm_gem_open - initalizes GEM file-private structures at devnode open time + * drm_gem_open - initializes GEM file-private structures at devnode open time * @dev: drm_device which is being opened by userspace * @file_private: drm file-private structure to set up * @@ -936,7 +936,7 @@ drm_gem_release(struct drm_device *dev, struct drm_file *file_private) * drm_gem_object_release - release GEM buffer object resources * @obj: GEM buffer object * - * This releases any structures and resources used by @obj and is the invers of + * This releases any structures and resources used by @obj and is the inverse of * drm_gem_object_init(). */ void @@ -974,28 +974,6 @@ drm_gem_object_free(struct kref *kref) EXPORT_SYMBOL(drm_gem_object_free); /** - * drm_gem_object_put_locked - release a GEM buffer object reference - * @obj: GEM buffer object - * - * This releases a reference to @obj. Callers must hold the - * &drm_device.struct_mutex lock when calling this function, even when the - * driver doesn't use &drm_device.struct_mutex for anything. - * - * For drivers not encumbered with legacy locking use - * drm_gem_object_put() instead. - */ -void -drm_gem_object_put_locked(struct drm_gem_object *obj) -{ - if (obj) { - WARN_ON(!mutex_is_locked(&obj->dev->struct_mutex)); - - kref_put(&obj->refcount, drm_gem_object_free); - } -} -EXPORT_SYMBOL(drm_gem_object_put_locked); - -/** * drm_gem_vm_open - vma->ops->open implementation for GEM * @vma: VM area structure * @@ -1148,15 +1126,6 @@ int drm_gem_mmap(struct file *filp, struct vm_area_struct *vma) return -EACCES; } - if (node->readonly) { - if (vma->vm_flags & VM_WRITE) { - drm_gem_object_put(obj); - return -EINVAL; - } - - vma->vm_flags &= ~VM_MAYWRITE; - } - ret = drm_gem_mmap_obj(obj, drm_vma_node_size(node) << PAGE_SHIFT, vma); @@ -1311,6 +1280,9 @@ EXPORT_SYMBOL(drm_gem_unlock_reservations); * @fence_array: array of dma_fence * for the job to block on. * @fence: the dma_fence to add to the list of dependencies. * + * This functions consumes the reference for @fence both on success and error + * cases. + * * Returns: * 0 on success, or an error on failing to expand the array. */ diff --git a/drivers/gpu/drm/drm_gem_atomic_helper.c b/drivers/gpu/drm/drm_gem_atomic_helper.c index a27135084ae5..e570398abd78 100644 --- a/drivers/gpu/drm/drm_gem_atomic_helper.c +++ b/drivers/gpu/drm/drm_gem_atomic_helper.c @@ -52,7 +52,7 @@ * * The helpers for shadow-buffered planes establish and release mappings, * and provide struct drm_shadow_plane_state, which stores the plane's mapping - * for commit-tail functons. + * for commit-tail functions. * * Shadow-buffered planes can easily be enabled by using the provided macros * %DRM_GEM_SHADOW_PLANE_FUNCS and %DRM_GEM_SHADOW_PLANE_HELPER_FUNCS. @@ -135,6 +135,9 @@ * GEM based framebuffer drivers which have their buffers always pinned in * memory. * + * This function is the default implementation for GEM drivers of + * &drm_plane_helper_funcs.prepare_fb if no callback is provided. + * * See drm_atomic_set_fence_for_plane() for a discussion of implicit and * explicit fencing in atomic modeset updates. 
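The note above about %DRM_GEM_SHADOW_PLANE_FUNCS and %DRM_GEM_SHADOW_PLANE_HELPER_FUNCS is the easy path for drivers that do not need a custom plane state. A hypothetical pairing is sketched below; the atomic_check/atomic_update callbacks are placeholders for driver code, and the surrounding funcs are the usual atomic helpers.

static const struct drm_plane_funcs my_plane_funcs = {
        .update_plane   = drm_atomic_helper_update_plane,
        .disable_plane  = drm_atomic_helper_disable_plane,
        .destroy        = drm_plane_cleanup,
        DRM_GEM_SHADOW_PLANE_FUNCS,             /* reset/duplicate/destroy shadow state */
};

static const struct drm_plane_helper_funcs my_plane_helper_funcs = {
        .atomic_check   = my_plane_atomic_check,        /* assumed driver callback */
        .atomic_update  = my_plane_atomic_update,       /* assumed driver callback */
        DRM_GEM_SHADOW_PLANE_HELPER_FUNCS,      /* prepare_fb/cleanup_fb vmap helpers */
};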
*/ @@ -180,6 +183,27 @@ EXPORT_SYMBOL(drm_gem_simple_display_pipe_prepare_fb); */ /** + * __drm_gem_duplicate_shadow_plane_state - duplicates shadow-buffered plane state + * @plane: the plane + * @new_shadow_plane_state: the new shadow-buffered plane state + * + * This function duplicates shadow-buffered plane state. This is helpful for drivers + * that subclass struct drm_shadow_plane_state. + * + * The function does not duplicate existing mappings of the shadow buffers. + * Mappings are maintained during the atomic commit by the plane's prepare_fb + * and cleanup_fb helpers. See drm_gem_prepare_shadow_fb() and drm_gem_cleanup_shadow_fb() + * for corresponding helpers. + */ +void +__drm_gem_duplicate_shadow_plane_state(struct drm_plane *plane, + struct drm_shadow_plane_state *new_shadow_plane_state) +{ + __drm_atomic_helper_plane_duplicate_state(plane, &new_shadow_plane_state->base); +} +EXPORT_SYMBOL(__drm_gem_duplicate_shadow_plane_state); + +/** * drm_gem_duplicate_shadow_plane_state - duplicates shadow-buffered plane state * @plane: the plane * @@ -208,13 +232,26 @@ drm_gem_duplicate_shadow_plane_state(struct drm_plane *plane) new_shadow_plane_state = kzalloc(sizeof(*new_shadow_plane_state), GFP_KERNEL); if (!new_shadow_plane_state) return NULL; - __drm_atomic_helper_plane_duplicate_state(plane, &new_shadow_plane_state->base); + __drm_gem_duplicate_shadow_plane_state(plane, new_shadow_plane_state); return &new_shadow_plane_state->base; } EXPORT_SYMBOL(drm_gem_duplicate_shadow_plane_state); /** + * __drm_gem_destroy_shadow_plane_state - cleans up shadow-buffered plane state + * @shadow_plane_state: the shadow-buffered plane state + * + * This function cleans up shadow-buffered plane state. Helpful for drivers that + * subclass struct drm_shadow_plane_state. + */ +void __drm_gem_destroy_shadow_plane_state(struct drm_shadow_plane_state *shadow_plane_state) +{ + __drm_atomic_helper_plane_destroy_state(&shadow_plane_state->base); +} +EXPORT_SYMBOL(__drm_gem_destroy_shadow_plane_state); + +/** * drm_gem_destroy_shadow_plane_state - deletes shadow-buffered plane state * @plane: the plane * @plane_state: the plane state of type struct drm_shadow_plane_state @@ -229,12 +266,27 @@ void drm_gem_destroy_shadow_plane_state(struct drm_plane *plane, struct drm_shadow_plane_state *shadow_plane_state = to_drm_shadow_plane_state(plane_state); - __drm_atomic_helper_plane_destroy_state(&shadow_plane_state->base); + __drm_gem_destroy_shadow_plane_state(shadow_plane_state); kfree(shadow_plane_state); } EXPORT_SYMBOL(drm_gem_destroy_shadow_plane_state); /** + * __drm_gem_reset_shadow_plane - resets a shadow-buffered plane + * @plane: the plane + * @shadow_plane_state: the shadow-buffered plane state + * + * This function resets state for shadow-buffered planes. Helpful + * for drivers that subclass struct drm_shadow_plane_state. 
+ */ +void __drm_gem_reset_shadow_plane(struct drm_plane *plane, + struct drm_shadow_plane_state *shadow_plane_state) +{ + __drm_atomic_helper_plane_reset(plane, &shadow_plane_state->base); +} +EXPORT_SYMBOL(__drm_gem_reset_shadow_plane); + +/** * drm_gem_reset_shadow_plane - resets a shadow-buffered plane * @plane: the plane * @@ -255,7 +307,7 @@ void drm_gem_reset_shadow_plane(struct drm_plane *plane) shadow_plane_state = kzalloc(sizeof(*shadow_plane_state), GFP_KERNEL); if (!shadow_plane_state) return; - __drm_atomic_helper_plane_reset(plane, &shadow_plane_state->base); + __drm_gem_reset_shadow_plane(plane, shadow_plane_state); } EXPORT_SYMBOL(drm_gem_reset_shadow_plane); @@ -278,10 +330,7 @@ int drm_gem_prepare_shadow_fb(struct drm_plane *plane, struct drm_plane_state *p { struct drm_shadow_plane_state *shadow_plane_state = to_drm_shadow_plane_state(plane_state); struct drm_framebuffer *fb = plane_state->fb; - struct drm_gem_object *obj; - struct dma_buf_map map; int ret; - size_t i; if (!fb) return 0; @@ -290,27 +339,7 @@ int drm_gem_prepare_shadow_fb(struct drm_plane *plane, struct drm_plane_state *p if (ret) return ret; - for (i = 0; i < ARRAY_SIZE(shadow_plane_state->map); ++i) { - obj = drm_gem_fb_get_obj(fb, i); - if (!obj) - continue; - ret = drm_gem_vmap(obj, &map); - if (ret) - goto err_drm_gem_vunmap; - shadow_plane_state->map[i] = map; - } - - return 0; - -err_drm_gem_vunmap: - while (i) { - --i; - obj = drm_gem_fb_get_obj(fb, i); - if (!obj) - continue; - drm_gem_vunmap(obj, &shadow_plane_state->map[i]); - } - return ret; + return drm_gem_fb_vmap(fb, shadow_plane_state->map, shadow_plane_state->data); } EXPORT_SYMBOL(drm_gem_prepare_shadow_fb); @@ -322,25 +351,17 @@ EXPORT_SYMBOL(drm_gem_prepare_shadow_fb); * This function implements struct &drm_plane_helper_funcs.cleanup_fb. * This function unmaps all buffer objects of the plane's framebuffer. * - * See drm_gem_prepare_shadow_fb() for more inforamtion. + * See drm_gem_prepare_shadow_fb() for more information. 
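The __drm_gem_*_shadow_plane_state() variants above exist so a driver can embed struct drm_shadow_plane_state in a larger, driver-specific state while reusing the common reset/duplicate/destroy logic. A sketch of such a subclass; my_plane_state and its extra flag are invented for illustration, the helpers are the ones added by this patch.

struct my_plane_state {
        struct drm_shadow_plane_state shadow;
        bool needs_full_upload;                 /* hypothetical driver flag */
};

static struct drm_plane_state *my_plane_duplicate_state(struct drm_plane *plane)
{
        struct my_plane_state *new;

        if (WARN_ON(!plane->state))
                return NULL;

        new = kzalloc(sizeof(*new), GFP_KERNEL);
        if (!new)
                return NULL;

        __drm_gem_duplicate_shadow_plane_state(plane, &new->shadow);
        return &new->shadow.base;
}

static void my_plane_destroy_state(struct drm_plane *plane,
                                   struct drm_plane_state *state)
{
        struct my_plane_state *my_state =
                container_of(to_drm_shadow_plane_state(state), struct my_plane_state, shadow);

        __drm_gem_destroy_shadow_plane_state(&my_state->shadow);
        kfree(my_state);
}

static void my_plane_reset(struct drm_plane *plane)
{
        struct my_plane_state *my_state;

        if (plane->state) {
                my_plane_destroy_state(plane, plane->state);
                plane->state = NULL;
        }

        my_state = kzalloc(sizeof(*my_state), GFP_KERNEL);
        if (my_state)
                __drm_gem_reset_shadow_plane(plane, &my_state->shadow);
}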
*/ void drm_gem_cleanup_shadow_fb(struct drm_plane *plane, struct drm_plane_state *plane_state) { struct drm_shadow_plane_state *shadow_plane_state = to_drm_shadow_plane_state(plane_state); struct drm_framebuffer *fb = plane_state->fb; - size_t i = ARRAY_SIZE(shadow_plane_state->map); - struct drm_gem_object *obj; if (!fb) return; - while (i) { - --i; - obj = drm_gem_fb_get_obj(fb, i); - if (!obj) - continue; - drm_gem_vunmap(obj, &shadow_plane_state->map[i]); - } + drm_gem_fb_vunmap(fb, shadow_plane_state->map); } EXPORT_SYMBOL(drm_gem_cleanup_shadow_fb); diff --git a/drivers/gpu/drm/drm_gem_framebuffer_helper.c b/drivers/gpu/drm/drm_gem_framebuffer_helper.c index e2c68822e05c..3c75d79dbb65 100644 --- a/drivers/gpu/drm/drm_gem_framebuffer_helper.c +++ b/drivers/gpu/drm/drm_gem_framebuffer_helper.c @@ -15,6 +15,8 @@ #include <drm/drm_gem_framebuffer_helper.h> #include <drm/drm_modeset_helper.h> +#include "drm_internal.h" + #define AFBC_HEADER_SIZE 16 #define AFBC_TH_LAYOUT_ALIGNMENT 8 #define AFBC_HDR_ALIGN 64 @@ -48,7 +50,7 @@ struct drm_gem_object *drm_gem_fb_get_obj(struct drm_framebuffer *fb, unsigned int plane) { - if (plane >= 4) + if (plane >= ARRAY_SIZE(fb->obj)) return NULL; return fb->obj[plane]; @@ -62,7 +64,8 @@ drm_gem_fb_init(struct drm_device *dev, struct drm_gem_object **obj, unsigned int num_planes, const struct drm_framebuffer_funcs *funcs) { - int ret, i; + unsigned int i; + int ret; drm_helper_mode_fill_fb_struct(dev, fb, mode_cmd); @@ -86,9 +89,9 @@ drm_gem_fb_init(struct drm_device *dev, */ void drm_gem_fb_destroy(struct drm_framebuffer *fb) { - int i; + size_t i; - for (i = 0; i < 4; i++) + for (i = 0; i < ARRAY_SIZE(fb->obj); i++) drm_gem_object_put(fb->obj[i]); drm_framebuffer_cleanup(fb); @@ -145,8 +148,9 @@ int drm_gem_fb_init_with_funcs(struct drm_device *dev, const struct drm_framebuffer_funcs *funcs) { const struct drm_format_info *info; - struct drm_gem_object *objs[4]; - int ret, i; + struct drm_gem_object *objs[DRM_FORMAT_MAX_PLANES]; + unsigned int i; + int ret; info = drm_get_format_info(dev, mode_cmd); if (!info) { @@ -187,9 +191,10 @@ int drm_gem_fb_init_with_funcs(struct drm_device *dev, return 0; err_gem_object_put: - for (i--; i >= 0; i--) + while (i > 0) { + --i; drm_gem_object_put(objs[i]); - + } return ret; } EXPORT_SYMBOL_GPL(drm_gem_fb_init_with_funcs); @@ -306,6 +311,184 @@ drm_gem_fb_create_with_dirty(struct drm_device *dev, struct drm_file *file, } EXPORT_SYMBOL_GPL(drm_gem_fb_create_with_dirty); +/** + * drm_gem_fb_vmap - maps all framebuffer BOs into kernel address space + * @fb: the framebuffer + * @map: returns the mapping's address for each BO + * @data: returns the data address for each BO, can be NULL + * + * This function maps all buffer objects of the given framebuffer into + * kernel address space and stores them in struct dma_buf_map. If the + * mapping operation fails for one of the BOs, the function unmaps the + * already established mappings automatically. + * + * Callers that want to access a BO's stored data should pass @data. + * The argument returns the addresses of the data stored in each BO. This + * is different from @map if the framebuffer's offsets field is non-zero. + * + * See drm_gem_fb_vunmap() for unmapping. + * + * Returns: + * 0 on success, or a negative errno code otherwise. 
+ */ +int drm_gem_fb_vmap(struct drm_framebuffer *fb, + struct dma_buf_map map[static DRM_FORMAT_MAX_PLANES], + struct dma_buf_map data[DRM_FORMAT_MAX_PLANES]) +{ + struct drm_gem_object *obj; + unsigned int i; + int ret; + + for (i = 0; i < DRM_FORMAT_MAX_PLANES; ++i) { + obj = drm_gem_fb_get_obj(fb, i); + if (!obj) { + dma_buf_map_clear(&map[i]); + continue; + } + ret = drm_gem_vmap(obj, &map[i]); + if (ret) + goto err_drm_gem_vunmap; + } + + if (data) { + for (i = 0; i < DRM_FORMAT_MAX_PLANES; ++i) { + memcpy(&data[i], &map[i], sizeof(data[i])); + if (dma_buf_map_is_null(&data[i])) + continue; + dma_buf_map_incr(&data[i], fb->offsets[i]); + } + } + + return 0; + +err_drm_gem_vunmap: + while (i) { + --i; + obj = drm_gem_fb_get_obj(fb, i); + if (!obj) + continue; + drm_gem_vunmap(obj, &map[i]); + } + return ret; +} +EXPORT_SYMBOL(drm_gem_fb_vmap); + +/** + * drm_gem_fb_vunmap - unmaps framebuffer BOs from kernel address space + * @fb: the framebuffer + * @map: mapping addresses as returned by drm_gem_fb_vmap() + * + * This function unmaps all buffer objects of the given framebuffer. + * + * See drm_gem_fb_vmap() for more information. + */ +void drm_gem_fb_vunmap(struct drm_framebuffer *fb, + struct dma_buf_map map[static DRM_FORMAT_MAX_PLANES]) +{ + unsigned int i = DRM_FORMAT_MAX_PLANES; + struct drm_gem_object *obj; + + while (i) { + --i; + obj = drm_gem_fb_get_obj(fb, i); + if (!obj) + continue; + if (dma_buf_map_is_null(&map[i])) + continue; + drm_gem_vunmap(obj, &map[i]); + } +} +EXPORT_SYMBOL(drm_gem_fb_vunmap); + +/** + * drm_gem_fb_begin_cpu_access - prepares GEM buffer objects for CPU access + * @fb: the framebuffer + * @dir: access mode + * + * Prepares a framebuffer's GEM buffer objects for CPU access. This function + * must be called before accessing the BO data within the kernel. For imported + * BOs, the function calls dma_buf_begin_cpu_access(). + * + * See drm_gem_fb_end_cpu_access() for signalling the end of CPU access. + * + * Returns: + * 0 on success, or a negative errno code otherwise. + */ +int drm_gem_fb_begin_cpu_access(struct drm_framebuffer *fb, enum dma_data_direction dir) +{ + struct dma_buf_attachment *import_attach; + struct drm_gem_object *obj; + size_t i; + int ret, ret2; + + for (i = 0; i < ARRAY_SIZE(fb->obj); ++i) { + obj = drm_gem_fb_get_obj(fb, i); + if (!obj) + continue; + import_attach = obj->import_attach; + if (!import_attach) + continue; + ret = dma_buf_begin_cpu_access(import_attach->dmabuf, dir); + if (ret) + goto err_dma_buf_end_cpu_access; + } + + return 0; + +err_dma_buf_end_cpu_access: + while (i) { + --i; + obj = drm_gem_fb_get_obj(fb, i); + if (!obj) + continue; + import_attach = obj->import_attach; + if (!import_attach) + continue; + ret2 = dma_buf_end_cpu_access(import_attach->dmabuf, dir); + if (ret2) { + drm_err(fb->dev, + "dma_buf_end_cpu_access() failed during error handling: %d\n", + ret2); + } + } + + return ret; +} +EXPORT_SYMBOL(drm_gem_fb_begin_cpu_access); + +/** + * drm_gem_fb_end_cpu_access - signals end of CPU access to GEM buffer objects + * @fb: the framebuffer + * @dir: access mode + * + * Signals the end of CPU access to the given framebuffer's GEM buffer objects. This + * function must be paired with a corresponding call to drm_gem_fb_begin_cpu_access(). + * For imported BOs, the function calls dma_buf_end_cpu_access(). + * + * See also drm_gem_fb_begin_cpu_access(). 
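Taken together, drm_gem_fb_begin_cpu_access()/..._end_cpu_access() and drm_gem_fb_vmap()/..._vunmap() give a common pattern for CPU access to framebuffer contents, for example when flushing a shadow buffer to device memory from a commit tail. A hypothetical sketch; my_device_upload() stands in for the driver-specific copy, and the DMA direction follows the usual "CPU reads, device is the source of prior writes" convention.

static int my_device_flush_fb(struct drm_framebuffer *fb)
{
        struct dma_buf_map map[DRM_FORMAT_MAX_PLANES];
        struct dma_buf_map data[DRM_FORMAT_MAX_PLANES];
        int ret;

        ret = drm_gem_fb_begin_cpu_access(fb, DMA_FROM_DEVICE);
        if (ret)
                return ret;

        ret = drm_gem_fb_vmap(fb, map, data);
        if (ret)
                goto out_end_cpu_access;

        /* data[0] already has fb->offsets[0] applied. */
        my_device_upload(fb, &data[0]);         /* hypothetical device-specific copy */

        drm_gem_fb_vunmap(fb, map);
out_end_cpu_access:
        drm_gem_fb_end_cpu_access(fb, DMA_FROM_DEVICE);
        return ret;
}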
+ */ +void drm_gem_fb_end_cpu_access(struct drm_framebuffer *fb, enum dma_data_direction dir) +{ + size_t i = ARRAY_SIZE(fb->obj); + struct dma_buf_attachment *import_attach; + struct drm_gem_object *obj; + int ret; + + while (i) { + --i; + obj = drm_gem_fb_get_obj(fb, i); + if (!obj) + continue; + import_attach = obj->import_attach; + if (!import_attach) + continue; + ret = dma_buf_end_cpu_access(import_attach->dmabuf, dir); + if (ret) + drm_err(fb->dev, "dma_buf_end_cpu_access() failed: %d\n", ret); + } +} +EXPORT_SYMBOL(drm_gem_fb_end_cpu_access); + static __u32 drm_gem_afbc_get_bpp(struct drm_device *dev, const struct drm_mode_fb_cmd2 *mode_cmd) { diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c index 6d625cee7a6a..a61946374c82 100644 --- a/drivers/gpu/drm/drm_gem_shmem_helper.c +++ b/drivers/gpu/drm/drm_gem_shmem_helper.c @@ -368,7 +368,7 @@ static void drm_gem_shmem_vunmap_locked(struct drm_gem_shmem_object *shmem, } /* - * drm_gem_shmem_vunmap - Unmap a virtual mapping fo a shmem GEM object + * drm_gem_shmem_vunmap - Unmap a virtual mapping for a shmem GEM object * @shmem: shmem GEM object * @map: Kernel virtual address where the SHMEM GEM object was mapped * @@ -505,13 +505,13 @@ int drm_gem_shmem_dumb_create(struct drm_file *file, struct drm_device *dev, if (!args->pitch || !args->size) { args->pitch = min_pitch; - args->size = args->pitch * args->height; + args->size = PAGE_ALIGN(args->pitch * args->height); } else { /* ensure sane minimum values */ if (args->pitch < min_pitch) args->pitch = min_pitch; if (args->size < args->pitch * args->height) - args->size = args->pitch * args->height; + args->size = PAGE_ALIGN(args->pitch * args->height); } shmem = drm_gem_shmem_create_with_handle(file, dev, args->size, &args->handle); diff --git a/drivers/gpu/drm/drm_gem_vram_helper.c b/drivers/gpu/drm/drm_gem_vram_helper.c index 2a1229b8364e..43cf7e887d1a 100644 --- a/drivers/gpu/drm/drm_gem_vram_helper.c +++ b/drivers/gpu/drm/drm_gem_vram_helper.c @@ -96,7 +96,7 @@ static const struct drm_gem_object_funcs drm_gem_vram_object_funcs; * memory region. Call drm_gem_vram_offset() to retrieve this value. Typically * it's used to program the hardware's scanout engine for framebuffers, set * the cursor overlay's image for a mouse cursor, or use it as input to the - * hardware's draing engine. + * hardware's drawing engine. * * To access a buffer object's memory from the DRM driver, call * drm_gem_vram_vmap(). 
It maps the buffer into kernel address @@ -1012,9 +1012,8 @@ static void drm_vram_mm_cleanup(struct drm_vram_mm *vmm) * Helpers for integration with struct drm_device */ -/* deprecated; use drmm_vram_mm_init() */ -struct drm_vram_mm *drm_vram_helper_alloc_mm( - struct drm_device *dev, uint64_t vram_base, size_t vram_size) +static struct drm_vram_mm *drm_vram_helper_alloc_mm(struct drm_device *dev, uint64_t vram_base, + size_t vram_size) { int ret; @@ -1036,9 +1035,8 @@ err_kfree: dev->vram_mm = NULL; return ERR_PTR(ret); } -EXPORT_SYMBOL(drm_vram_helper_alloc_mm); -void drm_vram_helper_release_mm(struct drm_device *dev) +static void drm_vram_helper_release_mm(struct drm_device *dev) { if (!dev->vram_mm) return; @@ -1047,7 +1045,6 @@ void drm_vram_helper_release_mm(struct drm_device *dev) kfree(dev->vram_mm); dev->vram_mm = NULL; } -EXPORT_SYMBOL(drm_vram_helper_release_mm); static void drm_vram_mm_release(struct drm_device *dev, void *ptr) { diff --git a/drivers/gpu/drm/drm_hdcp.c b/drivers/gpu/drm/drm_hdcp.c index 910108ccaae1..ca9b8f697202 100644 --- a/drivers/gpu/drm/drm_hdcp.c +++ b/drivers/gpu/drm/drm_hdcp.c @@ -280,7 +280,7 @@ exit: * https://www.digital-cp.com/sites/default/files/specifications/HDCP%20on%20HDMI%20Specification%20Rev2_2_Final1.pdf * * Returns: - * Count of the revoked KSVs or -ve error number incase of the failure. + * Count of the revoked KSVs or -ve error number in case of the failure. */ int drm_hdcp_check_ksvs_revoked(struct drm_device *drm_dev, u8 *ksvs, u32 ksv_count) diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c index f454e0424086..be4a52dc4d6f 100644 --- a/drivers/gpu/drm/drm_ioctl.c +++ b/drivers/gpu/drm/drm_ioctl.c @@ -426,7 +426,7 @@ done: } /** - * drm_noop - DRM no-op ioctl implemntation + * drm_noop - DRM no-op ioctl implementation * @dev: DRM device for the ioctl * @data: data pointer for the ioctl * @file_priv: DRM file for the ioctl call @@ -446,7 +446,7 @@ int drm_noop(struct drm_device *dev, void *data, EXPORT_SYMBOL(drm_noop); /** - * drm_invalid_op - DRM invalid ioctl implemntation + * drm_invalid_op - DRM invalid ioctl implementation * @dev: DRM device for the ioctl * @data: data pointer for the ioctl * @file_priv: DRM file for the ioctl call diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c index c3bd664ea733..13e1d5c4ec82 100644 --- a/drivers/gpu/drm/drm_irq.c +++ b/drivers/gpu/drm/drm_irq.c @@ -60,50 +60,14 @@ #include <drm/drm.h> #include <drm/drm_device.h> #include <drm/drm_drv.h> -#include <drm/drm_irq.h> +#include <drm/drm_legacy.h> #include <drm/drm_print.h> #include <drm/drm_vblank.h> #include "drm_internal.h" -/** - * DOC: irq helpers - * - * The DRM core provides very simple support helpers to enable IRQ handling on a - * device through the drm_irq_install() and drm_irq_uninstall() functions. This - * only supports devices with a single interrupt on the main device stored in - * &drm_device.dev and set as the device paramter in drm_dev_alloc(). - * - * These IRQ helpers are strictly optional. Drivers which roll their own only - * need to set &drm_device.irq_enabled to signal the DRM core that vblank - * interrupts are working. Since these helpers don't automatically clean up the - * requested interrupt like e.g. devm_request_irq() they're not really - * recommended. - */ - -/** - * drm_irq_install - install IRQ handler - * @dev: DRM device - * @irq: IRQ number to install the handler for - * - * Initializes the IRQ related data. 
Installs the handler, calling the driver - * &drm_driver.irq_preinstall and &drm_driver.irq_postinstall functions before - * and after the installation. - * - * This is the simplified helper interface provided for drivers with no special - * needs. Drivers which need to install interrupt handlers for multiple - * interrupts must instead set &drm_device.irq_enabled to signal the DRM core - * that vblank interrupts are available. - * - * @irq must match the interrupt number that would be passed to request_irq(), - * if called directly instead of using this helper function. - * - * &drm_driver.irq_handler is called to handle the registered interrupt. - * - * Returns: - * Zero on success or a negative error code on failure. - */ -int drm_irq_install(struct drm_device *dev, int irq) +#if IS_ENABLED(CONFIG_DRM_LEGACY) +static int drm_legacy_irq_install(struct drm_device *dev, int irq) { int ret; unsigned long sh_flags = 0; @@ -140,7 +104,7 @@ int drm_irq_install(struct drm_device *dev, int irq) if (ret < 0) { dev->irq_enabled = false; if (drm_core_check_feature(dev, DRIVER_LEGACY)) - vga_client_register(to_pci_dev(dev->dev), NULL, NULL, NULL); + vga_client_unregister(to_pci_dev(dev->dev)); free_irq(irq, dev); } else { dev->irq = irq; @@ -148,25 +112,8 @@ int drm_irq_install(struct drm_device *dev, int irq) return ret; } -EXPORT_SYMBOL(drm_irq_install); -/** - * drm_irq_uninstall - uninstall the IRQ handler - * @dev: DRM device - * - * Calls the driver's &drm_driver.irq_uninstall function and unregisters the IRQ - * handler. This should only be called by drivers which used drm_irq_install() - * to set up their interrupt handler. Other drivers must only reset - * &drm_device.irq_enabled to false. - * - * Note that for kernel modesetting drivers it is a bug if this function fails. - * The sanity checks are only to catch buggy user modesetting drivers which call - * the same function through an ioctl. - * - * Returns: - * Zero on success or a negative error code on failure. - */ -int drm_irq_uninstall(struct drm_device *dev) +int drm_legacy_irq_uninstall(struct drm_device *dev) { unsigned long irqflags; bool irq_enabled; @@ -203,7 +150,7 @@ int drm_irq_uninstall(struct drm_device *dev) DRM_DEBUG("irq=%d\n", dev->irq); if (drm_core_check_feature(dev, DRIVER_LEGACY)) - vga_client_register(to_pci_dev(dev->dev), NULL, NULL, NULL); + vga_client_unregister(to_pci_dev(dev->dev)); if (dev->driver->irq_uninstall) dev->driver->irq_uninstall(dev); @@ -212,41 +159,8 @@ int drm_irq_uninstall(struct drm_device *dev) return 0; } -EXPORT_SYMBOL(drm_irq_uninstall); +EXPORT_SYMBOL(drm_legacy_irq_uninstall); -static void devm_drm_irq_uninstall(void *data) -{ - drm_irq_uninstall(data); -} - -/** - * devm_drm_irq_install - install IRQ handler - * @dev: DRM device - * @irq: IRQ number to install the handler for - * - * devm_drm_irq_install is a help function of drm_irq_install. - * - * if the driver uses devm_drm_irq_install, there is no need - * to call drm_irq_uninstall when the drm module get unloaded, - * as this will done automagically. - * - * Returns: - * Zero on success or a negative error code on failure. 
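With the install/uninstall helpers moved into the legacy-only code above, a modern KMS driver is expected to request its interrupt directly and set &drm_device.irq_enabled itself, as the removed kernel-doc suggests. A hypothetical sketch of that replacement pattern; the handler body and names are illustrative, while devm_request_irq() and the irq_enabled field are existing kernel/DRM interfaces.

static irqreturn_t my_kms_irq_handler(int irq, void *arg)
{
        /* Hypothetical: ack hardware status, then e.g. drm_crtc_handle_vblank(). */
        return IRQ_HANDLED;
}

static int my_kms_bind_irq(struct drm_device *drm, int irq)
{
        int ret;

        /* Managed request: the IRQ is freed automatically on driver detach. */
        ret = devm_request_irq(drm->dev, irq, my_kms_irq_handler, 0,
                               "my-kms", drm);
        if (ret)
                return ret;

        /* Tell the DRM core that vblank interrupts can be relied upon. */
        drm->irq_enabled = true;

        return 0;
}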
- */ -int devm_drm_irq_install(struct drm_device *dev, int irq) -{ - int ret; - - ret = drm_irq_install(dev, irq); - if (ret) - return ret; - - return devm_add_action_or_reset(dev->dev, - devm_drm_irq_uninstall, dev); -} -EXPORT_SYMBOL(devm_drm_irq_install); - -#if IS_ENABLED(CONFIG_DRM_LEGACY) int drm_legacy_irq_control(struct drm_device *dev, void *data, struct drm_file *file_priv) { @@ -275,13 +189,13 @@ int drm_legacy_irq_control(struct drm_device *dev, void *data, ctl->irq != irq) return -EINVAL; mutex_lock(&dev->struct_mutex); - ret = drm_irq_install(dev, irq); + ret = drm_legacy_irq_install(dev, irq); mutex_unlock(&dev->struct_mutex); return ret; case DRM_UNINST_HANDLER: mutex_lock(&dev->struct_mutex); - ret = drm_irq_uninstall(dev); + ret = drm_legacy_irq_uninstall(dev); mutex_unlock(&dev->struct_mutex); return ret; diff --git a/drivers/gpu/drm/drm_lease.c b/drivers/gpu/drm/drm_lease.c index 00fb433bcef1..dee4f24a1808 100644 --- a/drivers/gpu/drm/drm_lease.c +++ b/drivers/gpu/drm/drm_lease.c @@ -15,19 +15,62 @@ #include "drm_crtc_internal.h" #include "drm_internal.h" +/** + * DOC: drm leasing + * + * DRM leases provide information about whether a DRM master may control a DRM + * mode setting object. This enables the creation of multiple DRM masters that + * manage subsets of display resources. + * + * The original DRM master of a device 'owns' the available drm resources. It + * may create additional DRM masters and 'lease' resources which it controls + * to the new DRM master. This gives the new DRM master control over the + * leased resources until the owner revokes the lease, or the new DRM master + * is closed. Some helpful terminology: + * + * - An 'owner' is a &struct drm_master that is not leasing objects from + * another &struct drm_master, and hence 'owns' the objects. The owner can be + * identified as the &struct drm_master for which &drm_master.lessor is NULL. + * + * - A 'lessor' is a &struct drm_master which is leasing objects to one or more + * other &struct drm_master. Currently, lessees are not allowed to + * create sub-leases, hence the lessor is the same as the owner. + * + * - A 'lessee' is a &struct drm_master which is leasing objects from some + * other &struct drm_master. Each lessee only leases resources from a single + * lessor recorded in &drm_master.lessor, and holds the set of objects that + * it is leasing in &drm_master.leases. + * + * - A 'lease' is a contract between the lessor and lessee that identifies + * which resources may be controlled by the lessee. All of the resources + * that are leased must be owned by or leased to the lessor, and lessors are + * not permitted to lease the same object to multiple lessees. + * + * The set of objects any &struct drm_master 'controls' is limited to the set + * of objects it leases (for lessees) or all objects (for owners). + * + * Objects not controlled by a &struct drm_master cannot be modified through + * the various state manipulating ioctls, and any state reported back to user + * space will be edited to make them appear idle and/or unusable. For + * instance, connectors always report 'disconnected', while encoders + * report no possible crtcs or clones. + * + * Since each lessee may lease objects from a single lessor, display resource + * leases form a tree of &struct drm_master. As lessees are currently not + * allowed to create sub-leases, the tree depth is limited to 1. 
All of + * these get activated simultaneously when the top level device owner changes + * through the SETMASTER or DROPMASTER IOCTL, so &drm_device.master points to + * the owner at the top of the lease tree (i.e. the &struct drm_master for which + * &drm_master.lessor is NULL). The full list of lessees that are leasing + * objects from the owner can be searched via the owner's + * &drm_master.lessee_idr. + */ + #define drm_for_each_lessee(lessee, lessor) \ list_for_each_entry((lessee), &(lessor)->lessees, lessee_list) static uint64_t drm_lease_idr_object; -/** - * drm_lease_owner - return ancestor owner drm_master - * @master: drm_master somewhere within tree of lessees and lessors - * - * RETURN: - * - * drm_master at the top of the tree (i.e, with lessor NULL - */ struct drm_master *drm_lease_owner(struct drm_master *master) { while (master->lessor != NULL) @@ -35,16 +78,6 @@ struct drm_master *drm_lease_owner(struct drm_master *master) return master; } -/** - * _drm_find_lessee - find lessee by id (idr_mutex held) - * @master: drm_master of lessor - * @lessee_id: id - * - * RETURN: - * - * drm_master of the lessee if valid, NULL otherwise - */ - static struct drm_master* _drm_find_lessee(struct drm_master *master, int lessee_id) { @@ -52,17 +85,6 @@ _drm_find_lessee(struct drm_master *master, int lessee_id) return idr_find(&drm_lease_owner(master)->lessee_idr, lessee_id); } -/** - * _drm_lease_held_master - check to see if an object is leased (or owned) by master (idr_mutex held) - * @master: the master to check the lease status of - * @id: the id to check - * - * Checks if the specified master holds a lease on the object. Return - * value: - * - * true 'master' holds a lease on (or owns) the object - * false 'master' does not hold a lease. - */ static int _drm_lease_held_master(struct drm_master *master, int id) { lockdep_assert_held(&master->dev->mode_config.idr_mutex); @@ -71,17 +93,7 @@ static int _drm_lease_held_master(struct drm_master *master, int id) return true; } -/** - * _drm_has_leased - check to see if an object has been leased (idr_mutex held) - * @master: the master to check the lease status of - * @id: the id to check - * - * Checks if any lessee of 'master' holds a lease on 'id'. Return - * value: - * - * true Some lessee holds a lease on the object. - * false No lessee has a lease on the object. - */ +/* Checks if the given object has been leased to some lessee of drm_master */ static bool _drm_has_leased(struct drm_master *master, int id) { struct drm_master *lessee; @@ -93,58 +105,51 @@ static bool _drm_has_leased(struct drm_master *master, int id) return false; } -/** - * _drm_lease_held - check drm_mode_object lease status (idr_mutex held) - * @file_priv: the master drm_file - * @id: the object id - * - * Checks if the specified master holds a lease on the object. Return - * value: - * - * true 'master' holds a lease on (or owns) the object - * false 'master' does not hold a lease. 
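The leasing model described above is exercised from user space through the DRM_IOCTL_MODE_CREATE_LEASE uapi that drm_mode_create_lease_ioctl() below implements. As a minimal sketch (not part of this patch; the object IDs are placeholders a real client would first obtain from the GETRESOURCES ioctls, and error handling is trimmed), an owner leasing one CRTC and one connector to a new lessee could look like:

#include <fcntl.h>
#include <stdint.h>
#include <sys/ioctl.h>
#include <drm/drm.h>	/* DRM_IOCTL_MODE_CREATE_LEASE, struct drm_mode_create_lease */

static int lease_crtc_and_connector(int owner_fd, uint32_t crtc_id,
				    uint32_t connector_id)
{
	uint32_t objects[2] = { crtc_id, connector_id };
	struct drm_mode_create_lease create = {
		.object_ids   = (uintptr_t)objects,
		.object_count = 2,
		.flags        = O_CLOEXEC,
	};

	/* owner_fd is the lessor; the kernel checks it controls both objects. */
	if (ioctl(owner_fd, DRM_IOCTL_MODE_CREATE_LEASE, &create))
		return -1;

	/*
	 * create.fd is a new DRM file whose drm_master is the lessee;
	 * create.lessee_id names it in the owner's lessee_idr and can be
	 * passed to the list/revoke ioctls handled further down.
	 */
	return (int)create.fd;
}
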
- */ +/* Called with idr_mutex held */ bool _drm_lease_held(struct drm_file *file_priv, int id) { - if (!file_priv || !file_priv->master) + bool ret; + struct drm_master *master; + + if (!file_priv) + return true; + + master = drm_file_get_master(file_priv); + if (!master) return true; + ret = _drm_lease_held_master(master, id); + drm_master_put(&master); - return _drm_lease_held_master(file_priv->master, id); + return ret; } -/** - * drm_lease_held - check drm_mode_object lease status (idr_mutex not held) - * @file_priv: the master drm_file - * @id: the object id - * - * Checks if the specified master holds a lease on the object. Return - * value: - * - * true 'master' holds a lease on (or owns) the object - * false 'master' does not hold a lease. - */ bool drm_lease_held(struct drm_file *file_priv, int id) { struct drm_master *master; bool ret; - if (!file_priv || !file_priv->master || !file_priv->master->lessor) + if (!file_priv) return true; - master = file_priv->master; + master = drm_file_get_master(file_priv); + if (!master) + return true; + if (!master->lessor) { + ret = true; + goto out; + } mutex_lock(&master->dev->mode_config.idr_mutex); ret = _drm_lease_held_master(master, id); mutex_unlock(&master->dev->mode_config.idr_mutex); + +out: + drm_master_put(&master); return ret; } -/** - * drm_lease_filter_crtcs - restricted crtc set to leased values (idr_mutex not held) - * @file_priv: requestor file - * @crtcs_in: bitmask of crtcs to check - * - * Reconstructs a crtc mask based on the crtcs which are visible - * through the specified file. +/* + * Given a bitmask of crtcs to check, reconstructs a crtc mask based on the + * crtcs which are visible through the specified file. */ uint32_t drm_lease_filter_crtcs(struct drm_file *file_priv, uint32_t crtcs_in) { @@ -154,10 +159,16 @@ uint32_t drm_lease_filter_crtcs(struct drm_file *file_priv, uint32_t crtcs_in) int count_in, count_out; uint32_t crtcs_out = 0; - if (!file_priv || !file_priv->master || !file_priv->master->lessor) + if (!file_priv) return crtcs_in; - master = file_priv->master; + master = drm_file_get_master(file_priv); + if (!master) + return crtcs_in; + if (!master->lessor) { + crtcs_out = crtcs_in; + goto out; + } dev = master->dev; count_in = count_out = 0; @@ -176,14 +187,13 @@ uint32_t drm_lease_filter_crtcs(struct drm_file *file_priv, uint32_t crtcs_in) count_in++; } mutex_unlock(&master->dev->mode_config.idr_mutex); + +out: + drm_master_put(&master); return crtcs_out; } /* - * drm_lease_create - create a new drm_master with leased objects (idr_mutex not held) - * @lessor: lease holder (or owner) of objects - * @leases: objects to lease to the new drm_master - * * Uses drm_master_create to allocate a new drm_master, then checks to * make sure all of the desired objects can be leased, atomically * leasing them to the new drmmaster. @@ -252,15 +262,6 @@ out_lessee: return ERR_PTR(error); } -/** - * drm_lease_destroy - a master is going away (idr_mutex not held) - * @master: the drm_master being destroyed - * - * All lessees will have been destroyed as they - * hold a reference on their lessor. Notify any - * lessor for this master so that it can check - * the list of lessees. 
- */ void drm_lease_destroy(struct drm_master *master) { struct drm_device *dev = master->dev; @@ -294,10 +295,6 @@ void drm_lease_destroy(struct drm_master *master) DRM_DEBUG_LEASE("drm_lease_destroy done %d\n", master->lessee_id); } -/** - * _drm_lease_revoke - revoke access to all leased objects (idr_mutex held) - * @top: the master losing its lease - */ static void _drm_lease_revoke(struct drm_master *top) { int object; @@ -336,10 +333,6 @@ static void _drm_lease_revoke(struct drm_master *top) } } -/** - * drm_lease_revoke - revoke access to all leased objects (idr_mutex not held) - * @top: the master losing its lease - */ void drm_lease_revoke(struct drm_master *top) { mutex_lock(&top->dev->mode_config.idr_mutex); @@ -471,12 +464,7 @@ out_free_objects: return ret; } -/** - * drm_mode_create_lease_ioctl - create a new lease - * @dev: the drm device - * @data: pointer to struct drm_mode_create_lease - * @lessor_priv: the file being manipulated - * +/* * The master associated with the specified file will have a lease * created containing the objects specified in the ioctl structure. * A file descriptor will be allocated for that and returned to the @@ -489,7 +477,7 @@ int drm_mode_create_lease_ioctl(struct drm_device *dev, size_t object_count; int ret = 0; struct idr leases; - struct drm_master *lessor = lessor_priv->master; + struct drm_master *lessor; struct drm_master *lessee = NULL; struct file *lessee_file = NULL; struct file *lessor_file = lessor_priv->filp; @@ -501,12 +489,6 @@ int drm_mode_create_lease_ioctl(struct drm_device *dev, if (!drm_core_check_feature(dev, DRIVER_MODESET)) return -EOPNOTSUPP; - /* Do not allow sub-leases */ - if (lessor->lessor) { - DRM_DEBUG_LEASE("recursive leasing not allowed\n"); - return -EINVAL; - } - /* need some objects */ if (cl->object_count == 0) { DRM_DEBUG_LEASE("no objects in lease\n"); @@ -518,12 +500,22 @@ int drm_mode_create_lease_ioctl(struct drm_device *dev, return -EINVAL; } + lessor = drm_file_get_master(lessor_priv); + /* Do not allow sub-leases */ + if (lessor->lessor) { + DRM_DEBUG_LEASE("recursive leasing not allowed\n"); + ret = -EINVAL; + goto out_lessor; + } + object_count = cl->object_count; object_ids = memdup_user(u64_to_user_ptr(cl->object_ids), array_size(object_count, sizeof(__u32))); - if (IS_ERR(object_ids)) - return PTR_ERR(object_ids); + if (IS_ERR(object_ids)) { + ret = PTR_ERR(object_ids); + goto out_lessor; + } idr_init(&leases); @@ -534,14 +526,15 @@ int drm_mode_create_lease_ioctl(struct drm_device *dev, if (ret) { DRM_DEBUG_LEASE("lease object lookup failed: %i\n", ret); idr_destroy(&leases); - return ret; + goto out_lessor; } /* Allocate a file descriptor for the lease */ fd = get_unused_fd_flags(cl->flags & (O_CLOEXEC | O_NONBLOCK)); if (fd < 0) { idr_destroy(&leases); - return fd; + ret = fd; + goto out_lessor; } DRM_DEBUG_LEASE("Creating lease\n"); @@ -577,6 +570,7 @@ int drm_mode_create_lease_ioctl(struct drm_device *dev, /* Hook up the fd */ fd_install(fd, lessee_file); + drm_master_put(&lessor); DRM_DEBUG_LEASE("drm_mode_create_lease_ioctl succeeded\n"); return 0; @@ -586,29 +580,19 @@ out_lessee: out_leases: put_unused_fd(fd); +out_lessor: + drm_master_put(&lessor); DRM_DEBUG_LEASE("drm_mode_create_lease_ioctl failed: %d\n", ret); return ret; } -/** - * drm_mode_list_lessees_ioctl - list lessee ids - * @dev: the drm device - * @data: pointer to struct drm_mode_list_lessees - * @lessor_priv: the file being manipulated - * - * Starting from the master associated with the specified file, - * the master with 
the provided lessee_id is found, and then - * an array of lessee ids associated with leases from that master - * are returned. - */ - int drm_mode_list_lessees_ioctl(struct drm_device *dev, void *data, struct drm_file *lessor_priv) { struct drm_mode_list_lessees *arg = data; __u32 __user *lessee_ids = (__u32 __user *) (uintptr_t) (arg->lessees_ptr); __u32 count_lessees = arg->count_lessees; - struct drm_master *lessor = lessor_priv->master, *lessee; + struct drm_master *lessor, *lessee; int count; int ret = 0; @@ -619,6 +603,7 @@ int drm_mode_list_lessees_ioctl(struct drm_device *dev, if (!drm_core_check_feature(dev, DRIVER_MODESET)) return -EOPNOTSUPP; + lessor = drm_file_get_master(lessor_priv); DRM_DEBUG_LEASE("List lessees for %d\n", lessor->lessee_id); mutex_lock(&dev->mode_config.idr_mutex); @@ -642,26 +627,19 @@ int drm_mode_list_lessees_ioctl(struct drm_device *dev, arg->count_lessees = count; mutex_unlock(&dev->mode_config.idr_mutex); + drm_master_put(&lessor); return ret; } -/** - * drm_mode_get_lease_ioctl - list leased objects - * @dev: the drm device - * @data: pointer to struct drm_mode_get_lease - * @lessee_priv: the file being manipulated - * - * Return the list of leased objects for the specified lessee - */ - +/* Return the list of leased objects for the specified lessee */ int drm_mode_get_lease_ioctl(struct drm_device *dev, void *data, struct drm_file *lessee_priv) { struct drm_mode_get_lease *arg = data; __u32 __user *object_ids = (__u32 __user *) (uintptr_t) (arg->objects_ptr); __u32 count_objects = arg->count_objects; - struct drm_master *lessee = lessee_priv->master; + struct drm_master *lessee; struct idr *object_idr; int count; void *entry; @@ -675,6 +653,7 @@ int drm_mode_get_lease_ioctl(struct drm_device *dev, if (!drm_core_check_feature(dev, DRIVER_MODESET)) return -EOPNOTSUPP; + lessee = drm_file_get_master(lessee_priv); DRM_DEBUG_LEASE("get lease for %d\n", lessee->lessee_id); mutex_lock(&dev->mode_config.idr_mutex); @@ -702,16 +681,12 @@ int drm_mode_get_lease_ioctl(struct drm_device *dev, arg->count_objects = count; mutex_unlock(&dev->mode_config.idr_mutex); + drm_master_put(&lessee); return ret; } -/** - * drm_mode_revoke_lease_ioctl - revoke lease - * @dev: the drm device - * @data: pointer to struct drm_mode_revoke_lease - * @lessor_priv: the file being manipulated - * +/* * This removes all of the objects from the lease without * actually getting rid of the lease itself; that way all * references to it still work correctly @@ -720,7 +695,7 @@ int drm_mode_revoke_lease_ioctl(struct drm_device *dev, void *data, struct drm_file *lessor_priv) { struct drm_mode_revoke_lease *arg = data; - struct drm_master *lessor = lessor_priv->master; + struct drm_master *lessor; struct drm_master *lessee; int ret = 0; @@ -730,6 +705,7 @@ int drm_mode_revoke_lease_ioctl(struct drm_device *dev, if (!drm_core_check_feature(dev, DRIVER_MODESET)) return -EOPNOTSUPP; + lessor = drm_file_get_master(lessor_priv); mutex_lock(&dev->mode_config.idr_mutex); lessee = _drm_find_lessee(lessor, arg->lessee_id); @@ -750,6 +726,7 @@ int drm_mode_revoke_lease_ioctl(struct drm_device *dev, fail: mutex_unlock(&dev->mode_config.idr_mutex); + drm_master_put(&lessor); return ret; } diff --git a/drivers/gpu/drm/drm_legacy_misc.c b/drivers/gpu/drm/drm_legacy_misc.c index 83db43b7a25e..d4c5434062d7 100644 --- a/drivers/gpu/drm/drm_legacy_misc.c +++ b/drivers/gpu/drm/drm_legacy_misc.c @@ -35,7 +35,6 @@ #include <drm/drm_device.h> #include <drm/drm_drv.h> -#include <drm/drm_irq.h> #include 
<drm/drm_print.h> #include "drm_internal.h" @@ -78,7 +77,7 @@ int drm_legacy_setup(struct drm_device * dev) void drm_legacy_dev_reinit(struct drm_device *dev) { if (dev->irq_enabled) - drm_irq_uninstall(dev); + drm_legacy_irq_uninstall(dev); mutex_lock(&dev->struct_mutex); diff --git a/drivers/gpu/drm/drm_mipi_dbi.c b/drivers/gpu/drm/drm_mipi_dbi.c index 43a9b739bba7..71b646c4131f 100644 --- a/drivers/gpu/drm/drm_mipi_dbi.c +++ b/drivers/gpu/drm/drm_mipi_dbi.c @@ -7,7 +7,6 @@ #include <linux/debugfs.h> #include <linux/delay.h> -#include <linux/dma-buf.h> #include <linux/gpio/consumer.h> #include <linux/module.h> #include <linux/regulator/consumer.h> @@ -202,21 +201,17 @@ int mipi_dbi_buf_copy(void *dst, struct drm_framebuffer *fb, { struct drm_gem_object *gem = drm_gem_fb_get_obj(fb, 0); struct drm_gem_cma_object *cma_obj = to_drm_gem_cma_obj(gem); - struct dma_buf_attachment *import_attach = gem->import_attach; void *src = cma_obj->vaddr; - int ret = 0; + int ret; - if (import_attach) { - ret = dma_buf_begin_cpu_access(import_attach->dmabuf, - DMA_FROM_DEVICE); - if (ret) - return ret; - } + ret = drm_gem_fb_begin_cpu_access(fb, DMA_FROM_DEVICE); + if (ret) + return ret; switch (fb->format->format) { case DRM_FORMAT_RGB565: if (swap) - drm_fb_swab(dst, src, fb, clip, !import_attach); + drm_fb_swab(dst, src, fb, clip, !gem->import_attach); else drm_fb_memcpy(dst, src, fb, clip); break; @@ -229,9 +224,8 @@ int mipi_dbi_buf_copy(void *dst, struct drm_framebuffer *fb, return -EINVAL; } - if (import_attach) - ret = dma_buf_end_cpu_access(import_attach->dmabuf, - DMA_FROM_DEVICE); + drm_gem_fb_end_cpu_access(fb, DMA_FROM_DEVICE); + return ret; } EXPORT_SYMBOL(mipi_dbi_buf_copy); @@ -928,6 +922,59 @@ static int mipi_dbi_spi1_transfer(struct mipi_dbi *dbi, int dc, return 0; } +static int mipi_dbi_typec1_command_read(struct mipi_dbi *dbi, u8 *cmd, + u8 *data, size_t len) +{ + struct spi_device *spi = dbi->spi; + u32 speed_hz = min_t(u32, MIPI_DBI_MAX_SPI_READ_SPEED, + spi->max_speed_hz / 2); + struct spi_transfer tr[2] = { + { + .speed_hz = speed_hz, + .bits_per_word = 9, + .tx_buf = dbi->tx_buf9, + .len = 2, + }, { + .speed_hz = speed_hz, + .bits_per_word = 8, + .len = len, + .rx_buf = data, + }, + }; + struct spi_message m; + u16 *dst16; + int ret; + + if (!len) + return -EINVAL; + + if (!spi_is_bpw_supported(spi, 9)) { + /* + * FIXME: implement something like mipi_dbi_spi1e_transfer() but + * for reads using emulation. + */ + dev_err(&spi->dev, + "reading on host not supporting 9 bpw not yet implemented\n"); + return -EOPNOTSUPP; + } + + /* + * Turn the 8bit command into a 16bit version of the command in the + * buffer. Only 9 bits of this will be used when executing the actual + * transfer. + */ + dst16 = dbi->tx_buf9; + dst16[0] = *cmd; + + spi_message_init_with_transfers(&m, tr, ARRAY_SIZE(tr)); + ret = spi_sync(spi, &m); + + if (!ret) + MIPI_DBI_DEBUG_COMMAND(*cmd, data, len); + + return ret; +} + static int mipi_dbi_typec1_command(struct mipi_dbi *dbi, u8 *cmd, u8 *parameters, size_t num) { @@ -935,7 +982,7 @@ static int mipi_dbi_typec1_command(struct mipi_dbi *dbi, u8 *cmd, int ret; if (mipi_dbi_command_is_read(dbi, *cmd)) - return -EOPNOTSUPP; + return mipi_dbi_typec1_command_read(dbi, cmd, parameters, num); MIPI_DBI_DEBUG_COMMAND(*cmd, parameters, num); diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c index a4a04d246135..93d48a6f04ab 100644 --- a/drivers/gpu/drm/drm_mm.c +++ b/drivers/gpu/drm/drm_mm.c @@ -700,7 +700,7 @@ EXPORT_SYMBOL(drm_mm_replace_node); * interfaces. 
First a scan operation needs to be initialized with * drm_mm_scan_init() or drm_mm_scan_init_with_range(). The driver adds * objects to the roster, probably by walking an LRU list, but this can be - * freely implemented. Eviction candiates are added using + * freely implemented. Eviction candidates are added using * drm_mm_scan_add_block() until a suitable hole is found or there are no * further evictable objects. Eviction roster metadata is tracked in &struct * drm_mm_scan. diff --git a/drivers/gpu/drm/drm_mode_object.c b/drivers/gpu/drm/drm_mode_object.c index b26588b52795..86d9e907c0b2 100644 --- a/drivers/gpu/drm/drm_mode_object.c +++ b/drivers/gpu/drm/drm_mode_object.c @@ -91,7 +91,7 @@ void drm_mode_object_register(struct drm_device *dev, } /** - * drm_mode_object_unregister - free a modeset identifer + * drm_mode_object_unregister - free a modeset identifier * @dev: DRM device * @object: object to free * diff --git a/drivers/gpu/drm/drm_modes.c b/drivers/gpu/drm/drm_modes.c index ae53ea624c73..1c72208d8133 100644 --- a/drivers/gpu/drm/drm_modes.c +++ b/drivers/gpu/drm/drm_modes.c @@ -1542,7 +1542,7 @@ static int drm_mode_parse_cmdline_int(const char *delim, unsigned int *int_ret) /* * delim must point to the '=', otherwise it is a syntax error and - * if delim points to the terminating zero, then delim + 1 wil point + * if delim points to the terminating zero, then delim + 1 will point * past the end of the string. */ if (*delim != '=') @@ -1972,7 +1972,7 @@ int drm_mode_convert_umode(struct drm_device *dev, out->flags = in->flags; /* * Old xf86-video-vmware (possibly others too) used to - * leave 'type' unititialized. Just ignore any bits we + * leave 'type' uninitialized. Just ignore any bits we * don't like. It's a just hint after all, and more * useful for the kernel->userspace direction anyway. */ diff --git a/drivers/gpu/drm/drm_of.c b/drivers/gpu/drm/drm_of.c index ca04c34e8251..997b8827fed2 100644 --- a/drivers/gpu/drm/drm_of.c +++ b/drivers/gpu/drm/drm_of.c @@ -315,7 +315,7 @@ static int drm_of_lvds_get_remote_pixels_type( remote_port = of_graph_get_remote_port(endpoint); if (!remote_port) { - of_node_put(remote_port); + of_node_put(endpoint); return -EPIPE; } @@ -331,8 +331,10 @@ static int drm_of_lvds_get_remote_pixels_type( * configurations by passing the endpoints explicitly to * drm_of_lvds_get_dual_link_pixel_order(). */ - if (!current_pt || pixels_type != current_pt) + if (!current_pt || pixels_type != current_pt) { + of_node_put(endpoint); return -EINVAL; + } } return pixels_type; diff --git a/drivers/gpu/drm/drm_plane.c b/drivers/gpu/drm/drm_plane.c index b373958ecb30..82afb854141b 100644 --- a/drivers/gpu/drm/drm_plane.c +++ b/drivers/gpu/drm/drm_plane.c @@ -38,7 +38,7 @@ /** * DOC: overview * - * A plane represents an image source that can be blended with or overlayed on + * A plane represents an image source that can be blended with or overlaid on * top of a CRTC during the scanout process. Planes take their input data from a * &drm_framebuffer object. The plane itself specifies the cropping and scaling * of that image, and where it is placed on the visible area of a display @@ -1397,6 +1397,110 @@ out: return ret; } +/** + * DOC: damage tracking + * + * FB_DAMAGE_CLIPS is an optional plane property which provides a means to + * specify a list of damage rectangles on a plane in framebuffer coordinates of + * the framebuffer attached to the plane. 
In current context damage is the area + * of plane framebuffer that has changed since last plane update (also called + * page-flip), irrespective of whether currently attached framebuffer is same as + * framebuffer attached during last plane update or not. + * + * FB_DAMAGE_CLIPS is a hint to kernel which could be helpful for some drivers + * to optimize internally especially for virtual devices where each framebuffer + * change needs to be transmitted over network, usb, etc. + * + * Since FB_DAMAGE_CLIPS is a hint so it is an optional property. User-space can + * ignore damage clips property and in that case driver will do a full plane + * update. In case damage clips are provided then it is guaranteed that the area + * inside damage clips will be updated to plane. For efficiency driver can do + * full update or can update more than specified in damage clips. Since driver + * is free to read more, user-space must always render the entire visible + * framebuffer. Otherwise there can be corruptions. Also, if a user-space + * provides damage clips which doesn't encompass the actual damage to + * framebuffer (since last plane update) can result in incorrect rendering. + * + * FB_DAMAGE_CLIPS is a blob property with the layout of blob data is simply an + * array of &drm_mode_rect. Unlike plane &drm_plane_state.src coordinates, + * damage clips are not in 16.16 fixed point. Similar to plane src in + * framebuffer, damage clips cannot be negative. In damage clip, x1/y1 are + * inclusive and x2/y2 are exclusive. While kernel does not error for overlapped + * damage clips, it is strongly discouraged. + * + * Drivers that are interested in damage interface for plane should enable + * FB_DAMAGE_CLIPS property by calling drm_plane_enable_fb_damage_clips(). + * Drivers implementing damage can use drm_atomic_helper_damage_iter_init() and + * drm_atomic_helper_damage_iter_next() helper iterator function to get damage + * rectangles clipped to &drm_plane_state.src. + */ + +/** + * drm_plane_enable_fb_damage_clips - Enables plane fb damage clips property. + * @plane: Plane on which to enable damage clips property. + * + * This function lets driver to enable the damage clips property on a plane. + */ +void drm_plane_enable_fb_damage_clips(struct drm_plane *plane) +{ + struct drm_device *dev = plane->dev; + struct drm_mode_config *config = &dev->mode_config; + + drm_object_attach_property(&plane->base, config->prop_fb_damage_clips, + 0); +} +EXPORT_SYMBOL(drm_plane_enable_fb_damage_clips); + +/** + * drm_plane_get_damage_clips_count - Returns damage clips count. + * @state: Plane state. + * + * Simple helper to get the number of &drm_mode_rect clips set by user-space + * during plane update. + * + * Return: Number of clips in plane fb_damage_clips blob property. + */ +unsigned int +drm_plane_get_damage_clips_count(const struct drm_plane_state *state) +{ + return (state && state->fb_damage_clips) ? + state->fb_damage_clips->length/sizeof(struct drm_mode_rect) : 0; +} +EXPORT_SYMBOL(drm_plane_get_damage_clips_count); + +struct drm_mode_rect * +__drm_plane_get_damage_clips(const struct drm_plane_state *state) +{ + return (struct drm_mode_rect *)((state && state->fb_damage_clips) ? + state->fb_damage_clips->data : NULL); +} + +/** + * drm_plane_get_damage_clips - Returns damage clips. + * @state: Plane state. + * + * Note that this function returns uapi type &drm_mode_rect. 
Drivers might want + * to use the helper functions drm_atomic_helper_damage_iter_init() and + * drm_atomic_helper_damage_iter_next() or drm_atomic_helper_damage_merged() if + * the driver can only handle a single damage region at most. + * + * Return: Damage clips in plane fb_damage_clips blob property. + */ +struct drm_mode_rect * +drm_plane_get_damage_clips(const struct drm_plane_state *state) +{ + struct drm_device *dev = state->plane->dev; + struct drm_mode_config *config = &dev->mode_config; + + /* check that drm_plane_enable_fb_damage_clips() was called */ + if (!drm_mode_obj_find_prop_id(&state->plane->base, + config->prop_fb_damage_clips->base.id)) + drm_warn_once(dev, "drm_plane_enable_fb_damage_clips() not called\n"); + + return __drm_plane_get_damage_clips(state); +} +EXPORT_SYMBOL(drm_plane_get_damage_clips); + struct drm_property * drm_create_scaling_filter_prop(struct drm_device *dev, unsigned int supported_filters) diff --git a/drivers/gpu/drm/drm_plane_helper.c b/drivers/gpu/drm/drm_plane_helper.c index 3aae7ea522f2..5b2d0ca03705 100644 --- a/drivers/gpu/drm/drm_plane_helper.c +++ b/drivers/gpu/drm/drm_plane_helper.c @@ -210,7 +210,7 @@ static int drm_primary_helper_update(struct drm_plane *plane, struct drm_crtc *c * We call set_config() directly here rather than using * drm_mode_set_config_internal. We're reprogramming the same * connectors that were already in use, so we shouldn't need the extra - * cross-CRTC fb refcounting to accomodate stealing connectors. + * cross-CRTC fb refcounting to accommodate stealing connectors. * drm_mode_setplane() already handles the basic refcounting for the * framebuffers involved in this operation. */ diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c index 2a54f86856af..1d009494af8b 100644 --- a/drivers/gpu/drm/drm_prime.c +++ b/drivers/gpu/drm/drm_prime.c @@ -73,7 +73,7 @@ * Thus the chain of references always flows in one direction, avoiding loops: * importing GEM object -> dma-buf -> exported GEM bo. A further complication * are the lookup caches for import and export. These are required to guarantee - * that any given object will always have only one uniqe userspace handle. This + * that any given object will always have only one unique userspace handle. This * is required to allow userspace to detect duplicated imports, since some GEM * drivers do fail command submissions if a given buffer object is listed more * than once. These import and export caches in &drm_prime_file_private only @@ -549,7 +549,7 @@ int drm_prime_handle_to_fd_ioctl(struct drm_device *dev, void *data, * * FIXME: The underlying helper functions are named rather inconsistently. * - * Exporting buffers + * Importing buffers * ~~~~~~~~~~~~~~~~~ * * Importing dma-bufs using drm_gem_prime_import() relies on diff --git a/drivers/gpu/drm/drm_probe_helper.c b/drivers/gpu/drm/drm_probe_helper.c index e7e1ee2aa352..5606bca3caa8 100644 --- a/drivers/gpu/drm/drm_probe_helper.c +++ b/drivers/gpu/drm/drm_probe_helper.c @@ -757,7 +757,7 @@ EXPORT_SYMBOL(drm_kms_helper_poll_disable); * drm_kms_helper_poll_init - initialize and enable output polling * @dev: drm_device * - * This function intializes and then also enables output polling support for + * This function initializes and then also enables output polling support for * @dev. Drivers which do not have reliable hotplug support in hardware can use * this helper infrastructure to regularly poll such connectors for changes in * their connection state. 
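On the driver side, the FB_DAMAGE_CLIPS hint documented above is normally consumed through the damage-iterator helpers rather than by parsing the blob by hand. A minimal sketch of a plane's atomic_update hook walking the clipped damage (not part of this patch; my_flush_rect() stands in for a device-specific flush, and the plane is assumed to have had drm_plane_enable_fb_damage_clips() called at init time):

#include <drm/drm_atomic.h>
#include <drm/drm_damage_helper.h>
#include <drm/drm_rect.h>

static void my_plane_atomic_update(struct drm_plane *plane,
				   struct drm_atomic_state *state)
{
	struct drm_plane_state *old_state =
		drm_atomic_get_old_plane_state(state, plane);
	struct drm_plane_state *new_state =
		drm_atomic_get_new_plane_state(state, plane);
	struct drm_atomic_helper_damage_iter iter;
	struct drm_rect clip;

	/*
	 * The iterator yields damage clipped to new_state->src and falls
	 * back to full-plane damage when user space supplied no clips, so
	 * this loop also covers the "no hint" case.
	 */
	drm_atomic_helper_damage_iter_init(&iter, old_state, new_state);
	drm_atomic_for_each_plane_damage(&iter, &clip)
		my_flush_rect(plane, new_state->fb, &clip); /* hypothetical */
}

Drivers that can only handle a single rectangle per update can call drm_atomic_helper_damage_merged() instead of iterating, as the kernel-doc above notes.
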
diff --git a/drivers/gpu/drm/drm_property.c b/drivers/gpu/drm/drm_property.c index 27c824a6eb60..6c353c9dc772 100644 --- a/drivers/gpu/drm/drm_property.c +++ b/drivers/gpu/drm/drm_property.c @@ -434,7 +434,7 @@ EXPORT_SYMBOL(drm_property_add_enum); /** * drm_property_destroy - destroy a drm property * @dev: drm device - * @property: property to destry + * @property: property to destroy * * This function frees a property including any attached resources like * enumeration values. diff --git a/drivers/gpu/drm/drm_scdc_helper.c b/drivers/gpu/drm/drm_scdc_helper.c index 991b8c86d78d..48a382464d54 100644 --- a/drivers/gpu/drm/drm_scdc_helper.c +++ b/drivers/gpu/drm/drm_scdc_helper.c @@ -241,7 +241,7 @@ bool drm_scdc_set_high_tmds_clock_ratio(struct i2c_adapter *adapter, bool set) /* * The spec says that a source should wait minimum 1ms and maximum * 100ms after writing the TMDS config for clock ratio. Lets allow a - * wait of upto 2ms here. + * wait of up to 2ms here. */ usleep_range(1000, 2000); return true; diff --git a/drivers/gpu/drm/drm_simple_kms_helper.c b/drivers/gpu/drm/drm_simple_kms_helper.c index 0b095a313c44..72989ed1baba 100644 --- a/drivers/gpu/drm/drm_simple_kms_helper.c +++ b/drivers/gpu/drm/drm_simple_kms_helper.c @@ -9,6 +9,8 @@ #include <drm/drm_atomic.h> #include <drm/drm_atomic_helper.h> #include <drm/drm_bridge.h> +#include <drm/drm_drv.h> +#include <drm/drm_gem_atomic_helper.h> #include <drm/drm_managed.h> #include <drm/drm_plane_helper.h> #include <drm/drm_probe_helper.h> @@ -143,6 +145,39 @@ static const struct drm_crtc_helper_funcs drm_simple_kms_crtc_helper_funcs = { .atomic_disable = drm_simple_kms_crtc_disable, }; +static void drm_simple_kms_crtc_reset(struct drm_crtc *crtc) +{ + struct drm_simple_display_pipe *pipe; + + pipe = container_of(crtc, struct drm_simple_display_pipe, crtc); + if (!pipe->funcs || !pipe->funcs->reset_crtc) + return drm_atomic_helper_crtc_reset(crtc); + + return pipe->funcs->reset_crtc(pipe); +} + +static struct drm_crtc_state *drm_simple_kms_crtc_duplicate_state(struct drm_crtc *crtc) +{ + struct drm_simple_display_pipe *pipe; + + pipe = container_of(crtc, struct drm_simple_display_pipe, crtc); + if (!pipe->funcs || !pipe->funcs->duplicate_crtc_state) + return drm_atomic_helper_crtc_duplicate_state(crtc); + + return pipe->funcs->duplicate_crtc_state(pipe); +} + +static void drm_simple_kms_crtc_destroy_state(struct drm_crtc *crtc, struct drm_crtc_state *state) +{ + struct drm_simple_display_pipe *pipe; + + pipe = container_of(crtc, struct drm_simple_display_pipe, crtc); + if (!pipe->funcs || !pipe->funcs->destroy_crtc_state) + drm_atomic_helper_crtc_destroy_state(crtc, state); + else + pipe->funcs->destroy_crtc_state(pipe, state); +} + static int drm_simple_kms_crtc_enable_vblank(struct drm_crtc *crtc) { struct drm_simple_display_pipe *pipe; @@ -166,12 +201,12 @@ static void drm_simple_kms_crtc_disable_vblank(struct drm_crtc *crtc) } static const struct drm_crtc_funcs drm_simple_kms_crtc_funcs = { - .reset = drm_atomic_helper_crtc_reset, + .reset = drm_simple_kms_crtc_reset, .destroy = drm_crtc_cleanup, .set_config = drm_atomic_helper_set_config, .page_flip = drm_atomic_helper_page_flip, - .atomic_duplicate_state = drm_atomic_helper_crtc_duplicate_state, - .atomic_destroy_state = drm_atomic_helper_crtc_destroy_state, + .atomic_duplicate_state = drm_simple_kms_crtc_duplicate_state, + .atomic_destroy_state = drm_simple_kms_crtc_destroy_state, .enable_vblank = drm_simple_kms_crtc_enable_vblank, .disable_vblank = 
drm_simple_kms_crtc_disable_vblank, }; @@ -225,8 +260,14 @@ static int drm_simple_kms_plane_prepare_fb(struct drm_plane *plane, struct drm_simple_display_pipe *pipe; pipe = container_of(plane, struct drm_simple_display_pipe, plane); - if (!pipe->funcs || !pipe->funcs->prepare_fb) - return 0; + if (!pipe->funcs || !pipe->funcs->prepare_fb) { + if (WARN_ON_ONCE(!drm_core_check_feature(plane->dev, DRIVER_GEM))) + return 0; + + WARN_ON_ONCE(pipe->funcs && pipe->funcs->cleanup_fb); + + return drm_gem_simple_display_pipe_prepare_fb(pipe, state); + } return pipe->funcs->prepare_fb(pipe, state); } diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index fdd2ec87cdd1..c9a9d74f338c 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -725,7 +725,7 @@ err_put_fd: return ret; } /** - * drm_syncobj_open - initalizes syncobj file-private structures at devnode open time + * drm_syncobj_open - initializes syncobj file-private structures at devnode open time * @file_private: drm file-private structure to set up * * Called at device open time, sets up the structure for handling refcounting @@ -861,7 +861,7 @@ static int drm_syncobj_transfer_to_timeline(struct drm_file *file_private, &fence); if (ret) goto err; - chain = kzalloc(sizeof(struct dma_fence_chain), GFP_KERNEL); + chain = dma_fence_chain_alloc(); if (!chain) { ret = -ENOMEM; goto err1; @@ -1402,10 +1402,10 @@ drm_syncobj_timeline_signal_ioctl(struct drm_device *dev, void *data, goto err_points; } for (i = 0; i < args->count_handles; i++) { - chains[i] = kzalloc(sizeof(struct dma_fence_chain), GFP_KERNEL); + chains[i] = dma_fence_chain_alloc(); if (!chains[i]) { for (j = 0; j < i; j++) - kfree(chains[j]); + dma_fence_chain_free(chains[j]); ret = -ENOMEM; goto err_chains; } diff --git a/drivers/gpu/drm/drm_vblank.c b/drivers/gpu/drm/drm_vblank.c index 3417e1ac7918..b701cda86d0c 100644 --- a/drivers/gpu/drm/drm_vblank.c +++ b/drivers/gpu/drm/drm_vblank.c @@ -191,7 +191,7 @@ static u32 drm_max_vblank_count(struct drm_device *dev, unsigned int pipe) /* * "No hw counter" fallback implementation of .get_vblank_counter() hook, - * if there is no useable hardware frame counter available. + * if there is no usable hardware frame counter available. */ static u32 drm_vblank_no_hw_counter(struct drm_device *dev, unsigned int pipe) { @@ -905,7 +905,7 @@ drm_get_last_vbltimestamp(struct drm_device *dev, unsigned int pipe, * and drm_crtc_vblank_count() or drm_crtc_vblank_count_and_time() * provide a barrier: Any writes done before calling * drm_crtc_handle_vblank() will be visible to callers of the later - * functions, iff the vblank count is the same or a later one. + * functions, if the vblank count is the same or a later one. * * See also &drm_vblank_crtc.count. * @@ -968,7 +968,7 @@ static u64 drm_vblank_count_and_time(struct drm_device *dev, unsigned int pipe, * and drm_crtc_vblank_count() or drm_crtc_vblank_count_and_time() * provide a barrier: Any writes done before calling * drm_crtc_handle_vblank() will be visible to callers of the later - * functions, iff the vblank count is the same or a later one. + * functions, if the vblank count is the same or a later one. * * See also &drm_vblank_crtc.count. 
*/ @@ -1737,6 +1737,15 @@ static void drm_wait_vblank_reply(struct drm_device *dev, unsigned int pipe, reply->tval_usec = ts.tv_nsec / 1000; } +static bool drm_wait_vblank_supported(struct drm_device *dev) +{ +#if IS_ENABLED(CONFIG_DRM_LEGACY) + if (unlikely(drm_core_check_feature(dev, DRIVER_LEGACY))) + return dev->irq_enabled; +#endif + return drm_dev_has_vblank(dev); +} + int drm_wait_vblank_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { @@ -1748,7 +1757,7 @@ int drm_wait_vblank_ioctl(struct drm_device *dev, void *data, unsigned int pipe_index; unsigned int flags, pipe, high_pipe; - if (!dev->irq_enabled) + if (!drm_wait_vblank_supported(dev)) return -EOPNOTSUPP; if (vblwait->request.type & _DRM_VBLANK_SIGNAL) @@ -1988,7 +1997,7 @@ EXPORT_SYMBOL(drm_handle_vblank); * and drm_crtc_vblank_count() or drm_crtc_vblank_count_and_time() * provide a barrier: Any writes done before calling * drm_crtc_handle_vblank() will be visible to callers of the later - * functions, iff the vblank count is the same or a later one. + * functions, if the vblank count is the same or a later one. * * See also &drm_vblank_crtc.count. * @@ -2005,7 +2014,7 @@ EXPORT_SYMBOL(drm_crtc_handle_vblank); * Get crtc VBLANK count. * * \param dev DRM device - * \param data user arguement, pointing to a drm_crtc_get_sequence structure. + * \param data user argument, pointing to a drm_crtc_get_sequence structure. * \param file_priv drm file private for the user's open file descriptor */ @@ -2023,7 +2032,7 @@ int drm_crtc_get_sequence_ioctl(struct drm_device *dev, void *data, if (!drm_core_check_feature(dev, DRIVER_MODESET)) return -EOPNOTSUPP; - if (!dev->irq_enabled) + if (!drm_dev_has_vblank(dev)) return -EOPNOTSUPP; crtc = drm_crtc_find(dev, file_priv, get_seq->crtc_id); @@ -2061,7 +2070,7 @@ int drm_crtc_get_sequence_ioctl(struct drm_device *dev, void *data, * Queue a event for VBLANK sequence * * \param dev DRM device - * \param data user arguement, pointing to a drm_crtc_queue_sequence structure. + * \param data user argument, pointing to a drm_crtc_queue_sequence structure. * \param file_priv drm file private for the user's open file descriptor */ @@ -2082,7 +2091,7 @@ int drm_crtc_queue_sequence_ioctl(struct drm_device *dev, void *data, if (!drm_core_check_feature(dev, DRIVER_MODESET)) return -EOPNOTSUPP; - if (!dev->irq_enabled) + if (!drm_dev_has_vblank(dev)) return -EOPNOTSUPP; crtc = drm_crtc_find(dev, file_priv, queue_seq->crtc_id); diff --git a/drivers/gpu/drm/drm_vma_manager.c b/drivers/gpu/drm/drm_vma_manager.c index 4565319fa6b3..7de37f8c68fd 100644 --- a/drivers/gpu/drm/drm_vma_manager.c +++ b/drivers/gpu/drm/drm_vma_manager.c @@ -361,7 +361,7 @@ EXPORT_SYMBOL(drm_vma_node_revoke); * This is locked against concurrent access internally. 
* * RETURNS: - * true iff @filp is on the list + * true if @filp is on the list */ bool drm_vma_node_is_allowed(struct drm_vma_offset_node *node, struct drm_file *tag) diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.c b/drivers/gpu/drm/etnaviv/etnaviv_drv.c index f0a07278ad04..7dcc6392792d 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_drv.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.c @@ -468,17 +468,7 @@ static const struct drm_ioctl_desc etnaviv_ioctls[] = { ETNA_IOCTL(PM_QUERY_SIG, pm_query_sig, DRM_RENDER_ALLOW), }; -static const struct file_operations fops = { - .owner = THIS_MODULE, - .open = drm_open, - .release = drm_release, - .unlocked_ioctl = drm_ioctl, - .compat_ioctl = drm_compat_ioctl, - .poll = drm_poll, - .read = drm_read, - .llseek = no_llseek, - .mmap = etnaviv_gem_mmap, -}; +DEFINE_DRM_GEM_FOPS(fops); static const struct drm_driver etnaviv_drm_driver = { .driver_features = DRIVER_GEM | DRIVER_RENDER, @@ -487,7 +477,7 @@ static const struct drm_driver etnaviv_drm_driver = { .prime_handle_to_fd = drm_gem_prime_handle_to_fd, .prime_fd_to_handle = drm_gem_prime_fd_to_handle, .gem_prime_import_sg_table = etnaviv_gem_prime_import_sg_table, - .gem_prime_mmap = etnaviv_gem_prime_mmap, + .gem_prime_mmap = drm_gem_prime_mmap, #ifdef CONFIG_DEBUG_FS .debugfs_init = etnaviv_debugfs_init, #endif diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.h b/drivers/gpu/drm/etnaviv/etnaviv_drv.h index 003288ebd896..049ae87de9be 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_drv.h +++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.h @@ -47,12 +47,9 @@ struct etnaviv_drm_private { int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data, struct drm_file *file); -int etnaviv_gem_mmap(struct file *filp, struct vm_area_struct *vma); int etnaviv_gem_mmap_offset(struct drm_gem_object *obj, u64 *offset); struct sg_table *etnaviv_gem_prime_get_sg_table(struct drm_gem_object *obj); int etnaviv_gem_prime_vmap(struct drm_gem_object *obj, struct dma_buf_map *map); -int etnaviv_gem_prime_mmap(struct drm_gem_object *obj, - struct vm_area_struct *vma); struct drm_gem_object *etnaviv_gem_prime_import_sg_table(struct drm_device *dev, struct dma_buf_attachment *attach, struct sg_table *sg); int etnaviv_gem_prime_pin(struct drm_gem_object *obj); diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.c b/drivers/gpu/drm/etnaviv/etnaviv_gem.c index b8fa6ed3dd73..8f1b5af47dd6 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c @@ -130,8 +130,7 @@ static int etnaviv_gem_mmap_obj(struct etnaviv_gem_object *etnaviv_obj, { pgprot_t vm_page_prot; - vma->vm_flags &= ~VM_PFNMAP; - vma->vm_flags |= VM_MIXEDMAP; + vma->vm_flags |= VM_IO | VM_MIXEDMAP | VM_DONTEXPAND | VM_DONTDUMP; vm_page_prot = vm_get_page_prot(vma->vm_flags); @@ -154,19 +153,11 @@ static int etnaviv_gem_mmap_obj(struct etnaviv_gem_object *etnaviv_obj, return 0; } -int etnaviv_gem_mmap(struct file *filp, struct vm_area_struct *vma) +static int etnaviv_gem_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) { - struct etnaviv_gem_object *obj; - int ret; - - ret = drm_gem_mmap(filp, vma); - if (ret) { - DBG("mmap failed: %d", ret); - return ret; - } + struct etnaviv_gem_object *etnaviv_obj = to_etnaviv_bo(obj); - obj = to_etnaviv_bo(vma->vm_private_data); - return obj->ops->mmap(obj, vma); + return etnaviv_obj->ops->mmap(etnaviv_obj, vma); } static vm_fault_t etnaviv_gem_fault(struct vm_fault *vmf) @@ -567,6 +558,7 @@ static const struct drm_gem_object_funcs etnaviv_gem_object_funcs = { .unpin = etnaviv_gem_prime_unpin, 
.get_sg_table = etnaviv_gem_prime_get_sg_table, .vmap = etnaviv_gem_prime_vmap, + .mmap = etnaviv_gem_mmap, .vm_ops = &vm_ops, }; diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c index d741b1d735f7..6d8bed9c739d 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c @@ -34,19 +34,6 @@ int etnaviv_gem_prime_vmap(struct drm_gem_object *obj, struct dma_buf_map *map) return 0; } -int etnaviv_gem_prime_mmap(struct drm_gem_object *obj, - struct vm_area_struct *vma) -{ - struct etnaviv_gem_object *etnaviv_obj = to_etnaviv_bo(obj); - int ret; - - ret = drm_gem_mmap_obj(obj, obj->size, vma); - if (ret < 0) - return ret; - - return etnaviv_obj->ops->mmap(etnaviv_obj, vma); -} - int etnaviv_gem_prime_pin(struct drm_gem_object *obj) { if (!obj->import_attach) { diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c index 4102bcea3341..c297fffe06eb 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c @@ -613,6 +613,12 @@ static void etnaviv_gpu_enable_mlcg(struct etnaviv_gpu *gpu) etnaviv_is_model_rev(gpu, GC2000, 0x5108)) pmc |= VIVS_PM_MODULE_CONTROLS_DISABLE_MODULE_CLOCK_GATING_TX; + /* Disable SE, RA and TX clock gating on affected core revisions. */ + if (etnaviv_is_model_rev(gpu, GC7000, 0x6202)) + pmc |= VIVS_PM_MODULE_CONTROLS_DISABLE_MODULE_CLOCK_GATING_SE | + VIVS_PM_MODULE_CONTROLS_DISABLE_MODULE_CLOCK_GATING_RA | + VIVS_PM_MODULE_CONTROLS_DISABLE_MODULE_CLOCK_GATING_TX; + pmc |= VIVS_PM_MODULE_CONTROLS_DISABLE_MODULE_CLOCK_GATING_RA_HZ; pmc |= VIVS_PM_MODULE_CONTROLS_DISABLE_MODULE_CLOCK_GATING_RA_EZ; diff --git a/drivers/gpu/drm/etnaviv/etnaviv_hwdb.c b/drivers/gpu/drm/etnaviv/etnaviv_hwdb.c index dfc0f536b3b9..f2fc645c7956 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_hwdb.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_hwdb.c @@ -39,6 +39,37 @@ static const struct etnaviv_chip_identity etnaviv_chip_identities[] = { }, { .model = 0x7000, + .revision = 0x6202, + .product_id = 0x70003, + .customer_id = 0, + .eco_id = 0, + .stream_count = 8, + .register_max = 64, + .thread_count = 512, + .shader_core_count = 2, + .vertex_cache_size = 16, + .vertex_output_buffer_size = 1024, + .pixel_pipes = 1, + .instruction_count = 512, + .num_constants = 320, + .buffer_size = 0, + .varyings_count = 16, + .features = 0xe0287cad, + .minor_features0 = 0xc1489eff, + .minor_features1 = 0xfefbfad9, + .minor_features2 = 0xeb9d4fbf, + .minor_features3 = 0xedfffced, + .minor_features4 = 0xdb0dafc7, + .minor_features5 = 0x3b5ac333, + .minor_features6 = 0xfccee201, + .minor_features7 = 0x03fffa6f, + .minor_features8 = 0x00e10ef0, + .minor_features9 = 0x0088003c, + .minor_features10 = 0x00004040, + .minor_features11 = 0x00000024, + }, + { + .model = 0x7000, .revision = 0x6204, .product_id = ~0U, .customer_id = ~0U, diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c b/drivers/gpu/drm/etnaviv/etnaviv_sched.c index 19826e504efc..feb6da1b6ceb 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c @@ -190,7 +190,8 @@ int etnaviv_sched_init(struct etnaviv_gpu *gpu) ret = drm_sched_init(&gpu->sched, &etnaviv_sched_ops, etnaviv_hw_jobs_limit, etnaviv_job_hang_limit, - msecs_to_jiffies(500), NULL, dev_name(gpu->dev)); + msecs_to_jiffies(500), NULL, NULL, + dev_name(gpu->dev)); if (ret) return ret; diff --git a/drivers/gpu/drm/exynos/exynos_dp.c b/drivers/gpu/drm/exynos/exynos_dp.c index 
9ac51b6ab34b..27664f663c5a 100644 --- a/drivers/gpu/drm/exynos/exynos_dp.c +++ b/drivers/gpu/drm/exynos/exynos_dp.c @@ -109,11 +109,8 @@ static int exynos_dp_bridge_attach(struct analogix_dp_plat_data *plat_data, if (dp->ptn_bridge) { ret = drm_bridge_attach(&dp->encoder, dp->ptn_bridge, bridge, 0); - if (ret) { - DRM_DEV_ERROR(dp->dev, - "Failed to attach bridge to drm\n"); + if (ret) return ret; - } } return 0; diff --git a/drivers/gpu/drm/exynos/exynos_drm_dma.c b/drivers/gpu/drm/exynos/exynos_drm_dma.c index 0644936afee2..bf33c3084cb4 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_dma.c +++ b/drivers/gpu/drm/exynos/exynos_drm_dma.c @@ -115,6 +115,8 @@ int exynos_drm_register_dma(struct drm_device *drm, struct device *dev, EXYNOS_DEV_ADDR_START, EXYNOS_DEV_ADDR_SIZE); else if (IS_ENABLED(CONFIG_IOMMU_DMA)) mapping = iommu_get_domain_for_dev(priv->dma_dev); + else + mapping = ERR_PTR(-ENODEV); if (IS_ERR(mapping)) return PTR_ERR(mapping); diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.c b/drivers/gpu/drm/exynos/exynos_drm_drv.c index e60257f1f24b..d8f1cf4d6b69 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_drv.c +++ b/drivers/gpu/drm/exynos/exynos_drm_drv.c @@ -300,16 +300,6 @@ static int exynos_drm_bind(struct device *dev) drm_mode_config_reset(drm); - /* - * enable drm irq mode. - * - with irq_enabled = true, we can use the vblank feature. - * - * P.S. note that we wouldn't use drm irq handler but - * just specific driver own one instead because - * drm framework supports only one irq handler. - */ - drm->irq_enabled = true; - /* init kms poll for handling hpd */ drm_kms_helper_poll_init(drm); diff --git a/drivers/gpu/drm/exynos/exynos_drm_dsi.c b/drivers/gpu/drm/exynos/exynos_drm_dsi.c index 1d777d8c1a83..e39fac889edc 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_dsi.c +++ b/drivers/gpu/drm/exynos/exynos_drm_dsi.c @@ -809,15 +809,15 @@ static int exynos_dsi_init_link(struct exynos_dsi *dsi) reg |= DSIM_AUTO_MODE; if (dsi->mode_flags & MIPI_DSI_MODE_VIDEO_HSE) reg |= DSIM_HSE_MODE; - if (!(dsi->mode_flags & MIPI_DSI_MODE_VIDEO_HFP)) + if (!(dsi->mode_flags & MIPI_DSI_MODE_VIDEO_NO_HFP)) reg |= DSIM_HFP_MODE; - if (!(dsi->mode_flags & MIPI_DSI_MODE_VIDEO_HBP)) + if (!(dsi->mode_flags & MIPI_DSI_MODE_VIDEO_NO_HBP)) reg |= DSIM_HBP_MODE; - if (!(dsi->mode_flags & MIPI_DSI_MODE_VIDEO_HSA)) + if (!(dsi->mode_flags & MIPI_DSI_MODE_VIDEO_NO_HSA)) reg |= DSIM_HSA_MODE; } - if (!(dsi->mode_flags & MIPI_DSI_MODE_EOT_PACKET)) + if (!(dsi->mode_flags & MIPI_DSI_MODE_NO_EOT_PACKET)) reg |= DSIM_EOT_DISABLE; switch (dsi->format) { diff --git a/drivers/gpu/drm/exynos/exynos_drm_g2d.c b/drivers/gpu/drm/exynos/exynos_drm_g2d.c index cab4d2c370a7..b00230626c6a 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_g2d.c +++ b/drivers/gpu/drm/exynos/exynos_drm_g2d.c @@ -4,6 +4,7 @@ * Authors: Joonyoung Shim <jy0922.shim@samsung.com> */ +#include <linux/refcount.h> #include <linux/clk.h> #include <linux/component.h> #include <linux/delay.h> @@ -208,7 +209,7 @@ struct g2d_cmdlist_userptr { struct page **pages; unsigned int npages; struct sg_table *sgt; - atomic_t refcount; + refcount_t refcount; bool in_pool; bool out_of_list; }; @@ -386,9 +387,9 @@ static void g2d_userptr_put_dma_addr(struct g2d_data *g2d, if (force) goto out; - atomic_dec(&g2d_userptr->refcount); + refcount_dec(&g2d_userptr->refcount); - if (atomic_read(&g2d_userptr->refcount) > 0) + if (refcount_read(&g2d_userptr->refcount) > 0) return; if (g2d_userptr->in_pool) @@ -436,7 +437,7 @@ static dma_addr_t *g2d_userptr_get_dma_addr(struct 
g2d_data *g2d, * and different size. */ if (g2d_userptr->size == size) { - atomic_inc(&g2d_userptr->refcount); + refcount_inc(&g2d_userptr->refcount); *obj = g2d_userptr; return &g2d_userptr->dma_addr; @@ -461,7 +462,7 @@ static dma_addr_t *g2d_userptr_get_dma_addr(struct g2d_data *g2d, if (!g2d_userptr) return ERR_PTR(-ENOMEM); - atomic_set(&g2d_userptr->refcount, 1); + refcount_set(&g2d_userptr->refcount, 1); g2d_userptr->size = size; start = userptr & PAGE_MASK; @@ -897,13 +898,14 @@ static void g2d_runqueue_worker(struct work_struct *work) ret = pm_runtime_resume_and_get(g2d->dev); if (ret < 0) { dev_err(g2d->dev, "failed to enable G2D device.\n"); - return; + goto out; } g2d_dma_start(g2d, g2d->runqueue_node); } } +out: mutex_unlock(&g2d->runqueue_mutex); } diff --git a/drivers/gpu/drm/exynos/exynos_hdmi.c b/drivers/gpu/drm/exynos/exynos_hdmi.c index f893731d6021..c769dec576de 100644 --- a/drivers/gpu/drm/exynos/exynos_hdmi.c +++ b/drivers/gpu/drm/exynos/exynos_hdmi.c @@ -970,11 +970,8 @@ static int hdmi_create_connector(struct drm_encoder *encoder) drm_connector_helper_add(connector, &hdmi_connector_helper_funcs); drm_connector_attach_encoder(connector, encoder); - if (hdata->bridge) { + if (hdata->bridge) ret = drm_bridge_attach(encoder, hdata->bridge, NULL, 0); - if (ret) - DRM_DEV_ERROR(hdata->dev, "Failed to attach bridge\n"); - } cec_fill_conn_info_from_drm(&conn_info, connector); diff --git a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c index 7528e8a2d359..660fe573db96 100644 --- a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c +++ b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c @@ -23,7 +23,6 @@ #include <drm/drm_fb_cma_helper.h> #include <drm/drm_fb_helper.h> #include <drm/drm_gem_cma_helper.h> -#include <drm/drm_irq.h> #include <drm/drm_modeset_helper.h> #include <drm/drm_probe_helper.h> #include <drm/drm_vblank.h> @@ -51,7 +50,7 @@ static const struct regmap_config fsl_dcu_regmap_config = { .volatile_reg = fsl_dcu_drm_is_volatile_reg, }; -static void fsl_dcu_irq_uninstall(struct drm_device *dev) +static void fsl_dcu_irq_reset(struct drm_device *dev) { struct fsl_dcu_drm_device *fsl_dev = dev->dev_private; @@ -59,6 +58,45 @@ static void fsl_dcu_irq_uninstall(struct drm_device *dev) regmap_write(fsl_dev->regmap, DCU_INT_MASK, ~0); } +static irqreturn_t fsl_dcu_drm_irq(int irq, void *arg) +{ + struct drm_device *dev = arg; + struct fsl_dcu_drm_device *fsl_dev = dev->dev_private; + unsigned int int_status; + int ret; + + ret = regmap_read(fsl_dev->regmap, DCU_INT_STATUS, &int_status); + if (ret) { + dev_err(dev->dev, "read DCU_INT_STATUS failed\n"); + return IRQ_NONE; + } + + if (int_status & DCU_INT_STATUS_VBLANK) + drm_handle_vblank(dev, 0); + + regmap_write(fsl_dev->regmap, DCU_INT_STATUS, int_status); + + return IRQ_HANDLED; +} + +static int fsl_dcu_irq_install(struct drm_device *dev, unsigned int irq) +{ + if (irq == IRQ_NOTCONNECTED) + return -ENOTCONN; + + fsl_dcu_irq_reset(dev); + + return request_irq(irq, fsl_dcu_drm_irq, 0, dev->driver->name, dev); +} + +static void fsl_dcu_irq_uninstall(struct drm_device *dev) +{ + struct fsl_dcu_drm_device *fsl_dev = dev->dev_private; + + fsl_dcu_irq_reset(dev); + free_irq(fsl_dev->irq, dev); +} + static int fsl_dcu_load(struct drm_device *dev, unsigned long flags) { struct fsl_dcu_drm_device *fsl_dev = dev->dev_private; @@ -73,13 +111,13 @@ static int fsl_dcu_load(struct drm_device *dev, unsigned long flags) ret = drm_vblank_init(dev, dev->mode_config.num_crtc); if (ret < 0) { dev_err(dev->dev, 
"failed to initialize vblank\n"); - goto done; + goto done_vblank; } - ret = drm_irq_install(dev, fsl_dev->irq); + ret = fsl_dcu_irq_install(dev, fsl_dev->irq); if (ret < 0) { dev_err(dev->dev, "failed to install IRQ handler\n"); - goto done; + goto done_irq; } if (legacyfb_depth != 16 && legacyfb_depth != 24 && @@ -90,11 +128,11 @@ static int fsl_dcu_load(struct drm_device *dev, unsigned long flags) } return 0; -done: +done_irq: drm_kms_helper_poll_fini(dev); drm_mode_config_cleanup(dev); - drm_irq_uninstall(dev); +done_vblank: dev->dev_private = NULL; return ret; @@ -106,41 +144,17 @@ static void fsl_dcu_unload(struct drm_device *dev) drm_kms_helper_poll_fini(dev); drm_mode_config_cleanup(dev); - drm_irq_uninstall(dev); + fsl_dcu_irq_uninstall(dev); dev->dev_private = NULL; } -static irqreturn_t fsl_dcu_drm_irq(int irq, void *arg) -{ - struct drm_device *dev = arg; - struct fsl_dcu_drm_device *fsl_dev = dev->dev_private; - unsigned int int_status; - int ret; - - ret = regmap_read(fsl_dev->regmap, DCU_INT_STATUS, &int_status); - if (ret) { - dev_err(dev->dev, "read DCU_INT_STATUS failed\n"); - return IRQ_NONE; - } - - if (int_status & DCU_INT_STATUS_VBLANK) - drm_handle_vblank(dev, 0); - - regmap_write(fsl_dev->regmap, DCU_INT_STATUS, int_status); - - return IRQ_HANDLED; -} - DEFINE_DRM_GEM_CMA_FOPS(fsl_dcu_drm_fops); static const struct drm_driver fsl_dcu_drm_driver = { .driver_features = DRIVER_GEM | DRIVER_MODESET | DRIVER_ATOMIC, .load = fsl_dcu_load, .unload = fsl_dcu_unload, - .irq_handler = fsl_dcu_drm_irq, - .irq_preinstall = fsl_dcu_irq_uninstall, - .irq_uninstall = fsl_dcu_irq_uninstall, DRM_GEM_CMA_DRIVER_OPS, .fops = &fsl_dcu_drm_fops, .name = "fsl-dcu-drm", diff --git a/drivers/gpu/drm/gma500/oaktrail_lvds.c b/drivers/gpu/drm/gma500/oaktrail_lvds.c index 432bdcc57ac9..f9b1f88c73bd 100644 --- a/drivers/gpu/drm/gma500/oaktrail_lvds.c +++ b/drivers/gpu/drm/gma500/oaktrail_lvds.c @@ -113,11 +113,11 @@ static void oaktrail_lvds_mode_set(struct drm_encoder *encoder, /* Find the connector we're trying to set up */ list_for_each_entry(connector, &mode_config->connector_list, head) { - if (!connector->encoder || connector->encoder->crtc != crtc) - continue; + if (connector->encoder && connector->encoder->crtc == crtc) + break; } - if (!connector) { + if (list_entry_is_head(connector, &mode_config->connector_list, head)) { DRM_ERROR("Couldn't find connector when setting mode"); gma_power_end(dev); return; diff --git a/drivers/gpu/drm/gma500/power.c b/drivers/gpu/drm/gma500/power.c index f07641dfa5a4..20ace6010f9f 100644 --- a/drivers/gpu/drm/gma500/power.c +++ b/drivers/gpu/drm/gma500/power.c @@ -32,6 +32,7 @@ #include "psb_drv.h" #include "psb_reg.h" #include "psb_intel_reg.h" +#include "psb_irq.h" #include <linux/mutex.h> #include <linux/pm_runtime.h> diff --git a/drivers/gpu/drm/gma500/psb_drv.c b/drivers/gpu/drm/gma500/psb_drv.c index 3850842d58f3..58bce1a60a4d 100644 --- a/drivers/gpu/drm/gma500/psb_drv.c +++ b/drivers/gpu/drm/gma500/psb_drv.c @@ -23,7 +23,6 @@ #include <drm/drm_fb_helper.h> #include <drm/drm_file.h> #include <drm/drm_ioctl.h> -#include <drm/drm_irq.h> #include <drm/drm_pciids.h> #include <drm/drm_vblank.h> @@ -33,6 +32,7 @@ #include "power.h" #include "psb_drv.h" #include "psb_intel_reg.h" +#include "psb_irq.h" #include "psb_reg.h" static const struct drm_driver driver; @@ -380,7 +380,7 @@ static int psb_driver_load(struct drm_device *dev, unsigned long flags) PSB_WVDC32(0xFFFFFFFF, PSB_INT_MASK_R); spin_unlock_irqrestore(&dev_priv->irqmask_lock, irqflags); - 
drm_irq_install(dev, pdev->irq); + psb_irq_install(dev, pdev->irq); dev->max_vblank_count = 0xffffff; /* only 24 bits of frame count */ @@ -515,10 +515,6 @@ static const struct drm_driver driver = { .lastclose = drm_fb_helper_lastclose, .num_ioctls = ARRAY_SIZE(psb_ioctls), - .irq_preinstall = psb_irq_preinstall, - .irq_postinstall = psb_irq_postinstall, - .irq_uninstall = psb_irq_uninstall, - .irq_handler = psb_irq_handler, .dumb_create = psb_gem_dumb_create, .ioctls = psb_ioctls, diff --git a/drivers/gpu/drm/gma500/psb_drv.h b/drivers/gpu/drm/gma500/psb_drv.h index d6e7c2c2c947..f2bae270ca7b 100644 --- a/drivers/gpu/drm/gma500/psb_drv.h +++ b/drivers/gpu/drm/gma500/psb_drv.h @@ -624,11 +624,6 @@ static inline struct drm_psb_private *psb_priv(struct drm_device *dev) } /* psb_irq.c */ -extern irqreturn_t psb_irq_handler(int irq, void *arg); -extern void psb_irq_preinstall(struct drm_device *dev); -extern int psb_irq_postinstall(struct drm_device *dev); -extern void psb_irq_uninstall(struct drm_device *dev); - extern void psb_irq_uninstall_islands(struct drm_device *dev, int hw_islands); extern int psb_vblank_wait2(struct drm_device *dev, unsigned int *sequence); extern int psb_vblank_wait(struct drm_device *dev, unsigned int *sequence); diff --git a/drivers/gpu/drm/gma500/psb_irq.c b/drivers/gpu/drm/gma500/psb_irq.c index 104009e78487..deb1fbc1f748 100644 --- a/drivers/gpu/drm/gma500/psb_irq.c +++ b/drivers/gpu/drm/gma500/psb_irq.c @@ -8,6 +8,7 @@ * **************************************************************************/ +#include <drm/drm_drv.h> #include <drm/drm_vblank.h> #include "power.h" @@ -222,7 +223,7 @@ static void psb_sgx_interrupt(struct drm_device *dev, u32 stat_1, u32 stat_2) PSB_RSGX32(PSB_CR_EVENT_HOST_CLEAR2); } -irqreturn_t psb_irq_handler(int irq, void *arg) +static irqreturn_t psb_irq_handler(int irq, void *arg) { struct drm_device *dev = arg; struct drm_psb_private *dev_priv = dev->dev_private; @@ -304,7 +305,7 @@ void psb_irq_preinstall(struct drm_device *dev) spin_unlock_irqrestore(&dev_priv->irqmask_lock, irqflags); } -int psb_irq_postinstall(struct drm_device *dev) +void psb_irq_postinstall(struct drm_device *dev) { struct drm_psb_private *dev_priv = dev->dev_private; unsigned long irqflags; @@ -332,12 +333,31 @@ int psb_irq_postinstall(struct drm_device *dev) dev_priv->ops->hotplug_enable(dev, true); spin_unlock_irqrestore(&dev_priv->irqmask_lock, irqflags); +} + +int psb_irq_install(struct drm_device *dev, unsigned int irq) +{ + int ret; + + if (irq == IRQ_NOTCONNECTED) + return -ENOTCONN; + + psb_irq_preinstall(dev); + + /* PCI devices require shared interrupts. 
*/ + ret = request_irq(irq, psb_irq_handler, IRQF_SHARED, dev->driver->name, dev); + if (ret) + return ret; + + psb_irq_postinstall(dev); + return 0; } void psb_irq_uninstall(struct drm_device *dev) { struct drm_psb_private *dev_priv = dev->dev_private; + struct pci_dev *pdev = to_pci_dev(dev->dev); unsigned long irqflags; unsigned int i; @@ -366,6 +386,8 @@ void psb_irq_uninstall(struct drm_device *dev) /* This register is safe even if display island is off */ PSB_WVDC32(PSB_RVDC32(PSB_INT_IDENTITY_R), PSB_INT_IDENTITY_R); spin_unlock_irqrestore(&dev_priv->irqmask_lock, irqflags); + + free_irq(pdev->irq, dev); } /* diff --git a/drivers/gpu/drm/gma500/psb_irq.h b/drivers/gpu/drm/gma500/psb_irq.h index 17c9b0b62471..a97cb49393d8 100644 --- a/drivers/gpu/drm/gma500/psb_irq.h +++ b/drivers/gpu/drm/gma500/psb_irq.h @@ -19,9 +19,9 @@ bool sysirq_init(struct drm_device *dev); void sysirq_uninit(struct drm_device *dev); void psb_irq_preinstall(struct drm_device *dev); -int psb_irq_postinstall(struct drm_device *dev); +void psb_irq_postinstall(struct drm_device *dev); +int psb_irq_install(struct drm_device *dev, unsigned int irq); void psb_irq_uninstall(struct drm_device *dev); -irqreturn_t psb_irq_handler(int irq, void *arg); int psb_enable_vblank(struct drm_crtc *crtc); void psb_disable_vblank(struct drm_crtc *crtc); diff --git a/drivers/gpu/drm/gud/gud_drv.c b/drivers/gpu/drm/gud/gud_drv.c index e8b672dc9832..eb4e08846da4 100644 --- a/drivers/gpu/drm/gud/gud_drv.c +++ b/drivers/gpu/drm/gud/gud_drv.c @@ -364,7 +364,6 @@ static void gud_debugfs_init(struct drm_minor *minor) static const struct drm_simple_display_pipe_funcs gud_pipe_funcs = { .check = gud_pipe_check, .update = gud_pipe_update, - .prepare_fb = drm_gem_simple_display_pipe_prepare_fb, }; static const struct drm_mode_config_funcs gud_mode_config_funcs = { @@ -394,14 +393,42 @@ static const struct drm_driver gud_drm_driver = { .minor = 0, }; -static void gud_free_buffers_and_mutex(struct drm_device *drm, void *unused) +static int gud_alloc_bulk_buffer(struct gud_device *gdrm) { - struct gud_device *gdrm = to_gud_device(drm); + unsigned int i, num_pages; + struct page **pages; + void *ptr; + int ret; + + gdrm->bulk_buf = vmalloc_32(gdrm->bulk_len); + if (!gdrm->bulk_buf) + return -ENOMEM; + + num_pages = DIV_ROUND_UP(gdrm->bulk_len, PAGE_SIZE); + pages = kmalloc_array(num_pages, sizeof(struct page *), GFP_KERNEL); + if (!pages) + return -ENOMEM; + + for (i = 0, ptr = gdrm->bulk_buf; i < num_pages; i++, ptr += PAGE_SIZE) + pages[i] = vmalloc_to_page(ptr); + + ret = sg_alloc_table_from_pages(&gdrm->bulk_sgt, pages, num_pages, + 0, gdrm->bulk_len, GFP_KERNEL); + kfree(pages); + + return ret; +} + +static void gud_free_buffers_and_mutex(void *data) +{ + struct gud_device *gdrm = data; vfree(gdrm->compress_buf); - kfree(gdrm->bulk_buf); + gdrm->compress_buf = NULL; + sg_free_table(&gdrm->bulk_sgt); + vfree(gdrm->bulk_buf); + gdrm->bulk_buf = NULL; mutex_destroy(&gdrm->ctrl_lock); - mutex_destroy(&gdrm->damage_lock); } static int gud_probe(struct usb_interface *intf, const struct usb_device_id *id) @@ -455,7 +482,7 @@ static int gud_probe(struct usb_interface *intf, const struct usb_device_id *id) INIT_WORK(&gdrm->work, gud_flush_work); gud_clear_damage(gdrm); - ret = drmm_add_action_or_reset(drm, gud_free_buffers_and_mutex, NULL); + ret = devm_add_action(dev, gud_free_buffers_and_mutex, gdrm); if (ret) return ret; @@ -536,24 +563,17 @@ static int gud_probe(struct usb_interface *intf, const struct usb_device_id *id) if (desc.max_buffer_size) 
max_buffer_size = le32_to_cpu(desc.max_buffer_size); -retry: - /* - * Use plain kmalloc here since devm_kmalloc() places struct devres at the beginning - * of the buffer it allocates. This wastes a lot of memory when allocating big buffers. - * Asking for 2M would actually allocate 4M. This would also prevent getting the biggest - * possible buffer potentially leading to split transfers. - */ - gdrm->bulk_buf = kmalloc(max_buffer_size, GFP_KERNEL | __GFP_NOWARN); - if (!gdrm->bulk_buf) { - max_buffer_size = roundup_pow_of_two(max_buffer_size) / 2; - if (max_buffer_size < SZ_512K) - return -ENOMEM; - goto retry; - } + /* Prevent a misbehaving device from allocating loads of RAM. 4096x4096@XRGB8888 = 64 MB */ + if (max_buffer_size > SZ_64M) + max_buffer_size = SZ_64M; gdrm->bulk_pipe = usb_sndbulkpipe(interface_to_usbdev(intf), usb_endpoint_num(bulk_out)); gdrm->bulk_len = max_buffer_size; + ret = gud_alloc_bulk_buffer(gdrm); + if (ret) + return ret; + if (gdrm->compression & GUD_COMPRESSION_LZ4) { gdrm->lz4_comp_mem = devm_kmalloc(dev, LZ4_MEM_COMPRESS, GFP_KERNEL); if (!gdrm->lz4_comp_mem) @@ -640,6 +660,7 @@ static int gud_resume(struct usb_interface *intf) static const struct usb_device_id gud_id_table[] = { { USB_DEVICE_INTERFACE_CLASS(0x1d50, 0x614d, USB_CLASS_VENDOR_SPEC) }, + { USB_DEVICE_INTERFACE_CLASS(0x16d0, 0x10a9, USB_CLASS_VENDOR_SPEC) }, { } }; diff --git a/drivers/gpu/drm/gud/gud_internal.h b/drivers/gpu/drm/gud/gud_internal.h index b65105585578..2a388e27d5d7 100644 --- a/drivers/gpu/drm/gud/gud_internal.h +++ b/drivers/gpu/drm/gud/gud_internal.h @@ -5,6 +5,7 @@ #include <linux/list.h> #include <linux/mutex.h> +#include <linux/scatterlist.h> #include <linux/usb.h> #include <linux/workqueue.h> #include <uapi/drm/drm_fourcc.h> @@ -26,6 +27,7 @@ struct gud_device { unsigned int bulk_pipe; void *bulk_buf; size_t bulk_len; + struct sg_table bulk_sgt; u8 compression; void *lz4_comp_mem; diff --git a/drivers/gpu/drm/gud/gud_pipe.c b/drivers/gpu/drm/gud/gud_pipe.c index 2f83ab6b8e61..b9b0e435ea0f 100644 --- a/drivers/gpu/drm/gud/gud_pipe.c +++ b/drivers/gpu/drm/gud/gud_pipe.c @@ -3,7 +3,6 @@ * Copyright 2020 Noralf Trønnes */ -#include <linux/dma-buf.h> #include <linux/lz4.h> #include <linux/usb.h> #include <linux/workqueue.h> @@ -15,7 +14,8 @@ #include <drm/drm_format_helper.h> #include <drm/drm_fourcc.h> #include <drm/drm_framebuffer.h> -#include <drm/drm_gem_shmem_helper.h> +#include <drm/drm_gem.h> +#include <drm/drm_gem_framebuffer_helper.h> #include <drm/drm_print.h> #include <drm/drm_rect.h> #include <drm/drm_simple_kms_helper.h> @@ -24,6 +24,19 @@ #include "gud_internal.h" /* + * Some userspace rendering loops runs all displays in the same loop. + * This means that a fast display will have to wait for a slow one. + * For this reason gud does flushing asynchronous by default. + * The down side is that in e.g. a single display setup userspace thinks + * the display is insanely fast since the driver reports back immediately + * that the flush/pageflip is done. This wastes CPU and power. + * Such users might want to set this module parameter to false. + */ +static bool gud_async_flush = true; +module_param_named(async_flush, gud_async_flush, bool, 0644); +MODULE_PARM_DESC(async_flush, "Enable asynchronous flushing [default=true]"); + +/* * FIXME: The driver is probably broken on Big Endian machines. 
* See discussion: * https://lore.kernel.org/dri-devel/CAKb7UvihLX0hgBOP3VBG7O+atwZcUVCPVuBdfmDMpg0NjXe-cQ@mail.gmail.com/ @@ -139,7 +152,8 @@ static int gud_prep_flush(struct gud_device *gdrm, struct drm_framebuffer *fb, { struct dma_buf_attachment *import_attach = fb->obj[0]->import_attach; u8 compression = gdrm->compression; - struct dma_buf_map map; + struct dma_buf_map map[DRM_FORMAT_MAX_PLANES]; + struct dma_buf_map map_data[DRM_FORMAT_MAX_PLANES]; void *vaddr, *buf; size_t pitch, len; int ret = 0; @@ -149,17 +163,15 @@ static int gud_prep_flush(struct gud_device *gdrm, struct drm_framebuffer *fb, if (len > gdrm->bulk_len) return -E2BIG; - ret = drm_gem_shmem_vmap(fb->obj[0], &map); + ret = drm_gem_fb_vmap(fb, map, map_data); if (ret) return ret; - vaddr = map.vaddr + fb->offsets[0]; + vaddr = map_data[0].vaddr; - if (import_attach) { - ret = dma_buf_begin_cpu_access(import_attach->dmabuf, DMA_FROM_DEVICE); - if (ret) - goto vunmap; - } + ret = drm_gem_fb_begin_cpu_access(fb, DMA_FROM_DEVICE); + if (ret) + goto vunmap; retry: if (compression) buf = gdrm->compress_buf; @@ -212,10 +224,48 @@ retry: } end_cpu_access: - if (import_attach) - dma_buf_end_cpu_access(import_attach->dmabuf, DMA_FROM_DEVICE); + drm_gem_fb_end_cpu_access(fb, DMA_FROM_DEVICE); vunmap: - drm_gem_shmem_vunmap(fb->obj[0], &map); + drm_gem_fb_vunmap(fb, map); + + return ret; +} + +struct gud_usb_bulk_context { + struct timer_list timer; + struct usb_sg_request sgr; +}; + +static void gud_usb_bulk_timeout(struct timer_list *t) +{ + struct gud_usb_bulk_context *ctx = from_timer(ctx, t, timer); + + usb_sg_cancel(&ctx->sgr); +} + +static int gud_usb_bulk(struct gud_device *gdrm, size_t len) +{ + struct gud_usb_bulk_context ctx; + int ret; + + ret = usb_sg_init(&ctx.sgr, gud_to_usb_device(gdrm), gdrm->bulk_pipe, 0, + gdrm->bulk_sgt.sgl, gdrm->bulk_sgt.nents, len, GFP_KERNEL); + if (ret) + return ret; + + timer_setup_on_stack(&ctx.timer, gud_usb_bulk_timeout, 0); + mod_timer(&ctx.timer, jiffies + msecs_to_jiffies(3000)); + + usb_sg_wait(&ctx.sgr); + + if (!del_timer_sync(&ctx.timer)) + ret = -ETIMEDOUT; + else if (ctx.sgr.status < 0) + ret = ctx.sgr.status; + else if (ctx.sgr.bytes != len) + ret = -EIO; + + destroy_timer_on_stack(&ctx.timer); return ret; } @@ -223,10 +273,9 @@ vunmap: static int gud_flush_rect(struct gud_device *gdrm, struct drm_framebuffer *fb, const struct drm_format_info *format, struct drm_rect *rect) { - struct usb_device *usb = gud_to_usb_device(gdrm); struct gud_set_buffer_req req; - int ret, actual_length; size_t len, trlen; + int ret; drm_dbg(&gdrm->drm, "Flushing [FB:%d] " DRM_RECT_FMT "\n", fb->base.id, DRM_RECT_ARG(rect)); @@ -255,10 +304,7 @@ static int gud_flush_rect(struct gud_device *gdrm, struct drm_framebuffer *fb, return ret; } - ret = usb_bulk_msg(usb, gdrm->bulk_pipe, gdrm->bulk_buf, trlen, - &actual_length, msecs_to_jiffies(3000)); - if (!ret && trlen != actual_length) - ret = -EIO; + ret = gud_usb_bulk(gdrm, trlen); if (ret) gdrm->stats_num_errors++; @@ -543,6 +589,8 @@ void gud_pipe_update(struct drm_simple_display_pipe *pipe, if (gdrm->flags & GUD_DISPLAY_FLAG_FULL_UPDATE) drm_rect_init(&damage, 0, 0, fb->width, fb->height); gud_fb_queue_damage(gdrm, fb, &damage); + if (!gud_async_flush) + flush_work(&gdrm->work); } if (!crtc->state->enable) diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_de.c b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_de.c index fa8da0ef707e..89bed78f1466 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_de.c +++ 
b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_de.c @@ -152,8 +152,7 @@ static const struct drm_plane_funcs hibmc_plane_funcs = { }; static const struct drm_plane_helper_funcs hibmc_plane_helper_funcs = { - .prepare_fb = drm_gem_vram_plane_helper_prepare_fb, - .cleanup_fb = drm_gem_vram_plane_helper_cleanup_fb, + DRM_GEM_VRAM_PLANE_HELPER_FUNCS, .atomic_check = hibmc_plane_atomic_check, .atomic_update = hibmc_plane_atomic_update, }; diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c index f4bc5386574a..610fc8e135f9 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c +++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c @@ -19,7 +19,6 @@ #include <drm/drm_drv.h> #include <drm/drm_gem_framebuffer_helper.h> #include <drm/drm_gem_vram_helper.h> -#include <drm/drm_irq.h> #include <drm/drm_managed.h> #include <drm/drm_vblank.h> @@ -28,7 +27,7 @@ DEFINE_DRM_GEM_FOPS(hibmc_fops); -static irqreturn_t hibmc_drm_interrupt(int irq, void *arg) +static irqreturn_t hibmc_interrupt(int irq, void *arg) { struct drm_device *dev = (struct drm_device *)arg; struct hibmc_drm_private *priv = to_hibmc_drm_private(dev); @@ -63,7 +62,6 @@ static const struct drm_driver hibmc_driver = { .dumb_create = hibmc_dumb_create, .dumb_map_offset = drm_gem_ttm_dumb_map_offset, .gem_prime_mmap = drm_gem_prime_mmap, - .irq_handler = hibmc_drm_interrupt, }; static int __maybe_unused hibmc_pm_suspend(struct device *dev) @@ -251,10 +249,11 @@ static int hibmc_hw_init(struct hibmc_drm_private *priv) static int hibmc_unload(struct drm_device *dev) { + struct pci_dev *pdev = to_pci_dev(dev->dev); + drm_atomic_helper_shutdown(dev); - if (dev->irq_enabled) - drm_irq_uninstall(dev); + free_irq(pdev->irq, dev); pci_disable_msi(to_pci_dev(dev->dev)); @@ -291,7 +290,9 @@ static int hibmc_load(struct drm_device *dev) if (ret) { drm_warn(dev, "enabling MSI failed: %d\n", ret); } else { - ret = drm_irq_install(dev, pdev->irq); + /* PCI devices require shared interrupts. 
*/ + ret = request_irq(pdev->irq, hibmc_interrupt, IRQF_SHARED, + dev->driver->name, dev); if (ret) drm_warn(dev, "install irq failed: %d\n", ret); } @@ -314,7 +315,7 @@ static int hibmc_pci_probe(struct pci_dev *pdev, struct drm_device *dev; int ret; - ret = drm_aperture_remove_conflicting_pci_framebuffers(pdev, "hibmcdrmfb"); + ret = drm_aperture_remove_conflicting_pci_framebuffers(pdev, &hibmc_driver); if (ret) return ret; diff --git a/drivers/gpu/drm/hisilicon/kirin/dw_drm_dsi.c b/drivers/gpu/drm/hisilicon/kirin/dw_drm_dsi.c index 9b565a057340..952cfdb1961d 100644 --- a/drivers/gpu/drm/hisilicon/kirin/dw_drm_dsi.c +++ b/drivers/gpu/drm/hisilicon/kirin/dw_drm_dsi.c @@ -769,16 +769,9 @@ static int dsi_bridge_init(struct drm_device *dev, struct dw_dsi *dsi) { struct drm_encoder *encoder = &dsi->encoder; struct drm_bridge *bridge = dsi->bridge; - int ret; /* associate the bridge to dsi encoder */ - ret = drm_bridge_attach(encoder, bridge, NULL, 0); - if (ret) { - DRM_ERROR("failed to attach external bridge\n"); - return ret; - } - - return 0; + return drm_bridge_attach(encoder, bridge, NULL, 0); } static int dsi_bind(struct device *dev, struct device *master, void *data) diff --git a/drivers/gpu/drm/hisilicon/kirin/kirin_drm_drv.c b/drivers/gpu/drm/hisilicon/kirin/kirin_drm_drv.c index e590e19db657..98ae9a48f3fe 100644 --- a/drivers/gpu/drm/hisilicon/kirin/kirin_drm_drv.c +++ b/drivers/gpu/drm/hisilicon/kirin/kirin_drm_drv.c @@ -185,8 +185,6 @@ static int kirin_drm_kms_init(struct drm_device *dev, DRM_ERROR("failed to initialize vblank.\n"); goto err_unbind_all; } - /* with irq_enabled = true, we can use the vblank feature. */ - dev->irq_enabled = true; /* reset all the states of crtc/plane/encoder/connector */ drm_mode_config_reset(dev); diff --git a/drivers/gpu/drm/hyperv/hyperv_drm_drv.c b/drivers/gpu/drm/hyperv/hyperv_drm_drv.c index eb06c92c4bfd..cd818a629183 100644 --- a/drivers/gpu/drm/hyperv/hyperv_drm_drv.c +++ b/drivers/gpu/drm/hyperv/hyperv_drm_drv.c @@ -82,7 +82,7 @@ static int hyperv_setup_gen1(struct hyperv_drm_device *hv) return -ENODEV; } - ret = drm_aperture_remove_conflicting_pci_framebuffers(pdev, "hypervdrmfb"); + ret = drm_aperture_remove_conflicting_pci_framebuffers(pdev, &hyperv_driver); if (ret) { drm_err(dev, "Not able to remove boot fb\n"); return ret; @@ -127,7 +127,7 @@ static int hyperv_setup_gen2(struct hyperv_drm_device *hv, drm_aperture_remove_conflicting_framebuffers(screen_info.lfb_base, screen_info.lfb_size, false, - "hypervdrmfb"); + &hyperv_driver); hv->fb_size = (unsigned long)hv->mmio_megabytes * 1024 * 1024; diff --git a/drivers/gpu/drm/hyperv/hyperv_drm_modeset.c b/drivers/gpu/drm/hyperv/hyperv_drm_modeset.c index 3aaee4730ec6..6dd4717d3e1e 100644 --- a/drivers/gpu/drm/hyperv/hyperv_drm_modeset.c +++ b/drivers/gpu/drm/hyperv/hyperv_drm_modeset.c @@ -105,7 +105,7 @@ static void hyperv_pipe_enable(struct drm_simple_display_pipe *pipe, crtc_state->mode.hdisplay, crtc_state->mode.vdisplay, plane_state->fb->pitches[0]); - hyperv_blit_to_vram_fullscreen(plane_state->fb, &shadow_plane_state->map[0]); + hyperv_blit_to_vram_fullscreen(plane_state->fb, &shadow_plane_state->data[0]); } static int hyperv_pipe_check(struct drm_simple_display_pipe *pipe, @@ -133,7 +133,7 @@ static void hyperv_pipe_update(struct drm_simple_display_pipe *pipe, struct drm_rect rect; if (drm_atomic_helper_damage_merged(old_state, state, &rect)) { - hyperv_blit_to_vram_rect(state->fb, &shadow_plane_state->map[0], &rect); + hyperv_blit_to_vram_rect(state->fb, 
&shadow_plane_state->data[0], &rect); hyperv_update_dirt(hv->hdev, &rect); } } diff --git a/drivers/gpu/drm/i810/i810_dma.c b/drivers/gpu/drm/i810/i810_dma.c index d78c82af367c..9fb4dd63342f 100644 --- a/drivers/gpu/drm/i810/i810_dma.c +++ b/drivers/gpu/drm/i810/i810_dma.c @@ -38,7 +38,6 @@ #include <drm/drm_drv.h> #include <drm/drm_file.h> #include <drm/drm_ioctl.h> -#include <drm/drm_irq.h> #include <drm/drm_print.h> #include <drm/i810_drm.h> @@ -209,7 +208,7 @@ static int i810_dma_cleanup(struct drm_device *dev) * is freed, it's too late. */ if (drm_core_check_feature(dev, DRIVER_HAVE_IRQ) && dev->irq_enabled) - drm_irq_uninstall(dev); + drm_legacy_irq_uninstall(dev); if (dev->dev_private) { int i; diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug index 72a38f28393f..47e845353ffa 100644 --- a/drivers/gpu/drm/i915/Kconfig.debug +++ b/drivers/gpu/drm/i915/Kconfig.debug @@ -207,6 +207,8 @@ config DRM_I915_LOW_LEVEL_TRACEPOINTS This provides the ability to precisely monitor engine utilisation and also analyze the request dependency resolving timeline. + Recommended for driver developers only. + If in doubt, say "N". config DRM_I915_DEBUG_VBLANK_EVADE @@ -220,6 +222,8 @@ config DRM_I915_DEBUG_VBLANK_EVADE is exceeded, even if there isn't an actual risk of missing the vblank. + Recommended for driver developers only. + If in doubt, say "N". config DRM_I915_DEBUG_RUNTIME_PM @@ -232,4 +236,6 @@ config DRM_I915_DEBUG_RUNTIME_PM runtime PM functionality. This may introduce overhead during driver loading, suspend and resume operations. + Recommended for driver developers only. + If in doubt, say "N" diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 4f22cac1c49b..642a5b5a1b81 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -38,6 +38,7 @@ i915-y += i915_drv.o \ i915_irq.o \ i915_getparam.o \ i915_mitigations.o \ + i915_module.o \ i915_params.o \ i915_pci.o \ i915_scatterlist.o \ @@ -89,7 +90,6 @@ gt-y += \ gt/gen8_ppgtt.o \ gt/intel_breadcrumbs.o \ gt/intel_context.o \ - gt/intel_context_param.o \ gt/intel_context_sseu.o \ gt/intel_engine_cs.o \ gt/intel_engine_heartbeat.o \ @@ -108,6 +108,7 @@ gt-y += \ gt/intel_gtt.o \ gt/intel_llc.o \ gt/intel_lrc.o \ + gt/intel_migrate.o \ gt/intel_mocs.o \ gt/intel_ppgtt.o \ gt/intel_rc6.o \ @@ -135,7 +136,6 @@ i915-y += $(gt-y) gem-y += \ gem/i915_gem_busy.o \ gem/i915_gem_clflush.o \ - gem/i915_gem_client_blt.o \ gem/i915_gem_context.o \ gem/i915_gem_create.o \ gem/i915_gem_dmabuf.o \ @@ -143,7 +143,6 @@ gem-y += \ gem/i915_gem_execbuffer.o \ gem/i915_gem_internal.o \ gem/i915_gem_object.o \ - gem/i915_gem_object_blt.o \ gem/i915_gem_lmem.o \ gem/i915_gem_mman.o \ gem/i915_gem_pages.o \ @@ -155,21 +154,24 @@ gem-y += \ gem/i915_gem_stolen.o \ gem/i915_gem_throttle.o \ gem/i915_gem_tiling.o \ + gem/i915_gem_ttm.o \ gem/i915_gem_userptr.o \ gem/i915_gem_wait.o \ gem/i915_gemfs.o i915-y += \ $(gem-y) \ i915_active.o \ + i915_buddy.o \ i915_cmd_parser.o \ i915_gem_evict.o \ i915_gem_gtt.o \ + i915_gem_ww.o \ i915_gem.o \ - i915_globals.o \ i915_query.o \ i915_request.o \ i915_scheduler.o \ i915_trace_points.o \ + i915_ttm_buddy_manager.o \ i915_vma.o \ intel_wopcm.o @@ -184,6 +186,8 @@ i915-y += gt/uc/intel_uc.o \ gt/uc/intel_guc_fw.o \ gt/uc/intel_guc_log.o \ gt/uc/intel_guc_log_debugfs.o \ + gt/uc/intel_guc_rc.o \ + gt/uc/intel_guc_slpc.o \ gt/uc/intel_guc_submission.o \ gt/uc/intel_huc.o \ gt/uc/intel_huc_debugfs.o \ @@ -265,6 +269,7 @@ i915-y += \ 
display/intel_pps.o \ display/intel_qp_tables.o \ display/intel_sdvo.o \ + display/intel_snps_phy.o \ display/intel_tv.o \ display/intel_vdsc.o \ display/intel_vrr.o \ @@ -276,7 +281,9 @@ i915-y += i915_perf.o # Post-mortem debug and GPU hang state capture i915-$(CONFIG_DRM_I915_CAPTURE_ERROR) += i915_gpu_error.o i915-$(CONFIG_DRM_I915_SELFTEST) += \ + gem/selftests/i915_gem_client_blt.o \ gem/selftests/igt_gem_utils.o \ + selftests/intel_scheduler_helpers.o \ selftests/i915_random.o \ selftests/i915_selftest.o \ selftests/igt_atomic.o \ diff --git a/drivers/gpu/drm/i915/display/i9xx_plane.c b/drivers/gpu/drm/i915/display/i9xx_plane.c index 9643c45a2209..b1439ba78f67 100644 --- a/drivers/gpu/drm/i915/display/i9xx_plane.c +++ b/drivers/gpu/drm/i915/display/i9xx_plane.c @@ -912,7 +912,7 @@ intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe) zpos = 0; drm_plane_create_zpos_immutable_property(&plane->base, zpos); - drm_plane_helper_add(&plane->base, &intel_plane_helper_funcs); + intel_plane_helper_add(plane); return plane; diff --git a/drivers/gpu/drm/i915/display/icl_dsi.c b/drivers/gpu/drm/i915/display/icl_dsi.c index 16812488c5dd..43ec7fcd3f5d 100644 --- a/drivers/gpu/drm/i915/display/icl_dsi.c +++ b/drivers/gpu/drm/i915/display/icl_dsi.c @@ -729,8 +729,8 @@ gen11_dsi_configure_transcoder(struct intel_encoder *encoder, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); - struct intel_crtc *intel_crtc = to_intel_crtc(pipe_config->uapi.crtc); - enum pipe pipe = intel_crtc->pipe; + struct intel_crtc *crtc = to_intel_crtc(pipe_config->uapi.crtc); + enum pipe pipe = crtc->pipe; u32 tmp; enum port port; enum transcoder dsi_trans; @@ -1253,15 +1253,36 @@ static void gen11_dsi_pre_enable(struct intel_atomic_state *state, gen11_dsi_set_transcoder_timings(encoder, pipe_config); } +/* + * Wa_1409054076:icl,jsl,ehl + * When pipe A is disabled and MIPI DSI is enabled on pipe B, + * the AMT KVMR feature will incorrectly see pipe A as enabled. + * Set 0x42080 bit 23=1 before enabling DSI on pipe B and leave + * it set while DSI is enabled on pipe B + */ +static void icl_apply_kvmr_pipe_a_wa(struct intel_encoder *encoder, + enum pipe pipe, bool enable) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + + if (DISPLAY_VER(dev_priv) == 11 && pipe == PIPE_B) + intel_de_rmw(dev_priv, CHICKEN_PAR1_1, + IGNORE_KVMR_PIPE_A, + enable ? 
IGNORE_KVMR_PIPE_A : 0); +} static void gen11_dsi_enable(struct intel_atomic_state *state, struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, const struct drm_connector_state *conn_state) { struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); + struct intel_crtc *crtc = to_intel_crtc(conn_state->crtc); drm_WARN_ON(state->base.dev, crtc_state->has_pch_encoder); + /* Wa_1409054076:icl,jsl,ehl */ + icl_apply_kvmr_pipe_a_wa(encoder, crtc->pipe, true); + /* step6d: enable dsi transcoder */ gen11_dsi_enable_transcoder(encoder); @@ -1415,6 +1436,7 @@ static void gen11_dsi_disable(struct intel_atomic_state *state, const struct drm_connector_state *old_conn_state) { struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); + struct intel_crtc *crtc = to_intel_crtc(old_conn_state->crtc); /* step1: turn off backlight */ intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_BACKLIGHT_OFF); @@ -1423,6 +1445,9 @@ static void gen11_dsi_disable(struct intel_atomic_state *state, /* step2d,e: disable transcoder and wait */ gen11_dsi_disable_transcoder(encoder); + /* Wa_1409054076:icl,jsl,ehl */ + icl_apply_kvmr_pipe_a_wa(encoder, crtc->pipe, false); + /* step2f,g: powerdown panel */ gen11_dsi_powerdown_panel(encoder); @@ -1548,6 +1573,22 @@ static void gen11_dsi_get_config(struct intel_encoder *encoder, pipe_config->mode_flags |= I915_MODE_FLAG_DSI_PERIODIC_CMD_MODE; } +static void gen11_dsi_sync_state(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->uapi.crtc); + enum pipe pipe = intel_crtc->pipe; + + /* wa verify 1409054076:icl,jsl,ehl */ + if (DISPLAY_VER(dev_priv) == 11 && pipe == PIPE_B && + !(intel_de_read(dev_priv, CHICKEN_PAR1_1) & IGNORE_KVMR_PIPE_A)) + drm_dbg_kms(&dev_priv->drm, + "[ENCODER:%d:%s] BIOS left IGNORE_KVMR_PIPE_A cleared with pipe B enabled\n", + encoder->base.base.id, + encoder->base.name); +} + static int gen11_dsi_dsc_compute_config(struct intel_encoder *encoder, struct intel_crtc_state *crtc_state) { @@ -1966,6 +2007,7 @@ void icl_dsi_init(struct drm_i915_private *dev_priv) encoder->post_disable = gen11_dsi_post_disable; encoder->port = port; encoder->get_config = gen11_dsi_get_config; + encoder->sync_state = gen11_dsi_sync_state; encoder->update_pipe = intel_panel_update_backlight; encoder->compute_config = gen11_dsi_compute_config; encoder->get_hw_state = gen11_dsi_get_hw_state; diff --git a/drivers/gpu/drm/i915/display/intel_acpi.c b/drivers/gpu/drm/i915/display/intel_acpi.c index 833d0c1be4f1..7cfe91fc05f2 100644 --- a/drivers/gpu/drm/i915/display/intel_acpi.c +++ b/drivers/gpu/drm/i915/display/intel_acpi.c @@ -19,6 +19,12 @@ static const guid_t intel_dsm_guid = GUID_INIT(0x7ed873d3, 0xc2d0, 0x4e4f, 0xa8, 0x54, 0x0f, 0x13, 0x17, 0xb0, 0x1c, 0x2c); +#define INTEL_DSM_FN_GET_BIOS_DATA_FUNCS_SUPPORTED 0 /* No args */ + +static const guid_t intel_dsm_guid2 = + GUID_INIT(0x3e5b41c6, 0xeb1d, 0x4260, + 0x9d, 0x15, 0xc7, 0x1f, 0xba, 0xda, 0xe4, 0x14); + static char *intel_dsm_port_name(u8 id) { switch (id) { @@ -176,6 +182,19 @@ void intel_unregister_dsm_handler(void) { } +void intel_dsm_get_bios_data_funcs_supported(struct drm_i915_private *i915) +{ + struct pci_dev *pdev = to_pci_dev(i915->drm.dev); + acpi_handle dhandle; + + dhandle = ACPI_HANDLE(&pdev->dev); + if (!dhandle) + return; + + acpi_evaluate_dsm(dhandle, &intel_dsm_guid2, INTEL_DSM_REVISION_ID, + INTEL_DSM_FN_GET_BIOS_DATA_FUNCS_SUPPORTED, NULL); 
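/* The call above appears to be made only for its side effect of evaluating the _DSM in platform firmware; the union acpi_object it returns is not inspected here, although callers that do use the result normally release it with ACPI_FREE(). */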
+} + /* * ACPI Specification, Revision 5.0, Appendix B.3.2 _DOD (Enumerate All Devices * Attached to the Display Adapter). diff --git a/drivers/gpu/drm/i915/display/intel_acpi.h b/drivers/gpu/drm/i915/display/intel_acpi.h index e8b068661d22..9f197401c313 100644 --- a/drivers/gpu/drm/i915/display/intel_acpi.h +++ b/drivers/gpu/drm/i915/display/intel_acpi.h @@ -11,11 +11,14 @@ struct drm_i915_private; #ifdef CONFIG_ACPI void intel_register_dsm_handler(void); void intel_unregister_dsm_handler(void); +void intel_dsm_get_bios_data_funcs_supported(struct drm_i915_private *i915); void intel_acpi_device_id_update(struct drm_i915_private *i915); #else static inline void intel_register_dsm_handler(void) { return; } static inline void intel_unregister_dsm_handler(void) { return; } static inline +void intel_dsm_get_bios_data_funcs_supported(struct drm_i915_private *i915) { return; } +static inline void intel_acpi_device_id_update(struct drm_i915_private *i915) { return; } #endif /* CONFIG_ACPI */ diff --git a/drivers/gpu/drm/i915/display/intel_atomic_plane.c b/drivers/gpu/drm/i915/display/intel_atomic_plane.c index 36f52a1d7552..47234d898549 100644 --- a/drivers/gpu/drm/i915/display/intel_atomic_plane.c +++ b/drivers/gpu/drm/i915/display/intel_atomic_plane.c @@ -601,7 +601,12 @@ int intel_atomic_plane_check_clipping(struct intel_plane_state *plane_state, return 0; } -const struct drm_plane_helper_funcs intel_plane_helper_funcs = { +static const struct drm_plane_helper_funcs intel_plane_helper_funcs = { .prepare_fb = intel_prepare_plane_fb, .cleanup_fb = intel_cleanup_plane_fb, }; + +void intel_plane_helper_add(struct intel_plane *plane) +{ + drm_plane_helper_add(&plane->base, &intel_plane_helper_funcs); +} diff --git a/drivers/gpu/drm/i915/display/intel_atomic_plane.h b/drivers/gpu/drm/i915/display/intel_atomic_plane.h index dc4d05e75e1c..62e5a2a77fd4 100644 --- a/drivers/gpu/drm/i915/display/intel_atomic_plane.h +++ b/drivers/gpu/drm/i915/display/intel_atomic_plane.h @@ -17,8 +17,6 @@ struct intel_crtc_state; struct intel_plane; struct intel_plane_state; -extern const struct drm_plane_helper_funcs intel_plane_helper_funcs; - unsigned int intel_adjusted_rate(const struct drm_rect *src, const struct drm_rect *dst, unsigned int rate); @@ -65,5 +63,6 @@ int intel_atomic_plane_check_clipping(struct intel_plane_state *plane_state, bool can_position); void intel_plane_set_invisible(struct intel_crtc_state *crtc_state, struct intel_plane_state *plane_state); +void intel_plane_helper_add(struct intel_plane *plane); #endif /* __INTEL_ATOMIC_PLANE_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_audio.c b/drivers/gpu/drm/i915/display/intel_audio.c index 5f4f316b3ab5..532237588511 100644 --- a/drivers/gpu/drm/i915/display/intel_audio.c +++ b/drivers/gpu/drm/i915/display/intel_audio.c @@ -1001,7 +1001,7 @@ static unsigned long i915_audio_component_get_power(struct device *kdev) /* Catch potential impedance mismatches before they occur! 
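The BUILD_BUG_ON() below turns such a mismatch into a compile-time failure: the wakeref is handed to the caller through this hook's unsigned long return value, so intel_wakeref_t must never grow larger than unsigned long.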
*/ BUILD_BUG_ON(sizeof(intel_wakeref_t) > sizeof(unsigned long)); - ret = intel_display_power_get(dev_priv, POWER_DOMAIN_AUDIO); + ret = intel_display_power_get(dev_priv, POWER_DOMAIN_AUDIO_PLAYBACK); if (dev_priv->audio_power_refcount++ == 0) { if (DISPLAY_VER(dev_priv) >= 9) { @@ -1034,7 +1034,7 @@ static void i915_audio_component_put_power(struct device *kdev, if (IS_GEMINILAKE(dev_priv)) glk_force_audio_cdclk(dev_priv, false); - intel_display_power_put(dev_priv, POWER_DOMAIN_AUDIO, cookie); + intel_display_power_put(dev_priv, POWER_DOMAIN_AUDIO_PLAYBACK, cookie); } static void i915_audio_component_codec_wake_override(struct device *kdev, diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c index aa667fa71158..e86e6ed2d3bf 100644 --- a/drivers/gpu/drm/i915/display/intel_bios.c +++ b/drivers/gpu/drm/i915/display/intel_bios.c @@ -1871,12 +1871,12 @@ intel_bios_encoder_supports_edp(const struct intel_bios_encoder_data *devdata) static bool is_port_valid(struct drm_i915_private *i915, enum port port) { /* - * On some ICL/CNL SKUs port F is not present, but broken VBTs mark + * On some ICL SKUs port F is not present, but broken VBTs mark * the port as present. Only try to initialize port F for the * SKUs that may actually have it. */ - if (port == PORT_F && (IS_ICELAKE(i915) || IS_CANNONLAKE(i915))) - return IS_ICL_WITH_PORT_F(i915) || IS_CNL_WITH_PORT_F(i915); + if (port == PORT_F && IS_ICELAKE(i915)) + return IS_ICL_WITH_PORT_F(i915); return true; } @@ -1998,7 +1998,7 @@ static void parse_ddi_port(struct drm_i915_private *i915, "Port %c VBT HDMI boost level: %d\n", port_name(port), hdmi_boost_level); - /* DP max link rate for CNL+ */ + /* DP max link rate for GLK+ */ if (i915->vbt.version >= 216) { if (i915->vbt.version >= 230) info->dp_max_link_rate = parse_bdb_230_dp_max_link_rate(child->dp_max_link_rate); diff --git a/drivers/gpu/drm/i915/display/intel_bw.c b/drivers/gpu/drm/i915/display/intel_bw.c index bfb398f0432e..e91e0e0191fb 100644 --- a/drivers/gpu/drm/i915/display/intel_bw.c +++ b/drivers/gpu/drm/i915/display/intel_bw.c @@ -17,12 +17,53 @@ struct intel_qgv_point { u16 dclk, t_rp, t_rdpre, t_rc, t_ras, t_rcd; }; +struct intel_psf_gv_point { + u8 clk; /* clock in multiples of 16.6666 MHz */ +}; + struct intel_qgv_info { struct intel_qgv_point points[I915_NUM_QGV_POINTS]; + struct intel_psf_gv_point psf_points[I915_NUM_PSF_GV_POINTS]; u8 num_points; + u8 num_psf_points; u8 t_bl; }; +static int dg1_mchbar_read_qgv_point_info(struct drm_i915_private *dev_priv, + struct intel_qgv_point *sp, + int point) +{ + u32 dclk_ratio, dclk_reference; + u32 val; + + val = intel_uncore_read(&dev_priv->uncore, SA_PERF_STATUS_0_0_0_MCHBAR_PC); + dclk_ratio = REG_FIELD_GET(DG1_QCLK_RATIO_MASK, val); + if (val & DG1_QCLK_REFERENCE) + dclk_reference = 6; /* 6 * 16.666 MHz = 100 MHz */ + else + dclk_reference = 8; /* 8 * 16.666 MHz = 133 MHz */ + sp->dclk = dclk_ratio * dclk_reference; + + val = intel_uncore_read(&dev_priv->uncore, SKL_MC_BIOS_DATA_0_0_0_MCHBAR_PCU); + if (val & DG1_GEAR_TYPE) + sp->dclk *= 2; + + if (sp->dclk == 0) + return -EINVAL; + + val = intel_uncore_read(&dev_priv->uncore, MCHBAR_CH0_CR_TC_PRE_0_0_0_MCHBAR); + sp->t_rp = REG_FIELD_GET(DG1_DRAM_T_RP_MASK, val); + sp->t_rdpre = REG_FIELD_GET(DG1_DRAM_T_RDPRE_MASK, val); + + val = intel_uncore_read(&dev_priv->uncore, MCHBAR_CH0_CR_TC_PRE_0_0_0_MCHBAR_HIGH); + sp->t_rcd = REG_FIELD_GET(DG1_DRAM_T_RCD_MASK, val); + sp->t_ras = REG_FIELD_GET(DG1_DRAM_T_RAS_MASK, val); + + sp->t_rc = 
sp->t_rp + sp->t_ras; + + return 0; +} + static int icl_pcode_read_qgv_point_info(struct drm_i915_private *dev_priv, struct intel_qgv_point *sp, int point) @@ -49,6 +90,28 @@ static int icl_pcode_read_qgv_point_info(struct drm_i915_private *dev_priv, return 0; } +static int adls_pcode_read_psf_gv_point_info(struct drm_i915_private *dev_priv, + struct intel_psf_gv_point *points) +{ + u32 val = 0; + int ret; + int i; + + ret = sandybridge_pcode_read(dev_priv, + ICL_PCODE_MEM_SUBSYSYSTEM_INFO | + ADL_PCODE_MEM_SS_READ_PSF_GV_INFO, + &val, NULL); + if (ret) + return ret; + + for (i = 0; i < I915_NUM_PSF_GV_POINTS; i++) { + points[i].clk = val & 0xff; + val >>= 8; + } + + return 0; +} + int icl_pcode_restrict_qgv_points(struct drm_i915_private *dev_priv, u32 points_mask) { @@ -62,7 +125,7 @@ int icl_pcode_restrict_qgv_points(struct drm_i915_private *dev_priv, 1); if (ret < 0) { - drm_err(&dev_priv->drm, "Failed to disable qgv points (%d)\n", ret); + drm_err(&dev_priv->drm, "Failed to disable qgv points (%d) points: 0x%x\n", ret, points_mask); return ret; } @@ -76,6 +139,7 @@ static int icl_get_qgv_points(struct drm_i915_private *dev_priv, int i, ret; qi->num_points = dram_info->num_qgv_points; + qi->num_psf_points = dram_info->num_psf_gv_points; if (DISPLAY_VER(dev_priv) == 12) switch (dram_info->type) { @@ -99,7 +163,11 @@ static int icl_get_qgv_points(struct drm_i915_private *dev_priv, for (i = 0; i < qi->num_points; i++) { struct intel_qgv_point *sp = &qi->points[i]; - ret = icl_pcode_read_qgv_point_info(dev_priv, sp, i); + if (IS_DG1(dev_priv)) + ret = dg1_mchbar_read_qgv_point_info(dev_priv, sp, i); + else + ret = icl_pcode_read_qgv_point_info(dev_priv, sp, i); + if (ret) return ret; @@ -109,6 +177,19 @@ static int icl_get_qgv_points(struct drm_i915_private *dev_priv, sp->t_rcd, sp->t_rc); } + if (qi->num_psf_points > 0) { + ret = adls_pcode_read_psf_gv_point_info(dev_priv, qi->psf_points); + if (ret) { + drm_err(&dev_priv->drm, "Failed to read PSF point data; PSF points will not be considered in bandwidth calculations.\n"); + qi->num_psf_points = 0; + } + + for (i = 0; i < qi->num_psf_points; i++) + drm_dbg_kms(&dev_priv->drm, + "PSF GV %d: CLK=%d \n", + i, qi->psf_points[i].clk); + } + return 0; } @@ -118,6 +199,16 @@ static int icl_calc_bw(int dclk, int num, int den) return DIV_ROUND_CLOSEST(num * dclk * 100, den * 6); } +static int adl_calc_psf_bw(int clk) +{ + /* + * clk is multiples of 16.666MHz (100/6) + * According to BSpec PSF GV bandwidth is + * calculated as BW = 64 * clk * 16.666Mhz + */ + return DIV_ROUND_CLOSEST(64 * clk * 100, 6); +} + static int icl_sagv_max_dclk(const struct intel_qgv_info *qi) { u16 dclk = 0; @@ -194,6 +285,7 @@ static int icl_get_bw_info(struct drm_i915_private *dev_priv, const struct intel bi->num_planes = (ipqdepth - clpchgroup) / clpchgroup + 1; bi->num_qgv_points = qi.num_points; + bi->num_psf_gv_points = qi.num_psf_points; for (j = 0; j < qi.num_points; j++) { const struct intel_qgv_point *sp = &qi.points[j]; @@ -217,6 +309,16 @@ static int icl_get_bw_info(struct drm_i915_private *dev_priv, const struct intel i, j, bi->num_planes, bi->deratedbw[j]); } + for (j = 0; j < qi.num_psf_points; j++) { + const struct intel_psf_gv_point *sp = &qi.psf_points[j]; + + bi->psf_bw[j] = adl_calc_psf_bw(sp->clk); + + drm_dbg_kms(&dev_priv->drm, + "BW%d / PSF GV %d: num_planes=%d bw=%u\n", + i, j, bi->num_planes, bi->psf_bw[j]); + } + if (bi->num_planes == 1) break; } @@ -234,6 +336,26 @@ static int icl_get_bw_info(struct drm_i915_private *dev_priv, const struct 
intel return 0; } +static void dg2_get_bw_info(struct drm_i915_private *i915) +{ + struct intel_bw_info *bi = &i915->max_bw[0]; + + /* + * DG2 doesn't have SAGV or QGV points, just a constant max bandwidth + * that doesn't depend on the number of planes enabled. Create a + * single dummy QGV point to reflect that. DG2-G10 platforms have a + * constant 50 GB/s bandwidth, whereas DG2-G11 platforms have 38 GB/s. + */ + bi->num_planes = 1; + bi->num_qgv_points = 1; + if (IS_DG2_G11(i915)) + bi->deratedbw[0] = 38000; + else + bi->deratedbw[0] = 50000; + + i915->sagv_status = I915_SAGV_NOT_CONTROLLED; +} + static unsigned int icl_max_bw(struct drm_i915_private *dev_priv, int num_planes, int qgv_point) { @@ -262,12 +384,23 @@ static unsigned int icl_max_bw(struct drm_i915_private *dev_priv, return 0; } +static unsigned int adl_psf_bw(struct drm_i915_private *dev_priv, + int psf_gv_point) +{ + const struct intel_bw_info *bi = + &dev_priv->max_bw[0]; + + return bi->psf_bw[psf_gv_point]; +} + void intel_bw_init_hw(struct drm_i915_private *dev_priv) { if (!HAS_DISPLAY(dev_priv)) return; - if (IS_ALDERLAKE_S(dev_priv) || IS_ALDERLAKE_P(dev_priv)) + if (IS_DG2(dev_priv)) + dg2_get_bw_info(dev_priv); + else if (IS_ALDERLAKE_S(dev_priv) || IS_ALDERLAKE_P(dev_priv)) icl_get_bw_info(dev_priv, &adls_sa_info); else if (IS_ROCKETLAKE(dev_priv)) icl_get_bw_info(dev_priv, &rkl_sa_info); @@ -534,12 +667,24 @@ int intel_bw_atomic_check(struct intel_atomic_state *state) u32 allowed_points = 0; unsigned int max_bw_point = 0, max_bw = 0; unsigned int num_qgv_points = dev_priv->max_bw[0].num_qgv_points; - u32 mask = (1 << num_qgv_points) - 1; + unsigned int num_psf_gv_points = dev_priv->max_bw[0].num_psf_gv_points; + u32 mask = 0; /* FIXME earlier gens need some checks too */ if (DISPLAY_VER(dev_priv) < 11) return 0; + /* + * We can _not_ use the whole ADLS_QGV_PT_MASK here, as PCode rejects + * it with failure if we try masking any unadvertised points. + * So need to operate only with those returned from PCode. + */ + if (num_qgv_points > 0) + mask |= REG_GENMASK(num_qgv_points - 1, 0); + + if (num_psf_gv_points > 0) + mask |= REG_GENMASK(num_psf_gv_points - 1, 0) << ADLS_PSF_PT_SHIFT; + for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { unsigned int old_data_rate = @@ -602,23 +747,44 @@ int intel_bw_atomic_check(struct intel_atomic_state *state) max_bw = max_data_rate; } if (max_data_rate >= data_rate) - allowed_points |= BIT(i); + allowed_points |= REG_FIELD_PREP(ADLS_QGV_PT_MASK, BIT(i)); + drm_dbg_kms(&dev_priv->drm, "QGV point %d: max bw %d required %d\n", i, max_data_rate, data_rate); } + for (i = 0; i < num_psf_gv_points; i++) { + unsigned int max_data_rate = adl_psf_bw(dev_priv, i); + + if (max_data_rate >= data_rate) + allowed_points |= REG_FIELD_PREP(ADLS_PSF_PT_MASK, BIT(i)); + + drm_dbg_kms(&dev_priv->drm, "PSF GV point %d: max bw %d" + " required %d\n", + i, max_data_rate, data_rate); + } + /* * BSpec states that we always should have at least one allowed point * left, so if we couldn't - simply reject the configuration for obvious * reasons. 
*/ - if (allowed_points == 0) { + if ((allowed_points & ADLS_QGV_PT_MASK) == 0) { drm_dbg_kms(&dev_priv->drm, "No QGV points provide sufficient memory" " bandwidth %d for display configuration(%d active planes).\n", data_rate, num_active_planes); return -EINVAL; } + if (num_psf_gv_points > 0) { + if ((allowed_points & ADLS_PSF_PT_MASK) == 0) { + drm_dbg_kms(&dev_priv->drm, "No PSF GV points provide sufficient memory" + " bandwidth %d for display configuration(%d active planes).\n", + data_rate, num_active_planes); + return -EINVAL; + } + } + /* * Leave only single point with highest bandwidth, if * we can't enable SAGV due to the increased memory latency it may diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index 613ffcc68eba..34fa4130d5c4 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -1195,17 +1195,6 @@ static const struct intel_cdclk_vals glk_cdclk_table[] = { {} }; -static const struct intel_cdclk_vals cnl_cdclk_table[] = { - { .refclk = 19200, .cdclk = 168000, .divider = 4, .ratio = 35 }, - { .refclk = 19200, .cdclk = 336000, .divider = 2, .ratio = 35 }, - { .refclk = 19200, .cdclk = 528000, .divider = 2, .ratio = 55 }, - - { .refclk = 24000, .cdclk = 168000, .divider = 4, .ratio = 28 }, - { .refclk = 24000, .cdclk = 336000, .divider = 2, .ratio = 28 }, - { .refclk = 24000, .cdclk = 528000, .divider = 2, .ratio = 44 }, - {} -}; - static const struct intel_cdclk_vals icl_cdclk_table[] = { { .refclk = 19200, .cdclk = 172800, .divider = 2, .ratio = 18 }, { .refclk = 19200, .cdclk = 192000, .divider = 2, .ratio = 20 }, @@ -1290,6 +1279,16 @@ static const struct intel_cdclk_vals adlp_cdclk_table[] = { {} }; +static const struct intel_cdclk_vals dg2_cdclk_table[] = { + { .refclk = 38400, .cdclk = 172800, .divider = 2, .ratio = 9 }, + { .refclk = 38400, .cdclk = 192000, .divider = 2, .ratio = 10 }, + { .refclk = 38400, .cdclk = 307200, .divider = 2, .ratio = 16 }, + { .refclk = 38400, .cdclk = 326400, .divider = 4, .ratio = 34 }, + { .refclk = 38400, .cdclk = 556800, .divider = 2, .ratio = 29 }, + { .refclk = 38400, .cdclk = 652800, .divider = 2, .ratio = 34 }, + {} +}; + static int bxt_calc_cdclk(struct drm_i915_private *dev_priv, int min_cdclk) { const struct intel_cdclk_vals *table = dev_priv->cdclk.table; @@ -1329,16 +1328,6 @@ static u8 bxt_calc_voltage_level(int cdclk) return DIV_ROUND_UP(cdclk, 25000); } -static u8 cnl_calc_voltage_level(int cdclk) -{ - if (cdclk > 336000) - return 2; - else if (cdclk > 168000) - return 1; - else - return 0; -} - static u8 icl_calc_voltage_level(int cdclk) { if (cdclk > 556800) @@ -1373,15 +1362,6 @@ static u8 tgl_calc_voltage_level(int cdclk) return 0; } -static void cnl_readout_refclk(struct drm_i915_private *dev_priv, - struct intel_cdclk_config *cdclk_config) -{ - if (intel_de_read(dev_priv, SKL_DSSM) & CNL_DSSM_CDCLK_PLL_REFCLK_24MHz) - cdclk_config->ref = 24000; - else - cdclk_config->ref = 19200; -} - static void icl_readout_refclk(struct drm_i915_private *dev_priv, struct intel_cdclk_config *cdclk_config) { @@ -1408,10 +1388,10 @@ static void bxt_de_pll_readout(struct drm_i915_private *dev_priv, { u32 val, ratio; - if (DISPLAY_VER(dev_priv) >= 11) + if (IS_DG2(dev_priv)) + cdclk_config->ref = 38400; + else if (DISPLAY_VER(dev_priv) >= 11) icl_readout_refclk(dev_priv, cdclk_config); - else if (IS_CANNONLAKE(dev_priv)) - cnl_readout_refclk(dev_priv, cdclk_config); else cdclk_config->ref = 19200; @@ -1427,11 +1407,11 @@ static void 
bxt_de_pll_readout(struct drm_i915_private *dev_priv, } /* - * CNL+ have the ratio directly in the PLL enable register, gen9lp had - * it in a separate PLL control register. + * DISPLAY_VER >= 11 have the ratio directly in the PLL enable register, + * gen9lp had it in a separate PLL control register. */ - if (DISPLAY_VER(dev_priv) >= 11 || IS_CANNONLAKE(dev_priv)) - ratio = val & CNL_CDCLK_PLL_RATIO_MASK; + if (DISPLAY_VER(dev_priv) >= 11) + ratio = val & ICL_CDCLK_PLL_RATIO_MASK; else ratio = intel_de_read(dev_priv, BXT_DE_PLL_CTL) & BXT_DE_PLL_RATIO_MASK; @@ -1518,7 +1498,7 @@ static void bxt_de_pll_enable(struct drm_i915_private *dev_priv, int vco) dev_priv->cdclk.hw.vco = vco; } -static void cnl_cdclk_pll_disable(struct drm_i915_private *dev_priv) +static void icl_cdclk_pll_disable(struct drm_i915_private *dev_priv) { intel_de_rmw(dev_priv, BXT_DE_PLL_ENABLE, BXT_DE_PLL_PLL_ENABLE, 0); @@ -1530,12 +1510,12 @@ static void cnl_cdclk_pll_disable(struct drm_i915_private *dev_priv) dev_priv->cdclk.hw.vco = 0; } -static void cnl_cdclk_pll_enable(struct drm_i915_private *dev_priv, int vco) +static void icl_cdclk_pll_enable(struct drm_i915_private *dev_priv, int vco) { int ratio = DIV_ROUND_CLOSEST(vco, dev_priv->cdclk.hw.ref); u32 val; - val = CNL_CDCLK_PLL_RATIO(ratio); + val = ICL_CDCLK_PLL_RATIO(ratio); intel_de_write(dev_priv, BXT_DE_PLL_ENABLE, val); val |= BXT_DE_PLL_PLL_ENABLE; @@ -1548,18 +1528,13 @@ static void cnl_cdclk_pll_enable(struct drm_i915_private *dev_priv, int vco) dev_priv->cdclk.hw.vco = vco; } -static bool has_cdclk_crawl(struct drm_i915_private *i915) -{ - return INTEL_INFO(i915)->has_cdclk_crawl; -} - static void adlp_cdclk_pll_crawl(struct drm_i915_private *dev_priv, int vco) { int ratio = DIV_ROUND_CLOSEST(vco, dev_priv->cdclk.hw.ref); u32 val; /* Write PLL ratio without disabling */ - val = CNL_CDCLK_PLL_RATIO(ratio) | BXT_DE_PLL_PLL_ENABLE; + val = ICL_CDCLK_PLL_RATIO(ratio) | BXT_DE_PLL_PLL_ENABLE; intel_de_write(dev_priv, BXT_DE_PLL_ENABLE, val); /* Submit freq change request */ @@ -1628,7 +1603,7 @@ static void bxt_set_cdclk(struct drm_i915_private *dev_priv, int ret; /* Inform power controller of upcoming frequency change. 
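This is a handshake: skl_pcode_request() keeps polling the SKL_PCODE_CDCLK_CONTROL mailbox until the PCU reports SKL_CDCLK_READY_FOR_CHANGE (or the request times out), and only then is the CDCLK PLL reprogrammed.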
*/ - if (DISPLAY_VER(dev_priv) >= 11 || IS_CANNONLAKE(dev_priv)) + if (DISPLAY_VER(dev_priv) >= 11) ret = skl_pcode_request(dev_priv, SKL_PCODE_CDCLK_CONTROL, SKL_CDCLK_PREPARE_FOR_CHANGE, SKL_CDCLK_READY_FOR_CHANGE, @@ -1649,16 +1624,16 @@ static void bxt_set_cdclk(struct drm_i915_private *dev_priv, return; } - if (has_cdclk_crawl(dev_priv) && dev_priv->cdclk.hw.vco > 0 && vco > 0) { + if (HAS_CDCLK_CRAWL(dev_priv) && dev_priv->cdclk.hw.vco > 0 && vco > 0) { if (dev_priv->cdclk.hw.vco != vco) adlp_cdclk_pll_crawl(dev_priv, vco); - } else if (DISPLAY_VER(dev_priv) >= 11 || IS_CANNONLAKE(dev_priv)) { + } else if (DISPLAY_VER(dev_priv) >= 11) { if (dev_priv->cdclk.hw.vco != 0 && dev_priv->cdclk.hw.vco != vco) - cnl_cdclk_pll_disable(dev_priv); + icl_cdclk_pll_disable(dev_priv); if (dev_priv->cdclk.hw.vco != vco) - cnl_cdclk_pll_enable(dev_priv, vco); + icl_cdclk_pll_enable(dev_priv, vco); } else { if (dev_priv->cdclk.hw.vco != 0 && dev_priv->cdclk.hw.vco != vco) @@ -1684,7 +1659,7 @@ static void bxt_set_cdclk(struct drm_i915_private *dev_priv, if (pipe != INVALID_PIPE) intel_wait_for_vblank(dev_priv, pipe); - if (DISPLAY_VER(dev_priv) >= 11 || IS_CANNONLAKE(dev_priv)) { + if (DISPLAY_VER(dev_priv) >= 11) { ret = sandybridge_pcode_write(dev_priv, SKL_PCODE_CDCLK_CONTROL, cdclk_config->voltage_level); } else { @@ -1709,7 +1684,7 @@ static void bxt_set_cdclk(struct drm_i915_private *dev_priv, intel_update_cdclk(dev_priv); - if (DISPLAY_VER(dev_priv) >= 11 || IS_CANNONLAKE(dev_priv)) + if (DISPLAY_VER(dev_priv) >= 11) /* * Can't read out the voltage level :( * Let's just assume everything is as expected. @@ -1857,7 +1832,7 @@ static bool intel_cdclk_can_crawl(struct drm_i915_private *dev_priv, { int a_div, b_div; - if (!has_cdclk_crawl(dev_priv)) + if (!HAS_CDCLK_CRAWL(dev_priv)) return false; /* @@ -2118,7 +2093,7 @@ int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_state) crtc_state->port_clock >= 540000 && crtc_state->lane_count == 4) { if (DISPLAY_VER(dev_priv) == 10) { - /* Display WA #1145: glk,cnl */ + /* Display WA #1145: glk */ min_cdclk = max(316800, min_cdclk); } else if (DISPLAY_VER(dev_priv) == 9 || IS_BROADWELL(dev_priv)) { /* Display WA #1144: skl,bxt */ @@ -2239,7 +2214,7 @@ static int intel_compute_min_cdclk(struct intel_cdclk_state *cdclk_state) /* * Account for port clock min voltage level requirements. - * This only really does something on CNL+ but can be + * This only really does something on DISPLAY_VER >= 11 but can be * called on earlier platforms as well. 
* * Note that this functions assumes that 0 is @@ -2653,8 +2628,6 @@ void intel_update_max_cdclk(struct drm_i915_private *dev_priv) dev_priv->max_cdclk_freq = 648000; else dev_priv->max_cdclk_freq = 652800; - } else if (IS_CANNONLAKE(dev_priv)) { - dev_priv->max_cdclk_freq = 528000; } else if (IS_GEMINILAKE(dev_priv)) { dev_priv->max_cdclk_freq = 316800; } else if (IS_BROXTON(dev_priv)) { @@ -2878,13 +2851,19 @@ u32 intel_read_rawclk(struct drm_i915_private *dev_priv) */ void intel_init_cdclk_hooks(struct drm_i915_private *dev_priv) { - if (IS_ALDERLAKE_P(dev_priv)) { + if (IS_DG2(dev_priv)) { + dev_priv->display.set_cdclk = bxt_set_cdclk; + dev_priv->display.bw_calc_min_cdclk = skl_bw_calc_min_cdclk; + dev_priv->display.modeset_calc_cdclk = bxt_modeset_calc_cdclk; + dev_priv->display.calc_voltage_level = tgl_calc_voltage_level; + dev_priv->cdclk.table = dg2_cdclk_table; + } else if (IS_ALDERLAKE_P(dev_priv)) { dev_priv->display.set_cdclk = bxt_set_cdclk; dev_priv->display.bw_calc_min_cdclk = skl_bw_calc_min_cdclk; dev_priv->display.modeset_calc_cdclk = bxt_modeset_calc_cdclk; dev_priv->display.calc_voltage_level = tgl_calc_voltage_level; - /* Wa_22011320316:adlp[a0] */ - if (IS_ADLP_DISPLAY_STEP(dev_priv, STEP_A0, STEP_A0)) + /* Wa_22011320316:adl-p[a0] */ + if (IS_ADLP_DISPLAY_STEP(dev_priv, STEP_A0, STEP_B0)) dev_priv->cdclk.table = adlp_a_step_cdclk_table; else dev_priv->cdclk.table = adlp_cdclk_table; @@ -2912,12 +2891,6 @@ void intel_init_cdclk_hooks(struct drm_i915_private *dev_priv) dev_priv->display.modeset_calc_cdclk = bxt_modeset_calc_cdclk; dev_priv->display.calc_voltage_level = icl_calc_voltage_level; dev_priv->cdclk.table = icl_cdclk_table; - } else if (IS_CANNONLAKE(dev_priv)) { - dev_priv->display.bw_calc_min_cdclk = skl_bw_calc_min_cdclk; - dev_priv->display.set_cdclk = bxt_set_cdclk; - dev_priv->display.modeset_calc_cdclk = bxt_modeset_calc_cdclk; - dev_priv->display.calc_voltage_level = cnl_calc_voltage_level; - dev_priv->cdclk.table = cnl_cdclk_table; } else if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) { dev_priv->display.bw_calc_min_cdclk = skl_bw_calc_min_cdclk; dev_priv->display.set_cdclk = bxt_set_cdclk; diff --git a/drivers/gpu/drm/i915/display/intel_color.c b/drivers/gpu/drm/i915/display/intel_color.c index dab892d2251b..afcb4bf3826c 100644 --- a/drivers/gpu/drm/i915/display/intel_color.c +++ b/drivers/gpu/drm/i915/display/intel_color.c @@ -305,13 +305,12 @@ static void ilk_load_csc_matrix(const struct intel_crtc_state *crtc_state) ilk_csc_postoff_limited_range); } else if (crtc_state->csc_enable) { /* - * On GLK+ both pipe CSC and degamma LUT are controlled + * On GLK both pipe CSC and degamma LUT are controlled * by csc_enable. Hence for the cases where the degama * LUT is needed but CSC is not we need to load an * identity matrix. 
*/ - drm_WARN_ON(&dev_priv->drm, !IS_CANNONLAKE(dev_priv) && - !IS_GEMINILAKE(dev_priv)); + drm_WARN_ON(&dev_priv->drm, !IS_GEMINILAKE(dev_priv)); ilk_update_pipe_csc(crtc, ilk_csc_off_zero, ilk_csc_coeff_identity, diff --git a/drivers/gpu/drm/i915/display/intel_combo_phy.c b/drivers/gpu/drm/i915/display/intel_combo_phy.c index 487c54cd5982..bacdf8a16bcb 100644 --- a/drivers/gpu/drm/i915/display/intel_combo_phy.c +++ b/drivers/gpu/drm/i915/display/intel_combo_phy.c @@ -23,9 +23,9 @@ enum { PROCMON_1_05V_DOT_1, }; -static const struct cnl_procmon { +static const struct icl_procmon { u32 dw1, dw9, dw10; -} cnl_procmon_values[] = { +} icl_procmon_values[] = { [PROCMON_0_85V_DOT_0] = { .dw1 = 0x00000000, .dw9 = 0x62AB67BB, .dw10 = 0x51914F96, }, [PROCMON_0_95V_DOT_0] = @@ -38,15 +38,10 @@ static const struct cnl_procmon { { .dw1 = 0x00440000, .dw9 = 0x9A00AB25, .dw10 = 0x8AE38FF1, }, }; -/* - * CNL has just one set of registers, while gen11 has a set for each combo PHY. - * The CNL registers are equivalent to the gen11 PHY A registers, that's why we - * call the ICL macros even though the function has CNL on its name. - */ -static const struct cnl_procmon * -cnl_get_procmon_ref_values(struct drm_i915_private *dev_priv, enum phy phy) +static const struct icl_procmon * +icl_get_procmon_ref_values(struct drm_i915_private *dev_priv, enum phy phy) { - const struct cnl_procmon *procmon; + const struct icl_procmon *procmon; u32 val; val = intel_de_read(dev_priv, ICL_PORT_COMP_DW3(phy)); @@ -55,32 +50,32 @@ cnl_get_procmon_ref_values(struct drm_i915_private *dev_priv, enum phy phy) MISSING_CASE(val); fallthrough; case VOLTAGE_INFO_0_85V | PROCESS_INFO_DOT_0: - procmon = &cnl_procmon_values[PROCMON_0_85V_DOT_0]; + procmon = &icl_procmon_values[PROCMON_0_85V_DOT_0]; break; case VOLTAGE_INFO_0_95V | PROCESS_INFO_DOT_0: - procmon = &cnl_procmon_values[PROCMON_0_95V_DOT_0]; + procmon = &icl_procmon_values[PROCMON_0_95V_DOT_0]; break; case VOLTAGE_INFO_0_95V | PROCESS_INFO_DOT_1: - procmon = &cnl_procmon_values[PROCMON_0_95V_DOT_1]; + procmon = &icl_procmon_values[PROCMON_0_95V_DOT_1]; break; case VOLTAGE_INFO_1_05V | PROCESS_INFO_DOT_0: - procmon = &cnl_procmon_values[PROCMON_1_05V_DOT_0]; + procmon = &icl_procmon_values[PROCMON_1_05V_DOT_0]; break; case VOLTAGE_INFO_1_05V | PROCESS_INFO_DOT_1: - procmon = &cnl_procmon_values[PROCMON_1_05V_DOT_1]; + procmon = &icl_procmon_values[PROCMON_1_05V_DOT_1]; break; } return procmon; } -static void cnl_set_procmon_ref_values(struct drm_i915_private *dev_priv, +static void icl_set_procmon_ref_values(struct drm_i915_private *dev_priv, enum phy phy) { - const struct cnl_procmon *procmon; + const struct icl_procmon *procmon; u32 val; - procmon = cnl_get_procmon_ref_values(dev_priv, phy); + procmon = icl_get_procmon_ref_values(dev_priv, phy); val = intel_de_read(dev_priv, ICL_PORT_COMP_DW1(phy)); val &= ~((0xff << 16) | 0xff); @@ -109,13 +104,13 @@ static bool check_phy_reg(struct drm_i915_private *dev_priv, return true; } -static bool cnl_verify_procmon_ref_values(struct drm_i915_private *dev_priv, +static bool icl_verify_procmon_ref_values(struct drm_i915_private *dev_priv, enum phy phy) { - const struct cnl_procmon *procmon; + const struct icl_procmon *procmon; bool ret; - procmon = cnl_get_procmon_ref_values(dev_priv, phy); + procmon = icl_get_procmon_ref_values(dev_priv, phy); ret = check_phy_reg(dev_priv, phy, ICL_PORT_COMP_DW1(phy), (0xff << 16) | 0xff, procmon->dw1); @@ -127,61 +122,6 @@ static bool cnl_verify_procmon_ref_values(struct drm_i915_private 
*dev_priv, return ret; } -static bool cnl_combo_phy_enabled(struct drm_i915_private *dev_priv) -{ - return !(intel_de_read(dev_priv, CHICKEN_MISC_2) & CNL_COMP_PWR_DOWN) && - (intel_de_read(dev_priv, CNL_PORT_COMP_DW0) & COMP_INIT); -} - -static bool cnl_combo_phy_verify_state(struct drm_i915_private *dev_priv) -{ - enum phy phy = PHY_A; - bool ret; - - if (!cnl_combo_phy_enabled(dev_priv)) - return false; - - ret = cnl_verify_procmon_ref_values(dev_priv, phy); - - ret &= check_phy_reg(dev_priv, phy, CNL_PORT_CL1CM_DW5, - CL_POWER_DOWN_ENABLE, CL_POWER_DOWN_ENABLE); - - return ret; -} - -static void cnl_combo_phys_init(struct drm_i915_private *dev_priv) -{ - u32 val; - - val = intel_de_read(dev_priv, CHICKEN_MISC_2); - val &= ~CNL_COMP_PWR_DOWN; - intel_de_write(dev_priv, CHICKEN_MISC_2, val); - - /* Dummy PORT_A to get the correct CNL register from the ICL macro */ - cnl_set_procmon_ref_values(dev_priv, PHY_A); - - val = intel_de_read(dev_priv, CNL_PORT_COMP_DW0); - val |= COMP_INIT; - intel_de_write(dev_priv, CNL_PORT_COMP_DW0, val); - - val = intel_de_read(dev_priv, CNL_PORT_CL1CM_DW5); - val |= CL_POWER_DOWN_ENABLE; - intel_de_write(dev_priv, CNL_PORT_CL1CM_DW5, val); -} - -static void cnl_combo_phys_uninit(struct drm_i915_private *dev_priv) -{ - u32 val; - - if (!cnl_combo_phy_verify_state(dev_priv)) - drm_warn(&dev_priv->drm, - "Combo PHY HW state changed unexpectedly.\n"); - - val = intel_de_read(dev_priv, CHICKEN_MISC_2); - val |= CNL_COMP_PWR_DOWN; - intel_de_write(dev_priv, CHICKEN_MISC_2, val); -} - static bool has_phy_misc(struct drm_i915_private *i915, enum phy phy) { /* @@ -291,7 +231,7 @@ static bool icl_combo_phy_verify_state(struct drm_i915_private *dev_priv, DCC_MODE_SELECT_CONTINUOSLY); } - ret &= cnl_verify_procmon_ref_values(dev_priv, phy); + ret &= icl_verify_procmon_ref_values(dev_priv, phy); if (phy_is_master(dev_priv, phy)) { ret &= check_phy_reg(dev_priv, phy, ICL_PORT_COMP_DW8(phy), @@ -415,7 +355,7 @@ skip_phy_misc: intel_de_write(dev_priv, ICL_PORT_PCS_DW1_GRP(phy), val); } - cnl_set_procmon_ref_values(dev_priv, phy); + icl_set_procmon_ref_values(dev_priv, phy); if (phy_is_master(dev_priv, phy)) { val = intel_de_read(dev_priv, ICL_PORT_COMP_DW8(phy)); @@ -474,16 +414,10 @@ skip_phy_misc: void intel_combo_phy_init(struct drm_i915_private *i915) { - if (DISPLAY_VER(i915) >= 11) - icl_combo_phys_init(i915); - else if (IS_CANNONLAKE(i915)) - cnl_combo_phys_init(i915); + icl_combo_phys_init(i915); } void intel_combo_phy_uninit(struct drm_i915_private *i915) { - if (DISPLAY_VER(i915) >= 11) - icl_combo_phys_uninit(i915); - else if (IS_CANNONLAKE(i915)) - cnl_combo_phys_uninit(i915); + icl_combo_phys_uninit(i915); } diff --git a/drivers/gpu/drm/i915/display/intel_crt.c b/drivers/gpu/drm/i915/display/intel_crt.c index 648f1c0d3d39..408f82b0dc7d 100644 --- a/drivers/gpu/drm/i915/display/intel_crt.c +++ b/drivers/gpu/drm/i915/display/intel_crt.c @@ -38,6 +38,7 @@ #include "intel_crt.h" #include "intel_crtc.h" #include "intel_ddi.h" +#include "intel_ddi_buf_trans.h" #include "intel_de.h" #include "intel_display_types.h" #include "intel_fdi.h" @@ -1081,6 +1082,8 @@ void intel_crt_init(struct drm_i915_private *dev_priv) crt->base.enable_clock = hsw_ddi_enable_clock; crt->base.disable_clock = hsw_ddi_disable_clock; crt->base.is_clock_enabled = hsw_ddi_is_clock_enabled; + + intel_ddi_buf_trans_init(&crt->base); } else { if (HAS_PCH_SPLIT(dev_priv)) { crt->base.compute_config = pch_crt_compute_config; diff --git a/drivers/gpu/drm/i915/display/intel_crtc.c 
b/drivers/gpu/drm/i915/display/intel_crtc.c index 95ff1707b4bd..254e67141a77 100644 --- a/drivers/gpu/drm/i915/display/intel_crtc.c +++ b/drivers/gpu/drm/i915/display/intel_crtc.c @@ -163,12 +163,12 @@ static void intel_crtc_free(struct intel_crtc *crtc) kfree(crtc); } -static void intel_crtc_destroy(struct drm_crtc *crtc) +static void intel_crtc_destroy(struct drm_crtc *_crtc) { - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + struct intel_crtc *crtc = to_intel_crtc(_crtc); - drm_crtc_cleanup(crtc); - kfree(intel_crtc); + drm_crtc_cleanup(&crtc->base); + kfree(crtc); } static int intel_crtc_late_register(struct drm_crtc *crtc) @@ -335,7 +335,7 @@ int intel_crtc_init(struct drm_i915_private *dev_priv, enum pipe pipe) dev_priv->plane_to_crtc_mapping[i9xx_plane] = crtc; } - if (DISPLAY_VER(dev_priv) >= 11 || IS_CANNONLAKE(dev_priv)) + if (DISPLAY_VER(dev_priv) >= 11) drm_crtc_create_scaling_filter_property(&crtc->base, BIT(DRM_SCALING_FILTER_DEFAULT) | BIT(DRM_SCALING_FILTER_NEAREST_NEIGHBOR)); diff --git a/drivers/gpu/drm/i915/display/intel_cursor.c b/drivers/gpu/drm/i915/display/intel_cursor.c index 966e020331fb..c7618fef0143 100644 --- a/drivers/gpu/drm/i915/display/intel_cursor.c +++ b/drivers/gpu/drm/i915/display/intel_cursor.c @@ -383,7 +383,7 @@ static u32 i9xx_cursor_ctl(const struct intel_crtc_state *crtc_state, if (plane_state->hw.rotation & DRM_MODE_ROTATE_180) cntl |= MCURSOR_ROTATE_180; - /* Wa_22012358565:adlp */ + /* Wa_22012358565:adl-p */ if (DISPLAY_VER(dev_priv) == 13) cntl |= MCURSOR_ARB_SLOTS(1); @@ -629,12 +629,16 @@ intel_legacy_cursor_update(struct drm_plane *_plane, /* * When crtc is inactive or there is a modeset pending, - * wait for it to complete in the slowpath + * wait for it to complete in the slowpath. + * PSR2 selective fetch also requires the slow path as + * PSR2 plane and transcoder registers can only be updated during + * vblank. * * FIXME bigjoiner fastpath would be good */ if (!crtc_state->hw.active || intel_crtc_needs_modeset(crtc_state) || - crtc_state->update_pipe || crtc_state->bigjoiner) + crtc_state->update_pipe || crtc_state->bigjoiner || + crtc_state->enable_psr2_sel_fetch) goto slow; /* @@ -801,7 +805,7 @@ intel_cursor_plane_create(struct drm_i915_private *dev_priv, if (DISPLAY_VER(dev_priv) >= 12) drm_plane_enable_fb_damage_clips(&cursor->base); - drm_plane_helper_add(&cursor->base, &intel_plane_helper_funcs); + intel_plane_helper_add(cursor); return cursor; diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index 00dade49665b..9903a78df896 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -51,6 +51,7 @@ #include "intel_panel.h" #include "intel_pps.h" #include "intel_psr.h" +#include "intel_snps_phy.h" #include "intel_sprite.h" #include "intel_tc.h" #include "intel_vdsc.h" @@ -95,24 +96,18 @@ static int intel_ddi_hdmi_level(struct intel_encoder *encoder, * values in advance. This function programs the correct values for * DP/eDP/FDI use cases. 
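Before the individual hunks: the rework below replaces the per-output-type table lookups (intel_ddi_get_buf_trans_dp/edp/fdi and friends) with a single encoder->get_buf_trans() hook that hands back a struct intel_ddi_buf_trans. A minimal sketch of what the reworked HSW/BDW/SKL helper boils down to; the sketch_ name is mine, and the iboost bit plus the WARN-on-NULL/clamping from the real hunk are omitted for brevity:

static void sketch_prepare_dp_ddi_buffers(struct intel_encoder *encoder,
                                          const struct intel_crtc_state *crtc_state)
{
        struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
        enum port port = encoder->port;
        const struct intel_ddi_buf_trans *trans;
        int i, n_entries;

        /* One vfunc now hides the DP vs. eDP vs. FDI table selection. */
        trans = encoder->get_buf_trans(encoder, crtc_state, &n_entries);
        if (!trans)
                return;

        /* Program every translation entry into the DDI buffer registers. */
        for (i = 0; i < n_entries; i++) {
                intel_de_write(dev_priv, DDI_BUF_TRANS_LO(port, i),
                               trans->entries[i].hsw.trans1);
                intel_de_write(dev_priv, DDI_BUF_TRANS_HI(port, i),
                               trans->entries[i].hsw.trans2);
        }
}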
*/ -void intel_prepare_dp_ddi_buffers(struct intel_encoder *encoder, - const struct intel_crtc_state *crtc_state) +void hsw_prepare_dp_ddi_buffers(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); u32 iboost_bit = 0; int i, n_entries; enum port port = encoder->port; - const struct ddi_buf_trans *ddi_translations; - - if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_ANALOG)) - ddi_translations = intel_ddi_get_buf_trans_fdi(dev_priv, - &n_entries); - else if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_EDP)) - ddi_translations = intel_ddi_get_buf_trans_edp(encoder, - &n_entries); - else - ddi_translations = intel_ddi_get_buf_trans_dp(encoder, - &n_entries); + const struct intel_ddi_buf_trans *ddi_translations; + + ddi_translations = encoder->get_buf_trans(encoder, crtc_state, &n_entries); + if (drm_WARN_ON_ONCE(&dev_priv->drm, !ddi_translations)) + return; /* If we're boosting the current, set bit 31 of trans1 */ if (DISPLAY_VER(dev_priv) == 9 && !IS_BROXTON(dev_priv) && @@ -121,9 +116,9 @@ void intel_prepare_dp_ddi_buffers(struct intel_encoder *encoder, for (i = 0; i < n_entries; i++) { intel_de_write(dev_priv, DDI_BUF_TRANS_LO(port, i), - ddi_translations[i].trans1 | iboost_bit); + ddi_translations->entries[i].hsw.trans1 | iboost_bit); intel_de_write(dev_priv, DDI_BUF_TRANS_HI(port, i), - ddi_translations[i].trans2); + ddi_translations->entries[i].hsw.trans2); } } @@ -132,17 +127,17 @@ void intel_prepare_dp_ddi_buffers(struct intel_encoder *encoder, * values in advance. This function programs the correct values for * HDMI/DVI use cases. */ -static void intel_prepare_hdmi_ddi_buffers(struct intel_encoder *encoder, - int level) +static void hsw_prepare_hdmi_ddi_buffers(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + int level) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); u32 iboost_bit = 0; int n_entries; enum port port = encoder->port; - const struct ddi_buf_trans *ddi_translations; - - ddi_translations = intel_ddi_get_buf_trans_hdmi(encoder, &n_entries); + const struct intel_ddi_buf_trans *ddi_translations; + ddi_translations = encoder->get_buf_trans(encoder, crtc_state, &n_entries); if (drm_WARN_ON_ONCE(&dev_priv->drm, !ddi_translations)) return; if (drm_WARN_ON_ONCE(&dev_priv->drm, level >= n_entries)) @@ -155,9 +150,9 @@ static void intel_prepare_hdmi_ddi_buffers(struct intel_encoder *encoder, /* Entry 9 is for HDMI: */ intel_de_write(dev_priv, DDI_BUF_TRANS_LO(port, 9), - ddi_translations[level].trans1 | iboost_bit); + ddi_translations->entries[level].hsw.trans1 | iboost_bit); intel_de_write(dev_priv, DDI_BUF_TRANS_HI(port, 9), - ddi_translations[level].trans2); + ddi_translations->entries[level].hsw.trans2); } void intel_wait_ddi_buf_idle(struct drm_i915_private *dev_priv, @@ -177,14 +172,18 @@ void intel_wait_ddi_buf_idle(struct drm_i915_private *dev_priv, static void intel_wait_ddi_buf_active(struct drm_i915_private *dev_priv, enum port port) { + int ret; + /* Wait > 518 usecs for DDI_BUF_CTL to be non idle */ if (DISPLAY_VER(dev_priv) < 10) { usleep_range(518, 1000); return; } - if (wait_for_us(!(intel_de_read(dev_priv, DDI_BUF_CTL(port)) & - DDI_BUF_IS_IDLE), 500)) + ret = _wait_for(!(intel_de_read(dev_priv, DDI_BUF_CTL(port)) & + DDI_BUF_IS_IDLE), IS_DG2(dev_priv) ? 
1200 : 500, 10, 10); + + if (ret) drm_err(&dev_priv->drm, "Timeout waiting for DDI BUF %c to get active\n", port_name(port)); } @@ -828,7 +827,7 @@ bool intel_ddi_get_hw_state(struct intel_encoder *encoder, static enum intel_display_power_domain intel_ddi_main_link_aux_domain(struct intel_digital_port *dig_port) { - /* CNL+ HW requires corresponding AUX IOs to be powered up for PSR with + /* ICL+ HW requires corresponding AUX IOs to be powered up for PSR with * DC states enabled at the same time, while for driver initiated AUX * transfers we need the same AUX IOs to be powered but with DC states * disabled. Accordingly use the AUX power domain here which leaves DC @@ -948,22 +947,16 @@ static void skl_ddi_set_iboost(struct intel_encoder *encoder, iboost = intel_bios_encoder_dp_boost_level(encoder->devdata); if (iboost == 0) { - const struct ddi_buf_trans *ddi_translations; + const struct intel_ddi_buf_trans *ddi_translations; int n_entries; - if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) - ddi_translations = intel_ddi_get_buf_trans_hdmi(encoder, &n_entries); - else if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_EDP)) - ddi_translations = intel_ddi_get_buf_trans_edp(encoder, &n_entries); - else - ddi_translations = intel_ddi_get_buf_trans_dp(encoder, &n_entries); - + ddi_translations = encoder->get_buf_trans(encoder, crtc_state, &n_entries); if (drm_WARN_ON_ONCE(&dev_priv->drm, !ddi_translations)) return; if (drm_WARN_ON_ONCE(&dev_priv->drm, level >= n_entries)) level = n_entries - 1; - iboost = ddi_translations[level].i_boost; + iboost = ddi_translations->entries[level].hsw.i_boost; } /* Make sure that the requested I_boost is valid */ @@ -983,21 +976,21 @@ static void bxt_ddi_vswing_sequence(struct intel_encoder *encoder, int level) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - const struct bxt_ddi_buf_trans *ddi_translations; + const struct intel_ddi_buf_trans *ddi_translations; enum port port = encoder->port; int n_entries; - ddi_translations = bxt_get_buf_trans(encoder, crtc_state, &n_entries); + ddi_translations = encoder->get_buf_trans(encoder, crtc_state, &n_entries); if (drm_WARN_ON_ONCE(&dev_priv->drm, !ddi_translations)) return; if (drm_WARN_ON_ONCE(&dev_priv->drm, level >= n_entries)) level = n_entries - 1; bxt_ddi_phy_set_signal_level(dev_priv, port, - ddi_translations[level].margin, - ddi_translations[level].scale, - ddi_translations[level].enable, - ddi_translations[level].deemphasis); + ddi_translations->entries[level].bxt.margin, + ddi_translations->entries[level].bxt.scale, + ddi_translations->entries[level].bxt.enable, + ddi_translations->entries[level].bxt.deemphasis); } static u8 intel_ddi_dp_voltage_max(struct intel_dp *intel_dp, @@ -1005,36 +998,9 @@ static u8 intel_ddi_dp_voltage_max(struct intel_dp *intel_dp, { struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base; struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - enum port port = encoder->port; - enum phy phy = intel_port_to_phy(dev_priv, port); int n_entries; - if (DISPLAY_VER(dev_priv) >= 12) { - if (intel_phy_is_combo(dev_priv, phy)) - tgl_get_combo_buf_trans(encoder, crtc_state, &n_entries); - else if (IS_ALDERLAKE_P(dev_priv)) - adlp_get_dkl_buf_trans(encoder, crtc_state, &n_entries); - else - tgl_get_dkl_buf_trans(encoder, crtc_state, &n_entries); - } else if (DISPLAY_VER(dev_priv) == 11) { - if (IS_PLATFORM(dev_priv, INTEL_JASPERLAKE)) - jsl_get_combo_buf_trans(encoder, crtc_state, &n_entries); - else if (IS_PLATFORM(dev_priv, INTEL_ELKHARTLAKE)) - 
ehl_get_combo_buf_trans(encoder, crtc_state, &n_entries); - else if (intel_phy_is_combo(dev_priv, phy)) - icl_get_combo_buf_trans(encoder, crtc_state, &n_entries); - else - icl_get_mg_buf_trans(encoder, crtc_state, &n_entries); - } else if (IS_CANNONLAKE(dev_priv)) { - cnl_get_buf_trans(encoder, crtc_state, &n_entries); - } else if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) { - bxt_get_buf_trans(encoder, crtc_state, &n_entries); - } else { - if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_EDP)) - intel_ddi_get_buf_trans_edp(encoder, &n_entries); - else - intel_ddi_get_buf_trans_dp(encoder, &n_entries); - } + encoder->get_buf_trans(encoder, crtc_state, &n_entries); if (drm_WARN_ON(&dev_priv->drm, n_entries < 1)) n_entries = 1; @@ -1056,146 +1022,17 @@ static u8 intel_ddi_dp_preemph_max(struct intel_dp *intel_dp) return DP_TRAIN_PRE_EMPH_LEVEL_3; } -static void cnl_ddi_vswing_program(struct intel_encoder *encoder, - const struct intel_crtc_state *crtc_state, - int level) -{ - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - const struct cnl_ddi_buf_trans *ddi_translations; - enum port port = encoder->port; - int n_entries, ln; - u32 val; - - ddi_translations = cnl_get_buf_trans(encoder, crtc_state, &n_entries); - - if (drm_WARN_ON_ONCE(&dev_priv->drm, !ddi_translations)) - return; - if (drm_WARN_ON_ONCE(&dev_priv->drm, level >= n_entries)) - level = n_entries - 1; - - /* Set PORT_TX_DW5 Scaling Mode Sel to 010b. */ - val = intel_de_read(dev_priv, CNL_PORT_TX_DW5_LN0(port)); - val &= ~SCALING_MODE_SEL_MASK; - val |= SCALING_MODE_SEL(2); - intel_de_write(dev_priv, CNL_PORT_TX_DW5_GRP(port), val); - - /* Program PORT_TX_DW2 */ - val = intel_de_read(dev_priv, CNL_PORT_TX_DW2_LN0(port)); - val &= ~(SWING_SEL_LOWER_MASK | SWING_SEL_UPPER_MASK | - RCOMP_SCALAR_MASK); - val |= SWING_SEL_UPPER(ddi_translations[level].dw2_swing_sel); - val |= SWING_SEL_LOWER(ddi_translations[level].dw2_swing_sel); - /* Rcomp scalar is fixed as 0x98 for every table entry */ - val |= RCOMP_SCALAR(0x98); - intel_de_write(dev_priv, CNL_PORT_TX_DW2_GRP(port), val); - - /* Program PORT_TX_DW4 */ - /* We cannot write to GRP. It would overrite individual loadgen */ - for (ln = 0; ln < 4; ln++) { - val = intel_de_read(dev_priv, CNL_PORT_TX_DW4_LN(ln, port)); - val &= ~(POST_CURSOR_1_MASK | POST_CURSOR_2_MASK | - CURSOR_COEFF_MASK); - val |= POST_CURSOR_1(ddi_translations[level].dw4_post_cursor_1); - val |= POST_CURSOR_2(ddi_translations[level].dw4_post_cursor_2); - val |= CURSOR_COEFF(ddi_translations[level].dw4_cursor_coeff); - intel_de_write(dev_priv, CNL_PORT_TX_DW4_LN(ln, port), val); - } - - /* Program PORT_TX_DW5 */ - /* All DW5 values are fixed for every table entry */ - val = intel_de_read(dev_priv, CNL_PORT_TX_DW5_LN0(port)); - val &= ~RTERM_SELECT_MASK; - val |= RTERM_SELECT(6); - val |= TAP3_DISABLE; - intel_de_write(dev_priv, CNL_PORT_TX_DW5_GRP(port), val); - - /* Program PORT_TX_DW7 */ - val = intel_de_read(dev_priv, CNL_PORT_TX_DW7_LN0(port)); - val &= ~N_SCALAR_MASK; - val |= N_SCALAR(ddi_translations[level].dw7_n_scalar); - intel_de_write(dev_priv, CNL_PORT_TX_DW7_GRP(port), val); -} - -static void cnl_ddi_vswing_sequence(struct intel_encoder *encoder, - const struct intel_crtc_state *crtc_state, - int level) -{ - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - enum port port = encoder->port; - int width, rate, ln; - u32 val; - - width = crtc_state->lane_count; - rate = crtc_state->port_clock; - - /* - * 1. 
If port type is eDP or DP, - * set PORT_PCS_DW1 cmnkeeper_enable to 1b, - * else clear to 0b. - */ - val = intel_de_read(dev_priv, CNL_PORT_PCS_DW1_LN0(port)); - if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) - val &= ~COMMON_KEEPER_EN; - else - val |= COMMON_KEEPER_EN; - intel_de_write(dev_priv, CNL_PORT_PCS_DW1_GRP(port), val); - - /* 2. Program loadgen select */ - /* - * Program PORT_TX_DW4_LN depending on Bit rate and used lanes - * <= 6 GHz and 4 lanes (LN0=0, LN1=1, LN2=1, LN3=1) - * <= 6 GHz and 1,2 lanes (LN0=0, LN1=1, LN2=1, LN3=0) - * > 6 GHz (LN0=0, LN1=0, LN2=0, LN3=0) - */ - for (ln = 0; ln <= 3; ln++) { - val = intel_de_read(dev_priv, CNL_PORT_TX_DW4_LN(ln, port)); - val &= ~LOADGEN_SELECT; - - if ((rate <= 600000 && width == 4 && ln >= 1) || - (rate <= 600000 && width < 4 && (ln == 1 || ln == 2))) { - val |= LOADGEN_SELECT; - } - intel_de_write(dev_priv, CNL_PORT_TX_DW4_LN(ln, port), val); - } - - /* 3. Set PORT_CL_DW5 SUS Clock Config to 11b */ - val = intel_de_read(dev_priv, CNL_PORT_CL1CM_DW5); - val |= SUS_CLOCK_CONFIG; - intel_de_write(dev_priv, CNL_PORT_CL1CM_DW5, val); - - /* 4. Clear training enable to change swing values */ - val = intel_de_read(dev_priv, CNL_PORT_TX_DW5_LN0(port)); - val &= ~TX_TRAINING_EN; - intel_de_write(dev_priv, CNL_PORT_TX_DW5_GRP(port), val); - - /* 5. Program swing and de-emphasis */ - cnl_ddi_vswing_program(encoder, crtc_state, level); - - /* 6. Set training enable to trigger update */ - val = intel_de_read(dev_priv, CNL_PORT_TX_DW5_LN0(port)); - val |= TX_TRAINING_EN; - intel_de_write(dev_priv, CNL_PORT_TX_DW5_GRP(port), val); -} - static void icl_ddi_combo_vswing_program(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, int level) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - const struct cnl_ddi_buf_trans *ddi_translations; + const struct intel_ddi_buf_trans *ddi_translations; enum phy phy = intel_port_to_phy(dev_priv, encoder->port); int n_entries, ln; u32 val; - if (DISPLAY_VER(dev_priv) >= 12) - ddi_translations = tgl_get_combo_buf_trans(encoder, crtc_state, &n_entries); - else if (IS_PLATFORM(dev_priv, INTEL_JASPERLAKE)) - ddi_translations = jsl_get_combo_buf_trans(encoder, crtc_state, &n_entries); - else if (IS_PLATFORM(dev_priv, INTEL_ELKHARTLAKE)) - ddi_translations = ehl_get_combo_buf_trans(encoder, crtc_state, &n_entries); - else - ddi_translations = icl_get_combo_buf_trans(encoder, crtc_state, &n_entries); - + ddi_translations = encoder->get_buf_trans(encoder, crtc_state, &n_entries); if (drm_WARN_ON_ONCE(&dev_priv->drm, !ddi_translations)) return; if (drm_WARN_ON_ONCE(&dev_priv->drm, level >= n_entries)) @@ -1223,8 +1060,8 @@ static void icl_ddi_combo_vswing_program(struct intel_encoder *encoder, val = intel_de_read(dev_priv, ICL_PORT_TX_DW2_LN0(phy)); val &= ~(SWING_SEL_LOWER_MASK | SWING_SEL_UPPER_MASK | RCOMP_SCALAR_MASK); - val |= SWING_SEL_UPPER(ddi_translations[level].dw2_swing_sel); - val |= SWING_SEL_LOWER(ddi_translations[level].dw2_swing_sel); + val |= SWING_SEL_UPPER(ddi_translations->entries[level].icl.dw2_swing_sel); + val |= SWING_SEL_LOWER(ddi_translations->entries[level].icl.dw2_swing_sel); /* Program Rcomp scalar for every table entry */ val |= RCOMP_SCALAR(0x98); intel_de_write(dev_priv, ICL_PORT_TX_DW2_GRP(phy), val); @@ -1235,16 +1072,16 @@ static void icl_ddi_combo_vswing_program(struct intel_encoder *encoder, val = intel_de_read(dev_priv, ICL_PORT_TX_DW4_LN(ln, phy)); val &= ~(POST_CURSOR_1_MASK | POST_CURSOR_2_MASK | CURSOR_COEFF_MASK); - val |= 
POST_CURSOR_1(ddi_translations[level].dw4_post_cursor_1); - val |= POST_CURSOR_2(ddi_translations[level].dw4_post_cursor_2); - val |= CURSOR_COEFF(ddi_translations[level].dw4_cursor_coeff); + val |= POST_CURSOR_1(ddi_translations->entries[level].icl.dw4_post_cursor_1); + val |= POST_CURSOR_2(ddi_translations->entries[level].icl.dw4_post_cursor_2); + val |= CURSOR_COEFF(ddi_translations->entries[level].icl.dw4_cursor_coeff); intel_de_write(dev_priv, ICL_PORT_TX_DW4_LN(ln, phy), val); } /* Program PORT_TX_DW7 */ val = intel_de_read(dev_priv, ICL_PORT_TX_DW7_LN0(phy)); val &= ~N_SCALAR_MASK; - val |= N_SCALAR(ddi_translations[level].dw7_n_scalar); + val |= N_SCALAR(ddi_translations->entries[level].icl.dw7_n_scalar); intel_de_write(dev_priv, ICL_PORT_TX_DW7_GRP(phy), val); } @@ -1315,15 +1152,14 @@ static void icl_mg_phy_ddi_vswing_sequence(struct intel_encoder *encoder, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); enum tc_port tc_port = intel_port_to_tc(dev_priv, encoder->port); - const struct icl_mg_phy_ddi_buf_trans *ddi_translations; + const struct intel_ddi_buf_trans *ddi_translations; int n_entries, ln; u32 val; if (enc_to_dig_port(encoder)->tc_mode == TC_PORT_TBT_ALT) return; - ddi_translations = icl_get_mg_buf_trans(encoder, crtc_state, &n_entries); - + ddi_translations = encoder->get_buf_trans(encoder, crtc_state, &n_entries); if (drm_WARN_ON_ONCE(&dev_priv->drm, !ddi_translations)) return; if (drm_WARN_ON_ONCE(&dev_priv->drm, level >= n_entries)) @@ -1345,13 +1181,13 @@ static void icl_mg_phy_ddi_vswing_sequence(struct intel_encoder *encoder, val = intel_de_read(dev_priv, MG_TX1_SWINGCTRL(ln, tc_port)); val &= ~CRI_TXDEEMPH_OVERRIDE_17_12_MASK; val |= CRI_TXDEEMPH_OVERRIDE_17_12( - ddi_translations[level].cri_txdeemph_override_17_12); + ddi_translations->entries[level].mg.cri_txdeemph_override_17_12); intel_de_write(dev_priv, MG_TX1_SWINGCTRL(ln, tc_port), val); val = intel_de_read(dev_priv, MG_TX2_SWINGCTRL(ln, tc_port)); val &= ~CRI_TXDEEMPH_OVERRIDE_17_12_MASK; val |= CRI_TXDEEMPH_OVERRIDE_17_12( - ddi_translations[level].cri_txdeemph_override_17_12); + ddi_translations->entries[level].mg.cri_txdeemph_override_17_12); intel_de_write(dev_priv, MG_TX2_SWINGCTRL(ln, tc_port), val); } @@ -1361,9 +1197,9 @@ static void icl_mg_phy_ddi_vswing_sequence(struct intel_encoder *encoder, val &= ~(CRI_TXDEEMPH_OVERRIDE_11_6_MASK | CRI_TXDEEMPH_OVERRIDE_5_0_MASK); val |= CRI_TXDEEMPH_OVERRIDE_5_0( - ddi_translations[level].cri_txdeemph_override_5_0) | + ddi_translations->entries[level].mg.cri_txdeemph_override_5_0) | CRI_TXDEEMPH_OVERRIDE_11_6( - ddi_translations[level].cri_txdeemph_override_11_6) | + ddi_translations->entries[level].mg.cri_txdeemph_override_11_6) | CRI_TXDEEMPH_OVERRIDE_EN; intel_de_write(dev_priv, MG_TX1_DRVCTRL(ln, tc_port), val); @@ -1371,9 +1207,9 @@ static void icl_mg_phy_ddi_vswing_sequence(struct intel_encoder *encoder, val &= ~(CRI_TXDEEMPH_OVERRIDE_11_6_MASK | CRI_TXDEEMPH_OVERRIDE_5_0_MASK); val |= CRI_TXDEEMPH_OVERRIDE_5_0( - ddi_translations[level].cri_txdeemph_override_5_0) | + ddi_translations->entries[level].mg.cri_txdeemph_override_5_0) | CRI_TXDEEMPH_OVERRIDE_11_6( - ddi_translations[level].cri_txdeemph_override_11_6) | + ddi_translations->entries[level].mg.cri_txdeemph_override_11_6) | CRI_TXDEEMPH_OVERRIDE_EN; intel_de_write(dev_priv, MG_TX2_DRVCTRL(ln, tc_port), val); @@ -1453,18 +1289,14 @@ tgl_dkl_phy_ddi_vswing_sequence(struct intel_encoder *encoder, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); enum tc_port 
tc_port = intel_port_to_tc(dev_priv, encoder->port); - const struct tgl_dkl_phy_ddi_buf_trans *ddi_translations; + const struct intel_ddi_buf_trans *ddi_translations; u32 val, dpcnt_mask, dpcnt_val; int n_entries, ln; if (enc_to_dig_port(encoder)->tc_mode == TC_PORT_TBT_ALT) return; - if (IS_ALDERLAKE_P(dev_priv)) - ddi_translations = adlp_get_dkl_buf_trans(encoder, crtc_state, &n_entries); - else - ddi_translations = tgl_get_dkl_buf_trans(encoder, crtc_state, &n_entries); - + ddi_translations = encoder->get_buf_trans(encoder, crtc_state, &n_entries); if (drm_WARN_ON_ONCE(&dev_priv->drm, !ddi_translations)) return; if (drm_WARN_ON_ONCE(&dev_priv->drm, level >= n_entries)) @@ -1473,9 +1305,9 @@ tgl_dkl_phy_ddi_vswing_sequence(struct intel_encoder *encoder, dpcnt_mask = (DKL_TX_PRESHOOT_COEFF_MASK | DKL_TX_DE_EMPAHSIS_COEFF_MASK | DKL_TX_VSWING_CONTROL_MASK); - dpcnt_val = DKL_TX_VSWING_CONTROL(ddi_translations[level].dkl_vswing_control); - dpcnt_val |= DKL_TX_DE_EMPHASIS_COEFF(ddi_translations[level].dkl_de_emphasis_control); - dpcnt_val |= DKL_TX_PRESHOOT_COEFF(ddi_translations[level].dkl_preshoot_control); + dpcnt_val = DKL_TX_VSWING_CONTROL(ddi_translations->entries[level].dkl.dkl_vswing_control); + dpcnt_val |= DKL_TX_DE_EMPHASIS_COEFF(ddi_translations->entries[level].dkl.dkl_de_emphasis_control); + dpcnt_val |= DKL_TX_PRESHOOT_COEFF(ddi_translations->entries[level].dkl.dkl_preshoot_control); for (ln = 0; ln < 2; ln++) { intel_de_write(dev_priv, HIP_INDEX_REG(tc_port), @@ -1549,33 +1381,33 @@ static int intel_ddi_dp_level(struct intel_dp *intel_dp) } static void -tgl_set_signal_levels(struct intel_dp *intel_dp, +dg2_set_signal_levels(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state) { struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base; int level = intel_ddi_dp_level(intel_dp); - tgl_ddi_vswing_sequence(encoder, crtc_state, level); + intel_snps_phy_ddi_vswing_sequence(encoder, level); } static void -icl_set_signal_levels(struct intel_dp *intel_dp, +tgl_set_signal_levels(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state) { struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base; int level = intel_ddi_dp_level(intel_dp); - icl_ddi_vswing_sequence(encoder, crtc_state, level); + tgl_ddi_vswing_sequence(encoder, crtc_state, level); } static void -cnl_set_signal_levels(struct intel_dp *intel_dp, +icl_set_signal_levels(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state) { struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base; int level = intel_ddi_dp_level(intel_dp); - cnl_ddi_vswing_sequence(encoder, crtc_state, level); + icl_ddi_vswing_sequence(encoder, crtc_state, level); } static void @@ -1613,7 +1445,7 @@ hsw_set_signal_levels(struct intel_dp *intel_dp, intel_de_posting_read(dev_priv, DDI_BUF_CTL(port)); } -static void _cnl_ddi_enable_clock(struct drm_i915_private *i915, i915_reg_t reg, +static void _icl_ddi_enable_clock(struct drm_i915_private *i915, i915_reg_t reg, u32 clk_sel_mask, u32 clk_sel, u32 clk_off) { mutex_lock(&i915->dpll.lock); @@ -1629,7 +1461,7 @@ static void _cnl_ddi_enable_clock(struct drm_i915_private *i915, i915_reg_t reg, mutex_unlock(&i915->dpll.lock); } -static void _cnl_ddi_disable_clock(struct drm_i915_private *i915, i915_reg_t reg, +static void _icl_ddi_disable_clock(struct drm_i915_private *i915, i915_reg_t reg, u32 clk_off) { mutex_lock(&i915->dpll.lock); @@ -1639,14 +1471,14 @@ static void _cnl_ddi_disable_clock(struct drm_i915_private *i915, i915_reg_t reg 
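The _icl_ddi_*() helpers (plain renames of the former _cnl_ddi_*() ones) keep a single implementation parameterized by the DPCLKA register, the clock-select mask/value and the clock-off bit, so the ADLS/RKL/DG1/ICL wrappers below only supply their own register layout. A hedged sketch of the enable side; the hunk only shows the locking and the rename, so this body is an approximation assembled from the read/modify/write calls used elsewhere in this file:

static void sketch_ddi_enable_clock(struct drm_i915_private *i915, i915_reg_t reg,
                                    u32 clk_sel_mask, u32 clk_sel, u32 clk_off)
{
        u32 val;

        mutex_lock(&i915->dpll.lock);

        /* Point the DDI at the requested PLL... */
        val = intel_de_read(i915, reg);
        val &= ~clk_sel_mask;
        val |= clk_sel;
        intel_de_write(i915, reg, val);

        /* ...then ungate its clock with a separate write. */
        val = intel_de_read(i915, reg);
        val &= ~clk_off;
        intel_de_write(i915, reg, val);

        mutex_unlock(&i915->dpll.lock);
}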
mutex_unlock(&i915->dpll.lock); } -static bool _cnl_ddi_is_clock_enabled(struct drm_i915_private *i915, i915_reg_t reg, +static bool _icl_ddi_is_clock_enabled(struct drm_i915_private *i915, i915_reg_t reg, u32 clk_off) { return !(intel_de_read(i915, reg) & clk_off); } static struct intel_shared_dpll * -_cnl_ddi_get_pll(struct drm_i915_private *i915, i915_reg_t reg, +_icl_ddi_get_pll(struct drm_i915_private *i915, i915_reg_t reg, u32 clk_sel_mask, u32 clk_sel_shift) { enum intel_dpll_id id; @@ -1666,7 +1498,7 @@ static void adls_ddi_enable_clock(struct intel_encoder *encoder, if (drm_WARN_ON(&i915->drm, !pll)) return; - _cnl_ddi_enable_clock(i915, ADLS_DPCLKA_CFGCR(phy), + _icl_ddi_enable_clock(i915, ADLS_DPCLKA_CFGCR(phy), ADLS_DPCLKA_CFGCR_DDI_CLK_SEL_MASK(phy), pll->info->id << ADLS_DPCLKA_CFGCR_DDI_SHIFT(phy), ICL_DPCLKA_CFGCR0_DDI_CLK_OFF(phy)); @@ -1677,7 +1509,7 @@ static void adls_ddi_disable_clock(struct intel_encoder *encoder) struct drm_i915_private *i915 = to_i915(encoder->base.dev); enum phy phy = intel_port_to_phy(i915, encoder->port); - _cnl_ddi_disable_clock(i915, ADLS_DPCLKA_CFGCR(phy), + _icl_ddi_disable_clock(i915, ADLS_DPCLKA_CFGCR(phy), ICL_DPCLKA_CFGCR0_DDI_CLK_OFF(phy)); } @@ -1686,7 +1518,7 @@ static bool adls_ddi_is_clock_enabled(struct intel_encoder *encoder) struct drm_i915_private *i915 = to_i915(encoder->base.dev); enum phy phy = intel_port_to_phy(i915, encoder->port); - return _cnl_ddi_is_clock_enabled(i915, ADLS_DPCLKA_CFGCR(phy), + return _icl_ddi_is_clock_enabled(i915, ADLS_DPCLKA_CFGCR(phy), ICL_DPCLKA_CFGCR0_DDI_CLK_OFF(phy)); } @@ -1695,7 +1527,7 @@ static struct intel_shared_dpll *adls_ddi_get_pll(struct intel_encoder *encoder) struct drm_i915_private *i915 = to_i915(encoder->base.dev); enum phy phy = intel_port_to_phy(i915, encoder->port); - return _cnl_ddi_get_pll(i915, ADLS_DPCLKA_CFGCR(phy), + return _icl_ddi_get_pll(i915, ADLS_DPCLKA_CFGCR(phy), ADLS_DPCLKA_CFGCR_DDI_CLK_SEL_MASK(phy), ADLS_DPCLKA_CFGCR_DDI_SHIFT(phy)); } @@ -1710,7 +1542,7 @@ static void rkl_ddi_enable_clock(struct intel_encoder *encoder, if (drm_WARN_ON(&i915->drm, !pll)) return; - _cnl_ddi_enable_clock(i915, ICL_DPCLKA_CFGCR0, + _icl_ddi_enable_clock(i915, ICL_DPCLKA_CFGCR0, RKL_DPCLKA_CFGCR0_DDI_CLK_SEL_MASK(phy), RKL_DPCLKA_CFGCR0_DDI_CLK_SEL(pll->info->id, phy), RKL_DPCLKA_CFGCR0_DDI_CLK_OFF(phy)); @@ -1721,7 +1553,7 @@ static void rkl_ddi_disable_clock(struct intel_encoder *encoder) struct drm_i915_private *i915 = to_i915(encoder->base.dev); enum phy phy = intel_port_to_phy(i915, encoder->port); - _cnl_ddi_disable_clock(i915, ICL_DPCLKA_CFGCR0, + _icl_ddi_disable_clock(i915, ICL_DPCLKA_CFGCR0, RKL_DPCLKA_CFGCR0_DDI_CLK_OFF(phy)); } @@ -1730,7 +1562,7 @@ static bool rkl_ddi_is_clock_enabled(struct intel_encoder *encoder) struct drm_i915_private *i915 = to_i915(encoder->base.dev); enum phy phy = intel_port_to_phy(i915, encoder->port); - return _cnl_ddi_is_clock_enabled(i915, ICL_DPCLKA_CFGCR0, + return _icl_ddi_is_clock_enabled(i915, ICL_DPCLKA_CFGCR0, RKL_DPCLKA_CFGCR0_DDI_CLK_OFF(phy)); } @@ -1739,7 +1571,7 @@ static struct intel_shared_dpll *rkl_ddi_get_pll(struct intel_encoder *encoder) struct drm_i915_private *i915 = to_i915(encoder->base.dev); enum phy phy = intel_port_to_phy(i915, encoder->port); - return _cnl_ddi_get_pll(i915, ICL_DPCLKA_CFGCR0, + return _icl_ddi_get_pll(i915, ICL_DPCLKA_CFGCR0, RKL_DPCLKA_CFGCR0_DDI_CLK_SEL_MASK(phy), RKL_DPCLKA_CFGCR0_DDI_CLK_SEL_SHIFT(phy)); } @@ -1763,7 +1595,7 @@ static void dg1_ddi_enable_clock(struct intel_encoder *encoder, 
(pll->info->id >= DPLL_ID_DG1_DPLL2 && phy < PHY_C))) return; - _cnl_ddi_enable_clock(i915, DG1_DPCLKA_CFGCR0(phy), + _icl_ddi_enable_clock(i915, DG1_DPCLKA_CFGCR0(phy), DG1_DPCLKA_CFGCR0_DDI_CLK_SEL_MASK(phy), DG1_DPCLKA_CFGCR0_DDI_CLK_SEL(pll->info->id, phy), DG1_DPCLKA_CFGCR0_DDI_CLK_OFF(phy)); @@ -1774,7 +1606,7 @@ static void dg1_ddi_disable_clock(struct intel_encoder *encoder) struct drm_i915_private *i915 = to_i915(encoder->base.dev); enum phy phy = intel_port_to_phy(i915, encoder->port); - _cnl_ddi_disable_clock(i915, DG1_DPCLKA_CFGCR0(phy), + _icl_ddi_disable_clock(i915, DG1_DPCLKA_CFGCR0(phy), DG1_DPCLKA_CFGCR0_DDI_CLK_OFF(phy)); } @@ -1783,7 +1615,7 @@ static bool dg1_ddi_is_clock_enabled(struct intel_encoder *encoder) struct drm_i915_private *i915 = to_i915(encoder->base.dev); enum phy phy = intel_port_to_phy(i915, encoder->port); - return _cnl_ddi_is_clock_enabled(i915, DG1_DPCLKA_CFGCR0(phy), + return _icl_ddi_is_clock_enabled(i915, DG1_DPCLKA_CFGCR0(phy), DG1_DPCLKA_CFGCR0_DDI_CLK_OFF(phy)); } @@ -1820,7 +1652,7 @@ static void icl_ddi_combo_enable_clock(struct intel_encoder *encoder, if (drm_WARN_ON(&i915->drm, !pll)) return; - _cnl_ddi_enable_clock(i915, ICL_DPCLKA_CFGCR0, + _icl_ddi_enable_clock(i915, ICL_DPCLKA_CFGCR0, ICL_DPCLKA_CFGCR0_DDI_CLK_SEL_MASK(phy), ICL_DPCLKA_CFGCR0_DDI_CLK_SEL(pll->info->id, phy), ICL_DPCLKA_CFGCR0_DDI_CLK_OFF(phy)); @@ -1831,7 +1663,7 @@ static void icl_ddi_combo_disable_clock(struct intel_encoder *encoder) struct drm_i915_private *i915 = to_i915(encoder->base.dev); enum phy phy = intel_port_to_phy(i915, encoder->port); - _cnl_ddi_disable_clock(i915, ICL_DPCLKA_CFGCR0, + _icl_ddi_disable_clock(i915, ICL_DPCLKA_CFGCR0, ICL_DPCLKA_CFGCR0_DDI_CLK_OFF(phy)); } @@ -1840,7 +1672,7 @@ static bool icl_ddi_combo_is_clock_enabled(struct intel_encoder *encoder) struct drm_i915_private *i915 = to_i915(encoder->base.dev); enum phy phy = intel_port_to_phy(i915, encoder->port); - return _cnl_ddi_is_clock_enabled(i915, ICL_DPCLKA_CFGCR0, + return _icl_ddi_is_clock_enabled(i915, ICL_DPCLKA_CFGCR0, ICL_DPCLKA_CFGCR0_DDI_CLK_OFF(phy)); } @@ -1849,7 +1681,7 @@ struct intel_shared_dpll *icl_ddi_combo_get_pll(struct intel_encoder *encoder) struct drm_i915_private *i915 = to_i915(encoder->base.dev); enum phy phy = intel_port_to_phy(i915, encoder->port); - return _cnl_ddi_get_pll(i915, ICL_DPCLKA_CFGCR0, + return _icl_ddi_get_pll(i915, ICL_DPCLKA_CFGCR0, ICL_DPCLKA_CFGCR0_DDI_CLK_SEL_MASK(phy), ICL_DPCLKA_CFGCR0_DDI_CLK_SEL_SHIFT(phy)); } @@ -1982,50 +1814,6 @@ static struct intel_shared_dpll *icl_ddi_tc_get_pll(struct intel_encoder *encode return intel_get_shared_dpll_by_id(i915, id); } -static void cnl_ddi_enable_clock(struct intel_encoder *encoder, - const struct intel_crtc_state *crtc_state) -{ - struct drm_i915_private *i915 = to_i915(encoder->base.dev); - const struct intel_shared_dpll *pll = crtc_state->shared_dpll; - enum port port = encoder->port; - - if (drm_WARN_ON(&i915->drm, !pll)) - return; - - _cnl_ddi_enable_clock(i915, DPCLKA_CFGCR0, - DPCLKA_CFGCR0_DDI_CLK_SEL_MASK(port), - DPCLKA_CFGCR0_DDI_CLK_SEL(pll->info->id, port), - DPCLKA_CFGCR0_DDI_CLK_OFF(port)); -} - -static void cnl_ddi_disable_clock(struct intel_encoder *encoder) -{ - struct drm_i915_private *i915 = to_i915(encoder->base.dev); - enum port port = encoder->port; - - _cnl_ddi_disable_clock(i915, DPCLKA_CFGCR0, - DPCLKA_CFGCR0_DDI_CLK_OFF(port)); -} - -static bool cnl_ddi_is_clock_enabled(struct intel_encoder *encoder) -{ - struct drm_i915_private *i915 = to_i915(encoder->base.dev); - enum 
port port = encoder->port; - - return _cnl_ddi_is_clock_enabled(i915, DPCLKA_CFGCR0, - DPCLKA_CFGCR0_DDI_CLK_OFF(port)); -} - -static struct intel_shared_dpll *cnl_ddi_get_pll(struct intel_encoder *encoder) -{ - struct drm_i915_private *i915 = to_i915(encoder->base.dev); - enum port port = encoder->port; - - return _cnl_ddi_get_pll(i915, DPCLKA_CFGCR0, - DPCLKA_CFGCR0_DDI_CLK_SEL_MASK(port), - DPCLKA_CFGCR0_DDI_CLK_SEL_SHIFT(port)); -} - static struct intel_shared_dpll *bxt_ddi_get_pll(struct intel_encoder *encoder) { struct drm_i915_private *i915 = to_i915(encoder->base.dev); @@ -2249,7 +2037,7 @@ void intel_ddi_sanitize_encoder_pll_mapping(struct intel_encoder *encoder) ddi_clk_needed = false; } - if (ddi_clk_needed || !encoder->disable_clock || + if (ddi_clk_needed || !encoder->is_clock_enabled || !encoder->is_clock_enabled(encoder)) return; @@ -2534,6 +2322,116 @@ static void intel_ddi_mso_configure(const struct intel_crtc_state *crtc_state) OVERLAP_PIXELS_MASK, dss1); } +static void dg2_ddi_pre_enable_dp(struct intel_atomic_state *state, + struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) +{ + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + enum phy phy = intel_port_to_phy(dev_priv, encoder->port); + struct intel_digital_port *dig_port = enc_to_dig_port(encoder); + bool is_mst = intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP_MST); + int level = intel_ddi_dp_level(intel_dp); + + intel_dp_set_link_params(intel_dp, crtc_state->port_clock, + crtc_state->lane_count); + + /* + * 1. Enable Power Wells + * + * This was handled at the beginning of intel_atomic_commit_tail(), + * before we called down into this function. + */ + + /* 2. Enable Panel Power if PPS is required */ + intel_pps_on(intel_dp); + + /* + * 3. Enable the port PLL. + */ + intel_ddi_enable_clock(encoder, crtc_state); + + /* 4. Enable IO power */ + if (!intel_phy_is_tc(dev_priv, phy) || + dig_port->tc_mode != TC_PORT_TBT_ALT) + dig_port->ddi_io_wakeref = intel_display_power_get(dev_priv, + dig_port->ddi_io_power_domain); + + /* + * 5. The rest of the below are substeps under the bspec's "Enable and + * Train Display Port" step. Note that steps that are specific to + * MST will be handled by intel_mst_pre_enable_dp() before/after it + * calls into this function. Also intel_mst_pre_enable_dp() only calls + * us when active_mst_links==0, so any steps designated for "single + * stream or multi-stream master transcoder" can just be performed + * unconditionally here. + */ + + /* + * 5.a Configure Transcoder Clock Select to direct the Port clock to the + * Transcoder. + */ + intel_ddi_enable_pipe_clock(encoder, crtc_state); + + /* 5.b Not relevant to i915 for now */ + + /* + * 5.c Configure TRANS_DDI_FUNC_CTL DDI Select, DDI Mode Select & MST + * Transport Select + */ + intel_ddi_config_transcoder_func(encoder, crtc_state); + + /* + * 5.d Configure & enable DP_TP_CTL with link training pattern 1 + * selected + * + * This will be handled by the intel_dp_start_link_train() farther + * down this function. + */ + + /* 5.e Configure voltage swing and related IO settings */ + intel_snps_phy_ddi_vswing_sequence(encoder, level); + + /* + * 5.f Configure and enable DDI_BUF_CTL + * 5.g Wait for DDI_BUF_CTL DDI Idle Status = 0b (Not Idle), timeout + * after 1200 us. + * + * We only configure what the register value will be here. Actual + * enabling happens during link training farther down. 
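For orientation, the DG2 pre-enable path being added here follows the same bspec step numbering as the TGL one, but programs voltage swing through the SNPS PHY helper instead of a combo/DKL vswing sequence. A condensed sketch of the call spine (the sketch_ name is mine; power-domain handling, MST, sink decompression/FEC-ready and DSC steps are omitted, and the remaining steps continue in the hunk just below):

static void sketch_dg2_pre_enable_dp(struct intel_encoder *encoder,
                                     const struct intel_crtc_state *crtc_state)
{
        struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
        int level = intel_ddi_dp_level(intel_dp);

        intel_pps_on(intel_dp);                                /* 2: panel power */
        intel_ddi_enable_clock(encoder, crtc_state);           /* 3: port PLL */
        intel_ddi_enable_pipe_clock(encoder, crtc_state);      /* 5.a: transcoder clock */
        intel_ddi_config_transcoder_func(encoder, crtc_state); /* 5.c: DDI func control */
        intel_snps_phy_ddi_vswing_sequence(encoder, level);    /* 5.e: SNPS PHY vswing */
        intel_ddi_init_dp_buf_reg(encoder, crtc_state);        /* 5.f: DDI_BUF_CTL value */
        intel_dp_start_link_train(intel_dp, crtc_state);       /* 5.h: link training */
        intel_dp_stop_link_train(intel_dp, crtc_state);        /* 5.j: normal pattern */
        intel_ddi_enable_fec(encoder, crtc_state);             /* 5.k: FEC, if needed */
}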
+ */ + intel_ddi_init_dp_buf_reg(encoder, crtc_state); + + if (!is_mst) + intel_dp_set_power(intel_dp, DP_SET_POWER_D0); + + intel_dp_sink_set_decompression_state(intel_dp, crtc_state, true); + /* + * DDI FEC: "anticipates enabling FEC encoding sets the FEC_READY bit + * in the FEC_CONFIGURATION register to 1 before initiating link + * training + */ + intel_dp_sink_set_fec_ready(intel_dp, crtc_state); + + /* + * 5.h Follow DisplayPort specification training sequence (see notes for + * failure handling) + * 5.i If DisplayPort multi-stream - Set DP_TP_CTL link training to Idle + * Pattern, wait for 5 idle patterns (DP_TP_STATUS Min_Idles_Sent) + * (timeout after 800 us) + */ + intel_dp_start_link_train(intel_dp, crtc_state); + + /* 5.j Set DP_TP_CTL link training to Normal */ + if (!is_trans_port_sync_mode(crtc_state)) + intel_dp_stop_link_train(intel_dp, crtc_state); + + /* 5.k Configure and enable FEC if needed */ + intel_ddi_enable_fec(encoder, crtc_state); + intel_dsc_enable(encoder, crtc_state); +} + static void tgl_ddi_pre_enable_dp(struct intel_atomic_state *state, struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, @@ -2714,12 +2612,10 @@ static void hsw_ddi_pre_enable_dp(struct intel_atomic_state *state, if (DISPLAY_VER(dev_priv) >= 11) icl_ddi_vswing_sequence(encoder, crtc_state, level); - else if (IS_CANNONLAKE(dev_priv)) - cnl_ddi_vswing_sequence(encoder, crtc_state, level); else if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) bxt_ddi_vswing_sequence(encoder, crtc_state, level); else - intel_prepare_dp_ddi_buffers(encoder, crtc_state); + hsw_prepare_dp_ddi_buffers(encoder, crtc_state); intel_ddi_power_up_lanes(encoder, crtc_state); @@ -2751,7 +2647,9 @@ static void intel_ddi_pre_enable_dp(struct intel_atomic_state *state, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - if (DISPLAY_VER(dev_priv) >= 12) + if (IS_DG2(dev_priv)) + dg2_ddi_pre_enable_dp(state, encoder, crtc_state, conn_state); + else if (DISPLAY_VER(dev_priv) >= 12) tgl_ddi_pre_enable_dp(state, encoder, crtc_state, conn_state); else hsw_ddi_pre_enable_dp(state, encoder, crtc_state, conn_state); @@ -2827,6 +2725,7 @@ static void intel_ddi_pre_enable(struct intel_atomic_state *state, conn_state); /* FIXME precompute everything properly */ + /* FIXME how do we turn infoframes off again? 
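With that, intel_ddi_pre_enable_dp() grows a third branch: DG2 takes its own path because its port clock comes from the SNPS PHY MPLLB (see the intel_mpllb_* clock hooks installed later in this patch) rather than a shared DPLL, and its voltage swing goes through intel_snps_phy_ddi_vswing_sequence(). The same IS_DG2()-first pattern repeats below for the HDMI vswing and set_signal_levels selection. A sketch of the dispatch as it reads after this hunk (sketch_ name is mine):

static void sketch_pre_enable_dp(struct intel_atomic_state *state,
                                 struct intel_encoder *encoder,
                                 const struct intel_crtc_state *crtc_state,
                                 const struct drm_connector_state *conn_state)
{
        struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);

        if (IS_DG2(dev_priv))
                dg2_ddi_pre_enable_dp(state, encoder, crtc_state, conn_state);
        else if (DISPLAY_VER(dev_priv) >= 12)
                tgl_ddi_pre_enable_dp(state, encoder, crtc_state, conn_state);
        else
                hsw_ddi_pre_enable_dp(state, encoder, crtc_state, conn_state);
}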
*/ if (dig_port->lspcon.active && dig_port->dp.has_hdmi_sink) dig_port->set_infoframes(encoder, crtc_state->has_infoframe, @@ -3157,16 +3056,16 @@ static void intel_enable_ddi_hdmi(struct intel_atomic_state *state, "[CONNECTOR:%d:%s] Failed to configure sink scrambling/TMDS bit clock ratio\n", connector->base.id, connector->name); - if (DISPLAY_VER(dev_priv) >= 12) + if (IS_DG2(dev_priv)) + intel_snps_phy_ddi_vswing_sequence(encoder, U32_MAX); + else if (DISPLAY_VER(dev_priv) >= 12) tgl_ddi_vswing_sequence(encoder, crtc_state, level); else if (DISPLAY_VER(dev_priv) == 11) icl_ddi_vswing_sequence(encoder, crtc_state, level); - else if (IS_CANNONLAKE(dev_priv)) - cnl_ddi_vswing_sequence(encoder, crtc_state, level); else if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) bxt_ddi_vswing_sequence(encoder, crtc_state, level); else - intel_prepare_hdmi_ddi_buffers(encoder, level); + hsw_prepare_hdmi_ddi_buffers(encoder, crtc_state, level); if (DISPLAY_VER(dev_priv) == 9 && !IS_BROXTON(dev_priv)) skl_ddi_set_iboost(encoder, crtc_state, level); @@ -3260,12 +3159,6 @@ static void intel_disable_ddi_dp(struct intel_atomic_state *state, intel_dp->link_trained = false; - if (old_crtc_state->has_audio) - intel_audio_codec_disable(encoder, - old_crtc_state, old_conn_state); - - intel_edp_drrs_disable(intel_dp, old_crtc_state); - intel_psr_disable(intel_dp, old_crtc_state); intel_edp_backlight_off(old_conn_state); /* Disable the decompression in DP Sink */ intel_dp_sink_set_decompression_state(intel_dp, old_crtc_state, @@ -3283,10 +3176,6 @@ static void intel_disable_ddi_hdmi(struct intel_atomic_state *state, struct drm_i915_private *i915 = to_i915(encoder->base.dev); struct drm_connector *connector = old_conn_state->connector; - if (old_crtc_state->has_audio) - intel_audio_codec_disable(encoder, - old_crtc_state, old_conn_state); - if (!intel_hdmi_handle_sink_scrambling(encoder, connector, false, false)) drm_dbg_kms(&i915->drm, @@ -3294,6 +3183,25 @@ static void intel_disable_ddi_hdmi(struct intel_atomic_state *state, connector->base.id, connector->name); } +static void intel_pre_disable_ddi(struct intel_atomic_state *state, + struct intel_encoder *encoder, + const struct intel_crtc_state *old_crtc_state, + const struct drm_connector_state *old_conn_state) +{ + struct intel_dp *intel_dp; + + if (old_crtc_state->has_audio) + intel_audio_codec_disable(encoder, old_crtc_state, + old_conn_state); + + if (intel_crtc_has_type(old_crtc_state, INTEL_OUTPUT_HDMI)) + return; + + intel_dp = enc_to_intel_dp(encoder); + intel_edp_drrs_disable(intel_dp, old_crtc_state); + intel_psr_disable(intel_dp, old_crtc_state); +} + static void intel_disable_ddi(struct intel_atomic_state *state, struct intel_encoder *encoder, const struct intel_crtc_state *old_crtc_state, @@ -3510,7 +3418,7 @@ static bool intel_ddi_is_audio_enabled(struct drm_i915_private *dev_priv, if (cpu_transcoder == TRANSCODER_EDP) return false; - if (!intel_display_power_is_enabled(dev_priv, POWER_DOMAIN_AUDIO)) + if (!intel_display_power_is_enabled(dev_priv, POWER_DOMAIN_AUDIO_MMIO)) return false; return intel_de_read(dev_priv, HSW_AUD_PIN_ELD_CP_VLD) & @@ -3526,8 +3434,6 @@ void intel_ddi_compute_min_voltage_level(struct drm_i915_private *dev_priv, crtc_state->min_voltage_level = 3; else if (DISPLAY_VER(dev_priv) >= 11 && crtc_state->port_clock > 594000) crtc_state->min_voltage_level = 1; - else if (IS_CANNONLAKE(dev_priv) && crtc_state->port_clock > 594000) - crtc_state->min_voltage_level = 2; } static enum transcoder 
bdw_transcoder_master_readout(struct drm_i915_private *dev_priv, @@ -3594,7 +3500,7 @@ static void intel_ddi_read_func_ctl(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_crtc *intel_crtc = to_intel_crtc(pipe_config->uapi.crtc); + struct intel_crtc *crtc = to_intel_crtc(pipe_config->uapi.crtc); enum transcoder cpu_transcoder = pipe_config->cpu_transcoder; struct intel_digital_port *dig_port = enc_to_dig_port(encoder); u32 temp, flags = 0; @@ -3657,7 +3563,7 @@ static void intel_ddi_read_func_ctl(struct intel_encoder *encoder, pipe_config->output_types |= BIT(INTEL_OUTPUT_DP); pipe_config->lane_count = ((temp & DDI_PORT_WIDTH_MASK) >> DDI_PORT_WIDTH_SHIFT) + 1; - intel_dp_get_m_n(intel_crtc, pipe_config); + intel_dp_get_m_n(crtc, pipe_config); if (DISPLAY_VER(dev_priv) >= 11) { i915_reg_t dp_tp_ctl = dp_tp_ctl_reg(encoder, pipe_config); @@ -3687,7 +3593,7 @@ static void intel_ddi_read_func_ctl(struct intel_encoder *encoder, pipe_config->mst_master_transcoder = REG_FIELD_GET(TRANS_DDI_MST_TRANSPORT_SELECT_MASK, temp); - intel_dp_get_m_n(intel_crtc, pipe_config); + intel_dp_get_m_n(crtc, pipe_config); pipe_config->infoframes.enable |= intel_hdmi_infoframes_enabled(encoder, pipe_config); @@ -3801,6 +3707,15 @@ void intel_ddi_get_clock(struct intel_encoder *encoder, &crtc_state->dpll_hw_state); } +static void dg2_ddi_get_config(struct intel_encoder *encoder, + struct intel_crtc_state *crtc_state) +{ + intel_mpllb_readout_hw_state(encoder, &crtc_state->mpllb_state); + crtc_state->port_clock = intel_mpllb_calc_port_clock(encoder, &crtc_state->mpllb_state); + + intel_ddi_get_config(encoder, crtc_state); +} + static void adls_ddi_get_config(struct intel_encoder *encoder, struct intel_crtc_state *crtc_state) { @@ -3868,13 +3783,6 @@ static void icl_ddi_tc_get_config(struct intel_encoder *encoder, intel_ddi_get_config(encoder, crtc_state); } -static void cnl_ddi_get_config(struct intel_encoder *encoder, - struct intel_crtc_state *crtc_state) -{ - intel_ddi_get_clock(encoder, crtc_state, cnl_ddi_get_pll(encoder)); - intel_ddi_get_config(encoder, crtc_state); -} - static void bxt_ddi_get_config(struct intel_encoder *encoder, struct intel_crtc_state *crtc_state) { @@ -4121,12 +4029,12 @@ intel_ddi_init_dp_connector(struct intel_digital_port *dig_port) dig_port->dp.set_link_train = intel_ddi_set_link_train; dig_port->dp.set_idle_link_train = intel_ddi_set_idle_link_train; - if (DISPLAY_VER(dev_priv) >= 12) + if (IS_DG2(dev_priv)) + dig_port->dp.set_signal_levels = dg2_set_signal_levels; + else if (DISPLAY_VER(dev_priv) >= 12) dig_port->dp.set_signal_levels = tgl_set_signal_levels; else if (DISPLAY_VER(dev_priv) >= 11) dig_port->dp.set_signal_levels = icl_set_signal_levels; - else if (IS_CANNONLAKE(dev_priv)) - dig_port->dp.set_signal_levels = cnl_set_signal_levels; else if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) dig_port->dp.set_signal_levels = bxt_set_signal_levels; else @@ -4373,15 +4281,6 @@ static bool intel_ddi_a_force_4_lanes(struct intel_digital_port *dig_port) if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) return true; - /* Cannonlake: Most of SKUs don't support DDI_E, and the only - * one who does also have a full A/E split called - * DDI_F what makes DDI_E useless. However for this - * case let's trust VBT info. 
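A note on the signal-level hooks selected just above: the diff drops the CNL wrapper and adds a DG2 one (expressed as the names shifting down one function slot), leaving each hook a thin shim that computes the DP level and forwards to its platform's vswing sequence; the DG2 shim forwards to the SNPS PHY helper. A sketch of the DG2 variant mirroring the hunk earlier in this file (sketch_ name is mine; crtc_state stays in the signature to match the common hook type even though this variant does not use it):

static void sketch_dg2_set_signal_levels(struct intel_dp *intel_dp,
                                         const struct intel_crtc_state *crtc_state)
{
        struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base;
        int level = intel_ddi_dp_level(intel_dp);

        /* DG2 programs swing/pre-emphasis through the SNPS PHY. */
        intel_snps_phy_ddi_vswing_sequence(encoder, level);
}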
- */ - if (IS_CANNONLAKE(dev_priv) && - !intel_bios_is_port_present(dev_priv, PORT_E)) - return true; - return false; } @@ -4486,15 +4385,6 @@ static enum hpd_pin ehl_hpd_pin(struct drm_i915_private *dev_priv, return HPD_PORT_A + port - PORT_A; } -static enum hpd_pin cnl_hpd_pin(struct drm_i915_private *dev_priv, - enum port port) -{ - if (port == PORT_F) - return HPD_PORT_E; - - return HPD_PORT_A + port - PORT_A; -} - static enum hpd_pin skl_hpd_pin(struct drm_i915_private *dev_priv, enum port port) { if (HAS_PCH_TGP(dev_priv)) @@ -4513,6 +4403,36 @@ static bool intel_ddi_is_tc(struct drm_i915_private *i915, enum port port) return false; } +static void intel_ddi_encoder_suspend(struct intel_encoder *encoder) +{ + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); + struct drm_i915_private *i915 = dp_to_i915(intel_dp); + struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); + enum phy phy = intel_port_to_phy(i915, encoder->port); + + intel_dp_encoder_suspend(encoder); + + if (!intel_phy_is_tc(i915, phy)) + return; + + intel_tc_port_disconnect_phy(dig_port); +} + +static void intel_ddi_encoder_shutdown(struct intel_encoder *encoder) +{ + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); + struct drm_i915_private *i915 = dp_to_i915(intel_dp); + struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); + enum phy phy = intel_port_to_phy(i915, encoder->port); + + intel_dp_encoder_shutdown(encoder); + + if (!intel_phy_is_tc(i915, phy)) + return; + + intel_tc_port_disconnect_phy(dig_port); +} + #define port_tc_name(port) ((port) - PORT_TC1 + '1') #define tc_port_name(tc_port) ((tc_port) - TC_PORT_1 + '1') @@ -4616,14 +4536,15 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) encoder->enable = intel_enable_ddi; encoder->pre_pll_enable = intel_ddi_pre_pll_enable; encoder->pre_enable = intel_ddi_pre_enable; + encoder->pre_disable = intel_pre_disable_ddi; encoder->disable = intel_disable_ddi; encoder->post_disable = intel_ddi_post_disable; encoder->update_pipe = intel_ddi_update_pipe; encoder->get_hw_state = intel_ddi_get_hw_state; encoder->sync_state = intel_ddi_sync_state; encoder->initial_fastset_check = intel_ddi_initial_fastset_check; - encoder->suspend = intel_dp_encoder_suspend; - encoder->shutdown = intel_dp_encoder_shutdown; + encoder->suspend = intel_ddi_encoder_suspend; + encoder->shutdown = intel_ddi_encoder_shutdown; encoder->get_power_domains = intel_ddi_get_power_domains; encoder->type = INTEL_OUTPUT_DDI; @@ -4632,7 +4553,11 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) encoder->cloneable = 0; encoder->pipe_mask = ~0; - if (IS_ALDERLAKE_S(dev_priv)) { + if (IS_DG2(dev_priv)) { + encoder->enable_clock = intel_mpllb_enable; + encoder->disable_clock = intel_mpllb_disable; + encoder->get_config = dg2_ddi_get_config; + } else if (IS_ALDERLAKE_S(dev_priv)) { encoder->enable_clock = adls_ddi_enable_clock; encoder->disable_clock = adls_ddi_disable_clock; encoder->is_clock_enabled = adls_ddi_is_clock_enabled; @@ -4671,11 +4596,6 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) encoder->is_clock_enabled = icl_ddi_combo_is_clock_enabled; encoder->get_config = icl_ddi_combo_get_config; } - } else if (IS_CANNONLAKE(dev_priv)) { - encoder->enable_clock = cnl_ddi_enable_clock; - encoder->disable_clock = cnl_ddi_disable_clock; - encoder->is_clock_enabled = cnl_ddi_is_clock_enabled; - encoder->get_config = cnl_ddi_get_config; } else if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) { /* BXT/GLK have 
fixed PLL->port mapping */ encoder->get_config = bxt_ddi_get_config; @@ -4691,6 +4611,8 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) encoder->get_config = hsw_ddi_get_config; } + intel_ddi_buf_trans_init(encoder); + if (DISPLAY_VER(dev_priv) >= 13) encoder->hpd_pin = xelpd_hpd_pin(dev_priv, port); else if (IS_DG1(dev_priv)) @@ -4703,8 +4625,6 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) encoder->hpd_pin = ehl_hpd_pin(dev_priv, port); else if (DISPLAY_VER(dev_priv) == 11) encoder->hpd_pin = icl_hpd_pin(dev_priv, port); - else if (IS_CANNONLAKE(dev_priv)) - encoder->hpd_pin = cnl_hpd_pin(dev_priv, port); else if (DISPLAY_VER(dev_priv) == 9 && !IS_BROXTON(dev_priv)) encoder->hpd_pin = skl_hpd_pin(dev_priv, port); else diff --git a/drivers/gpu/drm/i915/display/intel_ddi.h b/drivers/gpu/drm/i915/display/intel_ddi.h index 59c6b01d4199..7d448485d887 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.h +++ b/drivers/gpu/drm/i915/display/intel_ddi.h @@ -40,8 +40,8 @@ bool hsw_ddi_is_clock_enabled(struct intel_encoder *encoder); void hsw_ddi_get_config(struct intel_encoder *encoder, struct intel_crtc_state *crtc_state); struct intel_shared_dpll *icl_ddi_combo_get_pll(struct intel_encoder *encoder); -void intel_prepare_dp_ddi_buffers(struct intel_encoder *encoder, - const struct intel_crtc_state *crtc_state); +void hsw_prepare_dp_ddi_buffers(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state); void intel_wait_ddi_buf_idle(struct drm_i915_private *dev_priv, enum port port); void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port); diff --git a/drivers/gpu/drm/i915/display/intel_ddi_buf_trans.c b/drivers/gpu/drm/i915/display/intel_ddi_buf_trans.c index 8bfd00f49f2a..ba2c08f1a797 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi_buf_trans.c +++ b/drivers/gpu/drm/i915/display/intel_ddi_buf_trans.c @@ -13,1095 +13,1160 @@ * them for both DP and FDI transports, allowing those ports to * automatically adapt to HDMI connections as well */ -static const struct ddi_buf_trans hsw_ddi_translations_dp[] = { - { 0x00FFFFFF, 0x0006000E, 0x0 }, - { 0x00D75FFF, 0x0005000A, 0x0 }, - { 0x00C30FFF, 0x00040006, 0x0 }, - { 0x80AAAFFF, 0x000B0000, 0x0 }, - { 0x00FFFFFF, 0x0005000A, 0x0 }, - { 0x00D75FFF, 0x000C0004, 0x0 }, - { 0x80C30FFF, 0x000B0000, 0x0 }, - { 0x00FFFFFF, 0x00040006, 0x0 }, - { 0x80D75FFF, 0x000B0000, 0x0 }, -}; - -static const struct ddi_buf_trans hsw_ddi_translations_fdi[] = { - { 0x00FFFFFF, 0x0007000E, 0x0 }, - { 0x00D75FFF, 0x000F000A, 0x0 }, - { 0x00C30FFF, 0x00060006, 0x0 }, - { 0x00AAAFFF, 0x001E0000, 0x0 }, - { 0x00FFFFFF, 0x000F000A, 0x0 }, - { 0x00D75FFF, 0x00160004, 0x0 }, - { 0x00C30FFF, 0x001E0000, 0x0 }, - { 0x00FFFFFF, 0x00060006, 0x0 }, - { 0x00D75FFF, 0x001E0000, 0x0 }, -}; - -static const struct ddi_buf_trans hsw_ddi_translations_hdmi[] = { - /* Idx NT mV d T mV d db */ - { 0x00FFFFFF, 0x0006000E, 0x0 },/* 0: 400 400 0 */ - { 0x00E79FFF, 0x000E000C, 0x0 },/* 1: 400 500 2 */ - { 0x00D75FFF, 0x0005000A, 0x0 },/* 2: 400 600 3.5 */ - { 0x00FFFFFF, 0x0005000A, 0x0 },/* 3: 600 600 0 */ - { 0x00E79FFF, 0x001D0007, 0x0 },/* 4: 600 750 2 */ - { 0x00D75FFF, 0x000C0004, 0x0 },/* 5: 600 900 3.5 */ - { 0x00FFFFFF, 0x00040006, 0x0 },/* 6: 800 800 0 */ - { 0x80E79FFF, 0x00030002, 0x0 },/* 7: 800 1000 2 */ - { 0x00FFFFFF, 0x00140005, 0x0 },/* 8: 850 850 0 */ - { 0x00FFFFFF, 0x000C0004, 0x0 },/* 9: 900 900 0 */ - { 0x00FFFFFF, 0x001C0003, 0x0 },/* 10: 950 950 0 */ - { 0x80FFFFFF, 0x00030002, 0x0 },/* 11: 
1000 1000 0 */ -}; - -static const struct ddi_buf_trans bdw_ddi_translations_edp[] = { - { 0x00FFFFFF, 0x00000012, 0x0 }, - { 0x00EBAFFF, 0x00020011, 0x0 }, - { 0x00C71FFF, 0x0006000F, 0x0 }, - { 0x00AAAFFF, 0x000E000A, 0x0 }, - { 0x00FFFFFF, 0x00020011, 0x0 }, - { 0x00DB6FFF, 0x0005000F, 0x0 }, - { 0x00BEEFFF, 0x000A000C, 0x0 }, - { 0x00FFFFFF, 0x0005000F, 0x0 }, - { 0x00DB6FFF, 0x000A000C, 0x0 }, -}; - -static const struct ddi_buf_trans bdw_ddi_translations_dp[] = { - { 0x00FFFFFF, 0x0007000E, 0x0 }, - { 0x00D75FFF, 0x000E000A, 0x0 }, - { 0x00BEFFFF, 0x00140006, 0x0 }, - { 0x80B2CFFF, 0x001B0002, 0x0 }, - { 0x00FFFFFF, 0x000E000A, 0x0 }, - { 0x00DB6FFF, 0x00160005, 0x0 }, - { 0x80C71FFF, 0x001A0002, 0x0 }, - { 0x00F7DFFF, 0x00180004, 0x0 }, - { 0x80D75FFF, 0x001B0002, 0x0 }, -}; - -static const struct ddi_buf_trans bdw_ddi_translations_fdi[] = { - { 0x00FFFFFF, 0x0001000E, 0x0 }, - { 0x00D75FFF, 0x0004000A, 0x0 }, - { 0x00C30FFF, 0x00070006, 0x0 }, - { 0x00AAAFFF, 0x000C0000, 0x0 }, - { 0x00FFFFFF, 0x0004000A, 0x0 }, - { 0x00D75FFF, 0x00090004, 0x0 }, - { 0x00C30FFF, 0x000C0000, 0x0 }, - { 0x00FFFFFF, 0x00070006, 0x0 }, - { 0x00D75FFF, 0x000C0000, 0x0 }, -}; - -static const struct ddi_buf_trans bdw_ddi_translations_hdmi[] = { - /* Idx NT mV d T mV df db */ - { 0x00FFFFFF, 0x0007000E, 0x0 },/* 0: 400 400 0 */ - { 0x00D75FFF, 0x000E000A, 0x0 },/* 1: 400 600 3.5 */ - { 0x00BEFFFF, 0x00140006, 0x0 },/* 2: 400 800 6 */ - { 0x00FFFFFF, 0x0009000D, 0x0 },/* 3: 450 450 0 */ - { 0x00FFFFFF, 0x000E000A, 0x0 },/* 4: 600 600 0 */ - { 0x00D7FFFF, 0x00140006, 0x0 },/* 5: 600 800 2.5 */ - { 0x80CB2FFF, 0x001B0002, 0x0 },/* 6: 600 1000 4.5 */ - { 0x00FFFFFF, 0x00140006, 0x0 },/* 7: 800 800 0 */ - { 0x80E79FFF, 0x001B0002, 0x0 },/* 8: 800 1000 2 */ - { 0x80FFFFFF, 0x001B0002, 0x0 },/* 9: 1000 1000 0 */ +static const union intel_ddi_buf_trans_entry _hsw_ddi_translations_dp[] = { + { .hsw = { 0x00FFFFFF, 0x0006000E, 0x0 } }, + { .hsw = { 0x00D75FFF, 0x0005000A, 0x0 } }, + { .hsw = { 0x00C30FFF, 0x00040006, 0x0 } }, + { .hsw = { 0x80AAAFFF, 0x000B0000, 0x0 } }, + { .hsw = { 0x00FFFFFF, 0x0005000A, 0x0 } }, + { .hsw = { 0x00D75FFF, 0x000C0004, 0x0 } }, + { .hsw = { 0x80C30FFF, 0x000B0000, 0x0 } }, + { .hsw = { 0x00FFFFFF, 0x00040006, 0x0 } }, + { .hsw = { 0x80D75FFF, 0x000B0000, 0x0 } }, +}; + +static const struct intel_ddi_buf_trans hsw_ddi_translations_dp = { + .entries = _hsw_ddi_translations_dp, + .num_entries = ARRAY_SIZE(_hsw_ddi_translations_dp), +}; + +static const union intel_ddi_buf_trans_entry _hsw_ddi_translations_fdi[] = { + { .hsw = { 0x00FFFFFF, 0x0007000E, 0x0 } }, + { .hsw = { 0x00D75FFF, 0x000F000A, 0x0 } }, + { .hsw = { 0x00C30FFF, 0x00060006, 0x0 } }, + { .hsw = { 0x00AAAFFF, 0x001E0000, 0x0 } }, + { .hsw = { 0x00FFFFFF, 0x000F000A, 0x0 } }, + { .hsw = { 0x00D75FFF, 0x00160004, 0x0 } }, + { .hsw = { 0x00C30FFF, 0x001E0000, 0x0 } }, + { .hsw = { 0x00FFFFFF, 0x00060006, 0x0 } }, + { .hsw = { 0x00D75FFF, 0x001E0000, 0x0 } }, +}; + +static const struct intel_ddi_buf_trans hsw_ddi_translations_fdi = { + .entries = _hsw_ddi_translations_fdi, + .num_entries = ARRAY_SIZE(_hsw_ddi_translations_fdi), +}; + +static const union intel_ddi_buf_trans_entry _hsw_ddi_translations_hdmi[] = { + /* Idx NT mV d T mV d db */ + { .hsw = { 0x00FFFFFF, 0x0006000E, 0x0 } }, /* 0: 400 400 0 */ + { .hsw = { 0x00E79FFF, 0x000E000C, 0x0 } }, /* 1: 400 500 2 */ + { .hsw = { 0x00D75FFF, 0x0005000A, 0x0 } }, /* 2: 400 600 3.5 */ + { .hsw = { 0x00FFFFFF, 0x0005000A, 0x0 } }, /* 3: 600 600 0 */ + { .hsw = { 0x00E79FFF, 
0x001D0007, 0x0 } }, /* 4: 600 750 2 */ + { .hsw = { 0x00D75FFF, 0x000C0004, 0x0 } }, /* 5: 600 900 3.5 */ + { .hsw = { 0x00FFFFFF, 0x00040006, 0x0 } }, /* 6: 800 800 0 */ + { .hsw = { 0x80E79FFF, 0x00030002, 0x0 } }, /* 7: 800 1000 2 */ + { .hsw = { 0x00FFFFFF, 0x00140005, 0x0 } }, /* 8: 850 850 0 */ + { .hsw = { 0x00FFFFFF, 0x000C0004, 0x0 } }, /* 9: 900 900 0 */ + { .hsw = { 0x00FFFFFF, 0x001C0003, 0x0 } }, /* 10: 950 950 0 */ + { .hsw = { 0x80FFFFFF, 0x00030002, 0x0 } }, /* 11: 1000 1000 0 */ +}; + +static const struct intel_ddi_buf_trans hsw_ddi_translations_hdmi = { + .entries = _hsw_ddi_translations_hdmi, + .num_entries = ARRAY_SIZE(_hsw_ddi_translations_hdmi), + .hdmi_default_entry = 6, +}; + +static const union intel_ddi_buf_trans_entry _bdw_ddi_translations_edp[] = { + { .hsw = { 0x00FFFFFF, 0x00000012, 0x0 } }, + { .hsw = { 0x00EBAFFF, 0x00020011, 0x0 } }, + { .hsw = { 0x00C71FFF, 0x0006000F, 0x0 } }, + { .hsw = { 0x00AAAFFF, 0x000E000A, 0x0 } }, + { .hsw = { 0x00FFFFFF, 0x00020011, 0x0 } }, + { .hsw = { 0x00DB6FFF, 0x0005000F, 0x0 } }, + { .hsw = { 0x00BEEFFF, 0x000A000C, 0x0 } }, + { .hsw = { 0x00FFFFFF, 0x0005000F, 0x0 } }, + { .hsw = { 0x00DB6FFF, 0x000A000C, 0x0 } }, +}; + +static const struct intel_ddi_buf_trans bdw_ddi_translations_edp = { + .entries = _bdw_ddi_translations_edp, + .num_entries = ARRAY_SIZE(_bdw_ddi_translations_edp), +}; + +static const union intel_ddi_buf_trans_entry _bdw_ddi_translations_dp[] = { + { .hsw = { 0x00FFFFFF, 0x0007000E, 0x0 } }, + { .hsw = { 0x00D75FFF, 0x000E000A, 0x0 } }, + { .hsw = { 0x00BEFFFF, 0x00140006, 0x0 } }, + { .hsw = { 0x80B2CFFF, 0x001B0002, 0x0 } }, + { .hsw = { 0x00FFFFFF, 0x000E000A, 0x0 } }, + { .hsw = { 0x00DB6FFF, 0x00160005, 0x0 } }, + { .hsw = { 0x80C71FFF, 0x001A0002, 0x0 } }, + { .hsw = { 0x00F7DFFF, 0x00180004, 0x0 } }, + { .hsw = { 0x80D75FFF, 0x001B0002, 0x0 } }, +}; + +static const struct intel_ddi_buf_trans bdw_ddi_translations_dp = { + .entries = _bdw_ddi_translations_dp, + .num_entries = ARRAY_SIZE(_bdw_ddi_translations_dp), +}; + +static const union intel_ddi_buf_trans_entry _bdw_ddi_translations_fdi[] = { + { .hsw = { 0x00FFFFFF, 0x0001000E, 0x0 } }, + { .hsw = { 0x00D75FFF, 0x0004000A, 0x0 } }, + { .hsw = { 0x00C30FFF, 0x00070006, 0x0 } }, + { .hsw = { 0x00AAAFFF, 0x000C0000, 0x0 } }, + { .hsw = { 0x00FFFFFF, 0x0004000A, 0x0 } }, + { .hsw = { 0x00D75FFF, 0x00090004, 0x0 } }, + { .hsw = { 0x00C30FFF, 0x000C0000, 0x0 } }, + { .hsw = { 0x00FFFFFF, 0x00070006, 0x0 } }, + { .hsw = { 0x00D75FFF, 0x000C0000, 0x0 } }, +}; + +static const struct intel_ddi_buf_trans bdw_ddi_translations_fdi = { + .entries = _bdw_ddi_translations_fdi, + .num_entries = ARRAY_SIZE(_bdw_ddi_translations_fdi), +}; + +static const union intel_ddi_buf_trans_entry _bdw_ddi_translations_hdmi[] = { + /* Idx NT mV d T mV df db */ + { .hsw = { 0x00FFFFFF, 0x0007000E, 0x0 } }, /* 0: 400 400 0 */ + { .hsw = { 0x00D75FFF, 0x000E000A, 0x0 } }, /* 1: 400 600 3.5 */ + { .hsw = { 0x00BEFFFF, 0x00140006, 0x0 } }, /* 2: 400 800 6 */ + { .hsw = { 0x00FFFFFF, 0x0009000D, 0x0 } }, /* 3: 450 450 0 */ + { .hsw = { 0x00FFFFFF, 0x000E000A, 0x0 } }, /* 4: 600 600 0 */ + { .hsw = { 0x00D7FFFF, 0x00140006, 0x0 } }, /* 5: 600 800 2.5 */ + { .hsw = { 0x80CB2FFF, 0x001B0002, 0x0 } }, /* 6: 600 1000 4.5 */ + { .hsw = { 0x00FFFFFF, 0x00140006, 0x0 } }, /* 7: 800 800 0 */ + { .hsw = { 0x80E79FFF, 0x001B0002, 0x0 } }, /* 8: 800 1000 2 */ + { .hsw = { 0x80FFFFFF, 0x001B0002, 0x0 } }, /* 9: 1000 1000 0 */ +}; + +static const struct intel_ddi_buf_trans 
bdw_ddi_translations_hdmi = { + .entries = _bdw_ddi_translations_hdmi, + .num_entries = ARRAY_SIZE(_bdw_ddi_translations_hdmi), + .hdmi_default_entry = 7, }; /* Skylake H and S */ -static const struct ddi_buf_trans skl_ddi_translations_dp[] = { - { 0x00002016, 0x000000A0, 0x0 }, - { 0x00005012, 0x0000009B, 0x0 }, - { 0x00007011, 0x00000088, 0x0 }, - { 0x80009010, 0x000000C0, 0x1 }, - { 0x00002016, 0x0000009B, 0x0 }, - { 0x00005012, 0x00000088, 0x0 }, - { 0x80007011, 0x000000C0, 0x1 }, - { 0x00002016, 0x000000DF, 0x0 }, - { 0x80005012, 0x000000C0, 0x1 }, +static const union intel_ddi_buf_trans_entry _skl_ddi_translations_dp[] = { + { .hsw = { 0x00002016, 0x000000A0, 0x0 } }, + { .hsw = { 0x00005012, 0x0000009B, 0x0 } }, + { .hsw = { 0x00007011, 0x00000088, 0x0 } }, + { .hsw = { 0x80009010, 0x000000C0, 0x1 } }, + { .hsw = { 0x00002016, 0x0000009B, 0x0 } }, + { .hsw = { 0x00005012, 0x00000088, 0x0 } }, + { .hsw = { 0x80007011, 0x000000C0, 0x1 } }, + { .hsw = { 0x00002016, 0x000000DF, 0x0 } }, + { .hsw = { 0x80005012, 0x000000C0, 0x1 } }, +}; + +static const struct intel_ddi_buf_trans skl_ddi_translations_dp = { + .entries = _skl_ddi_translations_dp, + .num_entries = ARRAY_SIZE(_skl_ddi_translations_dp), }; /* Skylake U */ -static const struct ddi_buf_trans skl_u_ddi_translations_dp[] = { - { 0x0000201B, 0x000000A2, 0x0 }, - { 0x00005012, 0x00000088, 0x0 }, - { 0x80007011, 0x000000CD, 0x1 }, - { 0x80009010, 0x000000C0, 0x1 }, - { 0x0000201B, 0x0000009D, 0x0 }, - { 0x80005012, 0x000000C0, 0x1 }, - { 0x80007011, 0x000000C0, 0x1 }, - { 0x00002016, 0x00000088, 0x0 }, - { 0x80005012, 0x000000C0, 0x1 }, +static const union intel_ddi_buf_trans_entry _skl_u_ddi_translations_dp[] = { + { .hsw = { 0x0000201B, 0x000000A2, 0x0 } }, + { .hsw = { 0x00005012, 0x00000088, 0x0 } }, + { .hsw = { 0x80007011, 0x000000CD, 0x1 } }, + { .hsw = { 0x80009010, 0x000000C0, 0x1 } }, + { .hsw = { 0x0000201B, 0x0000009D, 0x0 } }, + { .hsw = { 0x80005012, 0x000000C0, 0x1 } }, + { .hsw = { 0x80007011, 0x000000C0, 0x1 } }, + { .hsw = { 0x00002016, 0x00000088, 0x0 } }, + { .hsw = { 0x80005012, 0x000000C0, 0x1 } }, +}; + +static const struct intel_ddi_buf_trans skl_u_ddi_translations_dp = { + .entries = _skl_u_ddi_translations_dp, + .num_entries = ARRAY_SIZE(_skl_u_ddi_translations_dp), }; /* Skylake Y */ -static const struct ddi_buf_trans skl_y_ddi_translations_dp[] = { - { 0x00000018, 0x000000A2, 0x0 }, - { 0x00005012, 0x00000088, 0x0 }, - { 0x80007011, 0x000000CD, 0x3 }, - { 0x80009010, 0x000000C0, 0x3 }, - { 0x00000018, 0x0000009D, 0x0 }, - { 0x80005012, 0x000000C0, 0x3 }, - { 0x80007011, 0x000000C0, 0x3 }, - { 0x00000018, 0x00000088, 0x0 }, - { 0x80005012, 0x000000C0, 0x3 }, +static const union intel_ddi_buf_trans_entry _skl_y_ddi_translations_dp[] = { + { .hsw = { 0x00000018, 0x000000A2, 0x0 } }, + { .hsw = { 0x00005012, 0x00000088, 0x0 } }, + { .hsw = { 0x80007011, 0x000000CD, 0x3 } }, + { .hsw = { 0x80009010, 0x000000C0, 0x3 } }, + { .hsw = { 0x00000018, 0x0000009D, 0x0 } }, + { .hsw = { 0x80005012, 0x000000C0, 0x3 } }, + { .hsw = { 0x80007011, 0x000000C0, 0x3 } }, + { .hsw = { 0x00000018, 0x00000088, 0x0 } }, + { .hsw = { 0x80005012, 0x000000C0, 0x3 } }, +}; + +static const struct intel_ddi_buf_trans skl_y_ddi_translations_dp = { + .entries = _skl_y_ddi_translations_dp, + .num_entries = ARRAY_SIZE(_skl_y_ddi_translations_dp), }; /* Kabylake H and S */ -static const struct ddi_buf_trans kbl_ddi_translations_dp[] = { - { 0x00002016, 0x000000A0, 0x0 }, - { 0x00005012, 0x0000009B, 0x0 }, - { 0x00007011, 0x00000088, 0x0 }, 
- { 0x80009010, 0x000000C0, 0x1 }, - { 0x00002016, 0x0000009B, 0x0 }, - { 0x00005012, 0x00000088, 0x0 }, - { 0x80007011, 0x000000C0, 0x1 }, - { 0x00002016, 0x00000097, 0x0 }, - { 0x80005012, 0x000000C0, 0x1 }, +static const union intel_ddi_buf_trans_entry _kbl_ddi_translations_dp[] = { + { .hsw = { 0x00002016, 0x000000A0, 0x0 } }, + { .hsw = { 0x00005012, 0x0000009B, 0x0 } }, + { .hsw = { 0x00007011, 0x00000088, 0x0 } }, + { .hsw = { 0x80009010, 0x000000C0, 0x1 } }, + { .hsw = { 0x00002016, 0x0000009B, 0x0 } }, + { .hsw = { 0x00005012, 0x00000088, 0x0 } }, + { .hsw = { 0x80007011, 0x000000C0, 0x1 } }, + { .hsw = { 0x00002016, 0x00000097, 0x0 } }, + { .hsw = { 0x80005012, 0x000000C0, 0x1 } }, +}; + +static const struct intel_ddi_buf_trans kbl_ddi_translations_dp = { + .entries = _kbl_ddi_translations_dp, + .num_entries = ARRAY_SIZE(_kbl_ddi_translations_dp), }; /* Kabylake U */ -static const struct ddi_buf_trans kbl_u_ddi_translations_dp[] = { - { 0x0000201B, 0x000000A1, 0x0 }, - { 0x00005012, 0x00000088, 0x0 }, - { 0x80007011, 0x000000CD, 0x3 }, - { 0x80009010, 0x000000C0, 0x3 }, - { 0x0000201B, 0x0000009D, 0x0 }, - { 0x80005012, 0x000000C0, 0x3 }, - { 0x80007011, 0x000000C0, 0x3 }, - { 0x00002016, 0x0000004F, 0x0 }, - { 0x80005012, 0x000000C0, 0x3 }, +static const union intel_ddi_buf_trans_entry _kbl_u_ddi_translations_dp[] = { + { .hsw = { 0x0000201B, 0x000000A1, 0x0 } }, + { .hsw = { 0x00005012, 0x00000088, 0x0 } }, + { .hsw = { 0x80007011, 0x000000CD, 0x3 } }, + { .hsw = { 0x80009010, 0x000000C0, 0x3 } }, + { .hsw = { 0x0000201B, 0x0000009D, 0x0 } }, + { .hsw = { 0x80005012, 0x000000C0, 0x3 } }, + { .hsw = { 0x80007011, 0x000000C0, 0x3 } }, + { .hsw = { 0x00002016, 0x0000004F, 0x0 } }, + { .hsw = { 0x80005012, 0x000000C0, 0x3 } }, +}; + +static const struct intel_ddi_buf_trans kbl_u_ddi_translations_dp = { + .entries = _kbl_u_ddi_translations_dp, + .num_entries = ARRAY_SIZE(_kbl_u_ddi_translations_dp), }; /* Kabylake Y */ -static const struct ddi_buf_trans kbl_y_ddi_translations_dp[] = { - { 0x00001017, 0x000000A1, 0x0 }, - { 0x00005012, 0x00000088, 0x0 }, - { 0x80007011, 0x000000CD, 0x3 }, - { 0x8000800F, 0x000000C0, 0x3 }, - { 0x00001017, 0x0000009D, 0x0 }, - { 0x80005012, 0x000000C0, 0x3 }, - { 0x80007011, 0x000000C0, 0x3 }, - { 0x00001017, 0x0000004C, 0x0 }, - { 0x80005012, 0x000000C0, 0x3 }, +static const union intel_ddi_buf_trans_entry _kbl_y_ddi_translations_dp[] = { + { .hsw = { 0x00001017, 0x000000A1, 0x0 } }, + { .hsw = { 0x00005012, 0x00000088, 0x0 } }, + { .hsw = { 0x80007011, 0x000000CD, 0x3 } }, + { .hsw = { 0x8000800F, 0x000000C0, 0x3 } }, + { .hsw = { 0x00001017, 0x0000009D, 0x0 } }, + { .hsw = { 0x80005012, 0x000000C0, 0x3 } }, + { .hsw = { 0x80007011, 0x000000C0, 0x3 } }, + { .hsw = { 0x00001017, 0x0000004C, 0x0 } }, + { .hsw = { 0x80005012, 0x000000C0, 0x3 } }, +}; + +static const struct intel_ddi_buf_trans kbl_y_ddi_translations_dp = { + .entries = _kbl_y_ddi_translations_dp, + .num_entries = ARRAY_SIZE(_kbl_y_ddi_translations_dp), }; /* * Skylake/Kabylake H and S * eDP 1.4 low vswing translation parameters */ -static const struct ddi_buf_trans skl_ddi_translations_edp[] = { - { 0x00000018, 0x000000A8, 0x0 }, - { 0x00004013, 0x000000A9, 0x0 }, - { 0x00007011, 0x000000A2, 0x0 }, - { 0x00009010, 0x0000009C, 0x0 }, - { 0x00000018, 0x000000A9, 0x0 }, - { 0x00006013, 0x000000A2, 0x0 }, - { 0x00007011, 0x000000A6, 0x0 }, - { 0x00000018, 0x000000AB, 0x0 }, - { 0x00007013, 0x0000009F, 0x0 }, - { 0x00000018, 0x000000DF, 0x0 }, +static const union intel_ddi_buf_trans_entry 
_skl_ddi_translations_edp[] = { + { .hsw = { 0x00000018, 0x000000A8, 0x0 } }, + { .hsw = { 0x00004013, 0x000000A9, 0x0 } }, + { .hsw = { 0x00007011, 0x000000A2, 0x0 } }, + { .hsw = { 0x00009010, 0x0000009C, 0x0 } }, + { .hsw = { 0x00000018, 0x000000A9, 0x0 } }, + { .hsw = { 0x00006013, 0x000000A2, 0x0 } }, + { .hsw = { 0x00007011, 0x000000A6, 0x0 } }, + { .hsw = { 0x00000018, 0x000000AB, 0x0 } }, + { .hsw = { 0x00007013, 0x0000009F, 0x0 } }, + { .hsw = { 0x00000018, 0x000000DF, 0x0 } }, +}; + +static const struct intel_ddi_buf_trans skl_ddi_translations_edp = { + .entries = _skl_ddi_translations_edp, + .num_entries = ARRAY_SIZE(_skl_ddi_translations_edp), }; /* * Skylake/Kabylake U * eDP 1.4 low vswing translation parameters */ -static const struct ddi_buf_trans skl_u_ddi_translations_edp[] = { - { 0x00000018, 0x000000A8, 0x0 }, - { 0x00004013, 0x000000A9, 0x0 }, - { 0x00007011, 0x000000A2, 0x0 }, - { 0x00009010, 0x0000009C, 0x0 }, - { 0x00000018, 0x000000A9, 0x0 }, - { 0x00006013, 0x000000A2, 0x0 }, - { 0x00007011, 0x000000A6, 0x0 }, - { 0x00002016, 0x000000AB, 0x0 }, - { 0x00005013, 0x0000009F, 0x0 }, - { 0x00000018, 0x000000DF, 0x0 }, +static const union intel_ddi_buf_trans_entry _skl_u_ddi_translations_edp[] = { + { .hsw = { 0x00000018, 0x000000A8, 0x0 } }, + { .hsw = { 0x00004013, 0x000000A9, 0x0 } }, + { .hsw = { 0x00007011, 0x000000A2, 0x0 } }, + { .hsw = { 0x00009010, 0x0000009C, 0x0 } }, + { .hsw = { 0x00000018, 0x000000A9, 0x0 } }, + { .hsw = { 0x00006013, 0x000000A2, 0x0 } }, + { .hsw = { 0x00007011, 0x000000A6, 0x0 } }, + { .hsw = { 0x00002016, 0x000000AB, 0x0 } }, + { .hsw = { 0x00005013, 0x0000009F, 0x0 } }, + { .hsw = { 0x00000018, 0x000000DF, 0x0 } }, +}; + +static const struct intel_ddi_buf_trans skl_u_ddi_translations_edp = { + .entries = _skl_u_ddi_translations_edp, + .num_entries = ARRAY_SIZE(_skl_u_ddi_translations_edp), }; /* * Skylake/Kabylake Y * eDP 1.4 low vswing translation parameters */ -static const struct ddi_buf_trans skl_y_ddi_translations_edp[] = { - { 0x00000018, 0x000000A8, 0x0 }, - { 0x00004013, 0x000000AB, 0x0 }, - { 0x00007011, 0x000000A4, 0x0 }, - { 0x00009010, 0x000000DF, 0x0 }, - { 0x00000018, 0x000000AA, 0x0 }, - { 0x00006013, 0x000000A4, 0x0 }, - { 0x00007011, 0x0000009D, 0x0 }, - { 0x00000018, 0x000000A0, 0x0 }, - { 0x00006012, 0x000000DF, 0x0 }, - { 0x00000018, 0x0000008A, 0x0 }, +static const union intel_ddi_buf_trans_entry _skl_y_ddi_translations_edp[] = { + { .hsw = { 0x00000018, 0x000000A8, 0x0 } }, + { .hsw = { 0x00004013, 0x000000AB, 0x0 } }, + { .hsw = { 0x00007011, 0x000000A4, 0x0 } }, + { .hsw = { 0x00009010, 0x000000DF, 0x0 } }, + { .hsw = { 0x00000018, 0x000000AA, 0x0 } }, + { .hsw = { 0x00006013, 0x000000A4, 0x0 } }, + { .hsw = { 0x00007011, 0x0000009D, 0x0 } }, + { .hsw = { 0x00000018, 0x000000A0, 0x0 } }, + { .hsw = { 0x00006012, 0x000000DF, 0x0 } }, + { .hsw = { 0x00000018, 0x0000008A, 0x0 } }, +}; + +static const struct intel_ddi_buf_trans skl_y_ddi_translations_edp = { + .entries = _skl_y_ddi_translations_edp, + .num_entries = ARRAY_SIZE(_skl_y_ddi_translations_edp), }; /* Skylake/Kabylake U, H and S */ -static const struct ddi_buf_trans skl_ddi_translations_hdmi[] = { - { 0x00000018, 0x000000AC, 0x0 }, - { 0x00005012, 0x0000009D, 0x0 }, - { 0x00007011, 0x00000088, 0x0 }, - { 0x00000018, 0x000000A1, 0x0 }, - { 0x00000018, 0x00000098, 0x0 }, - { 0x00004013, 0x00000088, 0x0 }, - { 0x80006012, 0x000000CD, 0x1 }, - { 0x00000018, 0x000000DF, 0x0 }, - { 0x80003015, 0x000000CD, 0x1 }, /* Default */ - { 0x80003015, 0x000000C0, 0x1 }, - 
{ 0x80000018, 0x000000C0, 0x1 }, +static const union intel_ddi_buf_trans_entry _skl_ddi_translations_hdmi[] = { + { .hsw = { 0x00000018, 0x000000AC, 0x0 } }, + { .hsw = { 0x00005012, 0x0000009D, 0x0 } }, + { .hsw = { 0x00007011, 0x00000088, 0x0 } }, + { .hsw = { 0x00000018, 0x000000A1, 0x0 } }, + { .hsw = { 0x00000018, 0x00000098, 0x0 } }, + { .hsw = { 0x00004013, 0x00000088, 0x0 } }, + { .hsw = { 0x80006012, 0x000000CD, 0x1 } }, + { .hsw = { 0x00000018, 0x000000DF, 0x0 } }, + { .hsw = { 0x80003015, 0x000000CD, 0x1 } }, /* Default */ + { .hsw = { 0x80003015, 0x000000C0, 0x1 } }, + { .hsw = { 0x80000018, 0x000000C0, 0x1 } }, +}; + +static const struct intel_ddi_buf_trans skl_ddi_translations_hdmi = { + .entries = _skl_ddi_translations_hdmi, + .num_entries = ARRAY_SIZE(_skl_ddi_translations_hdmi), + .hdmi_default_entry = 8, }; /* Skylake/Kabylake Y */ -static const struct ddi_buf_trans skl_y_ddi_translations_hdmi[] = { - { 0x00000018, 0x000000A1, 0x0 }, - { 0x00005012, 0x000000DF, 0x0 }, - { 0x80007011, 0x000000CB, 0x3 }, - { 0x00000018, 0x000000A4, 0x0 }, - { 0x00000018, 0x0000009D, 0x0 }, - { 0x00004013, 0x00000080, 0x0 }, - { 0x80006013, 0x000000C0, 0x3 }, - { 0x00000018, 0x0000008A, 0x0 }, - { 0x80003015, 0x000000C0, 0x3 }, /* Default */ - { 0x80003015, 0x000000C0, 0x3 }, - { 0x80000018, 0x000000C0, 0x3 }, -}; - - -static const struct bxt_ddi_buf_trans bxt_ddi_translations_dp[] = { - /* Idx NT mV diff db */ - { 52, 0x9A, 0, 128, }, /* 0: 400 0 */ - { 78, 0x9A, 0, 85, }, /* 1: 400 3.5 */ - { 104, 0x9A, 0, 64, }, /* 2: 400 6 */ - { 154, 0x9A, 0, 43, }, /* 3: 400 9.5 */ - { 77, 0x9A, 0, 128, }, /* 4: 600 0 */ - { 116, 0x9A, 0, 85, }, /* 5: 600 3.5 */ - { 154, 0x9A, 0, 64, }, /* 6: 600 6 */ - { 102, 0x9A, 0, 128, }, /* 7: 800 0 */ - { 154, 0x9A, 0, 85, }, /* 8: 800 3.5 */ - { 154, 0x9A, 1, 128, }, /* 9: 1200 0 */ -}; - -static const struct bxt_ddi_buf_trans bxt_ddi_translations_edp[] = { +static const union intel_ddi_buf_trans_entry _skl_y_ddi_translations_hdmi[] = { + { .hsw = { 0x00000018, 0x000000A1, 0x0 } }, + { .hsw = { 0x00005012, 0x000000DF, 0x0 } }, + { .hsw = { 0x80007011, 0x000000CB, 0x3 } }, + { .hsw = { 0x00000018, 0x000000A4, 0x0 } }, + { .hsw = { 0x00000018, 0x0000009D, 0x0 } }, + { .hsw = { 0x00004013, 0x00000080, 0x0 } }, + { .hsw = { 0x80006013, 0x000000C0, 0x3 } }, + { .hsw = { 0x00000018, 0x0000008A, 0x0 } }, + { .hsw = { 0x80003015, 0x000000C0, 0x3 } }, /* Default */ + { .hsw = { 0x80003015, 0x000000C0, 0x3 } }, + { .hsw = { 0x80000018, 0x000000C0, 0x3 } }, +}; + +static const struct intel_ddi_buf_trans skl_y_ddi_translations_hdmi = { + .entries = _skl_y_ddi_translations_hdmi, + .num_entries = ARRAY_SIZE(_skl_y_ddi_translations_hdmi), + .hdmi_default_entry = 8, +}; + +static const union intel_ddi_buf_trans_entry _bxt_ddi_translations_dp[] = { + /* Idx NT mV diff db */ + { .bxt = { 52, 0x9A, 0, 128, } }, /* 0: 400 0 */ + { .bxt = { 78, 0x9A, 0, 85, } }, /* 1: 400 3.5 */ + { .bxt = { 104, 0x9A, 0, 64, } }, /* 2: 400 6 */ + { .bxt = { 154, 0x9A, 0, 43, } }, /* 3: 400 9.5 */ + { .bxt = { 77, 0x9A, 0, 128, } }, /* 4: 600 0 */ + { .bxt = { 116, 0x9A, 0, 85, } }, /* 5: 600 3.5 */ + { .bxt = { 154, 0x9A, 0, 64, } }, /* 6: 600 6 */ + { .bxt = { 102, 0x9A, 0, 128, } }, /* 7: 800 0 */ + { .bxt = { 154, 0x9A, 0, 85, } }, /* 8: 800 3.5 */ + { .bxt = { 154, 0x9A, 1, 128, } }, /* 9: 1200 0 */ +}; + +static const struct intel_ddi_buf_trans bxt_ddi_translations_dp = { + .entries = _bxt_ddi_translations_dp, + .num_entries = ARRAY_SIZE(_bxt_ddi_translations_dp), +}; + +static const union 
intel_ddi_buf_trans_entry _bxt_ddi_translations_edp[] = { /* Idx NT mV diff db */ - { 26, 0, 0, 128, }, /* 0: 200 0 */ - { 38, 0, 0, 112, }, /* 1: 200 1.5 */ - { 48, 0, 0, 96, }, /* 2: 200 4 */ - { 54, 0, 0, 69, }, /* 3: 200 6 */ - { 32, 0, 0, 128, }, /* 4: 250 0 */ - { 48, 0, 0, 104, }, /* 5: 250 1.5 */ - { 54, 0, 0, 85, }, /* 6: 250 4 */ - { 43, 0, 0, 128, }, /* 7: 300 0 */ - { 54, 0, 0, 101, }, /* 8: 300 1.5 */ - { 48, 0, 0, 128, }, /* 9: 300 0 */ + { .bxt = { 26, 0, 0, 128, } }, /* 0: 200 0 */ + { .bxt = { 38, 0, 0, 112, } }, /* 1: 200 1.5 */ + { .bxt = { 48, 0, 0, 96, } }, /* 2: 200 4 */ + { .bxt = { 54, 0, 0, 69, } }, /* 3: 200 6 */ + { .bxt = { 32, 0, 0, 128, } }, /* 4: 250 0 */ + { .bxt = { 48, 0, 0, 104, } }, /* 5: 250 1.5 */ + { .bxt = { 54, 0, 0, 85, } }, /* 6: 250 4 */ + { .bxt = { 43, 0, 0, 128, } }, /* 7: 300 0 */ + { .bxt = { 54, 0, 0, 101, } }, /* 8: 300 1.5 */ + { .bxt = { 48, 0, 0, 128, } }, /* 9: 300 0 */ +}; + +static const struct intel_ddi_buf_trans bxt_ddi_translations_edp = { + .entries = _bxt_ddi_translations_edp, + .num_entries = ARRAY_SIZE(_bxt_ddi_translations_edp), }; /* BSpec has 2 recommended values - entries 0 and 8. * Using the entry with higher vswing. */ -static const struct bxt_ddi_buf_trans bxt_ddi_translations_hdmi[] = { - /* Idx NT mV diff db */ - { 52, 0x9A, 0, 128, }, /* 0: 400 0 */ - { 52, 0x9A, 0, 85, }, /* 1: 400 3.5 */ - { 52, 0x9A, 0, 64, }, /* 2: 400 6 */ - { 42, 0x9A, 0, 43, }, /* 3: 400 9.5 */ - { 77, 0x9A, 0, 128, }, /* 4: 600 0 */ - { 77, 0x9A, 0, 85, }, /* 5: 600 3.5 */ - { 77, 0x9A, 0, 64, }, /* 6: 600 6 */ - { 102, 0x9A, 0, 128, }, /* 7: 800 0 */ - { 102, 0x9A, 0, 85, }, /* 8: 800 3.5 */ - { 154, 0x9A, 1, 128, }, /* 9: 1200 0 */ -}; - -/* Voltage Swing Programming for VccIO 0.85V for DP */ -static const struct cnl_ddi_buf_trans cnl_ddi_translations_dp_0_85V[] = { - /* NT mV Trans mV db */ - { 0xA, 0x5D, 0x3F, 0x00, 0x00 }, /* 350 350 0.0 */ - { 0xA, 0x6A, 0x38, 0x00, 0x07 }, /* 350 500 3.1 */ - { 0xB, 0x7A, 0x32, 0x00, 0x0D }, /* 350 700 6.0 */ - { 0x6, 0x7C, 0x2D, 0x00, 0x12 }, /* 350 900 8.2 */ - { 0xA, 0x69, 0x3F, 0x00, 0x00 }, /* 500 500 0.0 */ - { 0xB, 0x7A, 0x36, 0x00, 0x09 }, /* 500 700 2.9 */ - { 0x6, 0x7C, 0x30, 0x00, 0x0F }, /* 500 900 5.1 */ - { 0xB, 0x7D, 0x3C, 0x00, 0x03 }, /* 650 725 0.9 */ - { 0x6, 0x7C, 0x34, 0x00, 0x0B }, /* 600 900 3.5 */ - { 0x6, 0x7B, 0x3F, 0x00, 0x00 }, /* 900 900 0.0 */ -}; - -/* Voltage Swing Programming for VccIO 0.85V for HDMI */ -static const struct cnl_ddi_buf_trans cnl_ddi_translations_hdmi_0_85V[] = { - /* NT mV Trans mV db */ - { 0xA, 0x60, 0x3F, 0x00, 0x00 }, /* 450 450 0.0 */ - { 0xB, 0x73, 0x36, 0x00, 0x09 }, /* 450 650 3.2 */ - { 0x6, 0x7F, 0x31, 0x00, 0x0E }, /* 450 850 5.5 */ - { 0xB, 0x73, 0x3F, 0x00, 0x00 }, /* 650 650 0.0 */ - { 0x6, 0x7F, 0x37, 0x00, 0x08 }, /* 650 850 2.3 */ - { 0x6, 0x7F, 0x3F, 0x00, 0x00 }, /* 850 850 0.0 */ - { 0x6, 0x7F, 0x35, 0x00, 0x0A }, /* 600 850 3.0 */ -}; - -/* Voltage Swing Programming for VccIO 0.85V for eDP */ -static const struct cnl_ddi_buf_trans cnl_ddi_translations_edp_0_85V[] = { - /* NT mV Trans mV db */ - { 0xA, 0x66, 0x3A, 0x00, 0x05 }, /* 384 500 2.3 */ - { 0x0, 0x7F, 0x38, 0x00, 0x07 }, /* 153 200 2.3 */ - { 0x8, 0x7F, 0x38, 0x00, 0x07 }, /* 192 250 2.3 */ - { 0x1, 0x7F, 0x38, 0x00, 0x07 }, /* 230 300 2.3 */ - { 0x9, 0x7F, 0x38, 0x00, 0x07 }, /* 269 350 2.3 */ - { 0xA, 0x66, 0x3C, 0x00, 0x03 }, /* 446 500 1.0 */ - { 0xB, 0x70, 0x3C, 0x00, 0x03 }, /* 460 600 2.3 */ - { 0xC, 0x75, 0x3C, 0x00, 0x03 }, /* 537 700 2.3 */ - { 0x2, 0x7F, 0x3F, 
0x00, 0x00 }, /* 400 400 0.0 */ -}; - -/* Voltage Swing Programming for VccIO 0.95V for DP */ -static const struct cnl_ddi_buf_trans cnl_ddi_translations_dp_0_95V[] = { - /* NT mV Trans mV db */ - { 0xA, 0x5D, 0x3F, 0x00, 0x00 }, /* 350 350 0.0 */ - { 0xA, 0x6A, 0x38, 0x00, 0x07 }, /* 350 500 3.1 */ - { 0xB, 0x7A, 0x32, 0x00, 0x0D }, /* 350 700 6.0 */ - { 0x6, 0x7C, 0x2D, 0x00, 0x12 }, /* 350 900 8.2 */ - { 0xA, 0x69, 0x3F, 0x00, 0x00 }, /* 500 500 0.0 */ - { 0xB, 0x7A, 0x36, 0x00, 0x09 }, /* 500 700 2.9 */ - { 0x6, 0x7C, 0x30, 0x00, 0x0F }, /* 500 900 5.1 */ - { 0xB, 0x7D, 0x3C, 0x00, 0x03 }, /* 650 725 0.9 */ - { 0x6, 0x7C, 0x34, 0x00, 0x0B }, /* 600 900 3.5 */ - { 0x6, 0x7B, 0x3F, 0x00, 0x00 }, /* 900 900 0.0 */ -}; - -/* Voltage Swing Programming for VccIO 0.95V for HDMI */ -static const struct cnl_ddi_buf_trans cnl_ddi_translations_hdmi_0_95V[] = { - /* NT mV Trans mV db */ - { 0xA, 0x5C, 0x3F, 0x00, 0x00 }, /* 400 400 0.0 */ - { 0xB, 0x69, 0x37, 0x00, 0x08 }, /* 400 600 3.5 */ - { 0x5, 0x76, 0x31, 0x00, 0x0E }, /* 400 800 6.0 */ - { 0xA, 0x5E, 0x3F, 0x00, 0x00 }, /* 450 450 0.0 */ - { 0xB, 0x69, 0x3F, 0x00, 0x00 }, /* 600 600 0.0 */ - { 0xB, 0x79, 0x35, 0x00, 0x0A }, /* 600 850 3.0 */ - { 0x6, 0x7D, 0x32, 0x00, 0x0D }, /* 600 1000 4.4 */ - { 0x5, 0x76, 0x3F, 0x00, 0x00 }, /* 800 800 0.0 */ - { 0x6, 0x7D, 0x39, 0x00, 0x06 }, /* 800 1000 1.9 */ - { 0x6, 0x7F, 0x39, 0x00, 0x06 }, /* 850 1050 1.8 */ - { 0x6, 0x7F, 0x3F, 0x00, 0x00 }, /* 1050 1050 0.0 */ -}; - -/* Voltage Swing Programming for VccIO 0.95V for eDP */ -static const struct cnl_ddi_buf_trans cnl_ddi_translations_edp_0_95V[] = { - /* NT mV Trans mV db */ - { 0xA, 0x61, 0x3A, 0x00, 0x05 }, /* 384 500 2.3 */ - { 0x0, 0x7F, 0x38, 0x00, 0x07 }, /* 153 200 2.3 */ - { 0x8, 0x7F, 0x38, 0x00, 0x07 }, /* 192 250 2.3 */ - { 0x1, 0x7F, 0x38, 0x00, 0x07 }, /* 230 300 2.3 */ - { 0x9, 0x7F, 0x38, 0x00, 0x07 }, /* 269 350 2.3 */ - { 0xA, 0x61, 0x3C, 0x00, 0x03 }, /* 446 500 1.0 */ - { 0xB, 0x68, 0x39, 0x00, 0x06 }, /* 460 600 2.3 */ - { 0xC, 0x6E, 0x39, 0x00, 0x06 }, /* 537 700 2.3 */ - { 0x4, 0x7F, 0x3A, 0x00, 0x05 }, /* 460 600 2.3 */ - { 0x2, 0x7F, 0x3F, 0x00, 0x00 }, /* 400 400 0.0 */ -}; - -/* Voltage Swing Programming for VccIO 1.05V for DP */ -static const struct cnl_ddi_buf_trans cnl_ddi_translations_dp_1_05V[] = { - /* NT mV Trans mV db */ - { 0xA, 0x58, 0x3F, 0x00, 0x00 }, /* 400 400 0.0 */ - { 0xB, 0x64, 0x37, 0x00, 0x08 }, /* 400 600 3.5 */ - { 0x5, 0x70, 0x31, 0x00, 0x0E }, /* 400 800 6.0 */ - { 0x6, 0x7F, 0x2C, 0x00, 0x13 }, /* 400 1050 8.4 */ - { 0xB, 0x64, 0x3F, 0x00, 0x00 }, /* 600 600 0.0 */ - { 0x5, 0x73, 0x35, 0x00, 0x0A }, /* 600 850 3.0 */ - { 0x6, 0x7F, 0x30, 0x00, 0x0F }, /* 550 1050 5.6 */ - { 0x5, 0x76, 0x3E, 0x00, 0x01 }, /* 850 900 0.5 */ - { 0x6, 0x7F, 0x36, 0x00, 0x09 }, /* 750 1050 2.9 */ - { 0x6, 0x7F, 0x3F, 0x00, 0x00 }, /* 1050 1050 0.0 */ -}; - -/* Voltage Swing Programming for VccIO 1.05V for HDMI */ -static const struct cnl_ddi_buf_trans cnl_ddi_translations_hdmi_1_05V[] = { - /* NT mV Trans mV db */ - { 0xA, 0x58, 0x3F, 0x00, 0x00 }, /* 400 400 0.0 */ - { 0xB, 0x64, 0x37, 0x00, 0x08 }, /* 400 600 3.5 */ - { 0x5, 0x70, 0x31, 0x00, 0x0E }, /* 400 800 6.0 */ - { 0xA, 0x5B, 0x3F, 0x00, 0x00 }, /* 450 450 0.0 */ - { 0xB, 0x64, 0x3F, 0x00, 0x00 }, /* 600 600 0.0 */ - { 0x5, 0x73, 0x35, 0x00, 0x0A }, /* 600 850 3.0 */ - { 0x6, 0x7C, 0x32, 0x00, 0x0D }, /* 600 1000 4.4 */ - { 0x5, 0x70, 0x3F, 0x00, 0x00 }, /* 800 800 0.0 */ - { 0x6, 0x7C, 0x39, 0x00, 0x06 }, /* 800 1000 1.9 */ - { 0x6, 0x7F, 0x39, 0x00, 0x06 }, 
/* 850 1050 1.8 */ - { 0x6, 0x7F, 0x3F, 0x00, 0x00 }, /* 1050 1050 0.0 */ -}; - -/* Voltage Swing Programming for VccIO 1.05V for eDP */ -static const struct cnl_ddi_buf_trans cnl_ddi_translations_edp_1_05V[] = { - /* NT mV Trans mV db */ - { 0xA, 0x5E, 0x3A, 0x00, 0x05 }, /* 384 500 2.3 */ - { 0x0, 0x7F, 0x38, 0x00, 0x07 }, /* 153 200 2.3 */ - { 0x8, 0x7F, 0x38, 0x00, 0x07 }, /* 192 250 2.3 */ - { 0x1, 0x7F, 0x38, 0x00, 0x07 }, /* 230 300 2.3 */ - { 0x9, 0x7F, 0x38, 0x00, 0x07 }, /* 269 350 2.3 */ - { 0xA, 0x5E, 0x3C, 0x00, 0x03 }, /* 446 500 1.0 */ - { 0xB, 0x64, 0x39, 0x00, 0x06 }, /* 460 600 2.3 */ - { 0xE, 0x6A, 0x39, 0x00, 0x06 }, /* 537 700 2.3 */ - { 0x2, 0x7F, 0x3F, 0x00, 0x00 }, /* 400 400 0.0 */ +static const union intel_ddi_buf_trans_entry _bxt_ddi_translations_hdmi[] = { + /* Idx NT mV diff db */ + { .bxt = { 52, 0x9A, 0, 128, } }, /* 0: 400 0 */ + { .bxt = { 52, 0x9A, 0, 85, } }, /* 1: 400 3.5 */ + { .bxt = { 52, 0x9A, 0, 64, } }, /* 2: 400 6 */ + { .bxt = { 42, 0x9A, 0, 43, } }, /* 3: 400 9.5 */ + { .bxt = { 77, 0x9A, 0, 128, } }, /* 4: 600 0 */ + { .bxt = { 77, 0x9A, 0, 85, } }, /* 5: 600 3.5 */ + { .bxt = { 77, 0x9A, 0, 64, } }, /* 6: 600 6 */ + { .bxt = { 102, 0x9A, 0, 128, } }, /* 7: 800 0 */ + { .bxt = { 102, 0x9A, 0, 85, } }, /* 8: 800 3.5 */ + { .bxt = { 154, 0x9A, 1, 128, } }, /* 9: 1200 0 */ +}; + +static const struct intel_ddi_buf_trans bxt_ddi_translations_hdmi = { + .entries = _bxt_ddi_translations_hdmi, + .num_entries = ARRAY_SIZE(_bxt_ddi_translations_hdmi), + .hdmi_default_entry = ARRAY_SIZE(_bxt_ddi_translations_hdmi) - 1, }; /* icl_combo_phy_ddi_translations */ -static const struct cnl_ddi_buf_trans icl_combo_phy_ddi_translations_dp_hbr2[] = { - /* NT mV Trans mV db */ - { 0xA, 0x35, 0x3F, 0x00, 0x00 }, /* 350 350 0.0 */ - { 0xA, 0x4F, 0x37, 0x00, 0x08 }, /* 350 500 3.1 */ - { 0xC, 0x71, 0x2F, 0x00, 0x10 }, /* 350 700 6.0 */ - { 0x6, 0x7F, 0x2B, 0x00, 0x14 }, /* 350 900 8.2 */ - { 0xA, 0x4C, 0x3F, 0x00, 0x00 }, /* 500 500 0.0 */ - { 0xC, 0x73, 0x34, 0x00, 0x0B }, /* 500 700 2.9 */ - { 0x6, 0x7F, 0x2F, 0x00, 0x10 }, /* 500 900 5.1 */ - { 0xC, 0x6C, 0x3C, 0x00, 0x03 }, /* 650 700 0.6 */ - { 0x6, 0x7F, 0x35, 0x00, 0x0A }, /* 600 900 3.5 */ - { 0x6, 0x7F, 0x3F, 0x00, 0x00 }, /* 900 900 0.0 */ -}; - -static const struct cnl_ddi_buf_trans icl_combo_phy_ddi_translations_edp_hbr2[] = { - /* NT mV Trans mV db */ - { 0x0, 0x7F, 0x3F, 0x00, 0x00 }, /* 200 200 0.0 */ - { 0x8, 0x7F, 0x38, 0x00, 0x07 }, /* 200 250 1.9 */ - { 0x1, 0x7F, 0x33, 0x00, 0x0C }, /* 200 300 3.5 */ - { 0x9, 0x7F, 0x31, 0x00, 0x0E }, /* 200 350 4.9 */ - { 0x8, 0x7F, 0x3F, 0x00, 0x00 }, /* 250 250 0.0 */ - { 0x1, 0x7F, 0x38, 0x00, 0x07 }, /* 250 300 1.6 */ - { 0x9, 0x7F, 0x35, 0x00, 0x0A }, /* 250 350 2.9 */ - { 0x1, 0x7F, 0x3F, 0x00, 0x00 }, /* 300 300 0.0 */ - { 0x9, 0x7F, 0x38, 0x00, 0x07 }, /* 300 350 1.3 */ - { 0x9, 0x7F, 0x3F, 0x00, 0x00 }, /* 350 350 0.0 */ -}; - -static const struct cnl_ddi_buf_trans icl_combo_phy_ddi_translations_edp_hbr3[] = { - /* NT mV Trans mV db */ - { 0xA, 0x35, 0x3F, 0x00, 0x00 }, /* 350 350 0.0 */ - { 0xA, 0x4F, 0x37, 0x00, 0x08 }, /* 350 500 3.1 */ - { 0xC, 0x71, 0x2F, 0x00, 0x10 }, /* 350 700 6.0 */ - { 0x6, 0x7F, 0x2B, 0x00, 0x14 }, /* 350 900 8.2 */ - { 0xA, 0x4C, 0x3F, 0x00, 0x00 }, /* 500 500 0.0 */ - { 0xC, 0x73, 0x34, 0x00, 0x0B }, /* 500 700 2.9 */ - { 0x6, 0x7F, 0x2F, 0x00, 0x10 }, /* 500 900 5.1 */ - { 0xC, 0x6C, 0x3C, 0x00, 0x03 }, /* 650 700 0.6 */ - { 0x6, 0x7F, 0x35, 0x00, 0x0A }, /* 600 900 3.5 */ - { 0x6, 0x7F, 0x3F, 0x00, 0x00 }, /* 900 900 0.0 */ 
-}; - -static const struct cnl_ddi_buf_trans icl_combo_phy_ddi_translations_hdmi[] = { - /* NT mV Trans mV db */ - { 0xA, 0x60, 0x3F, 0x00, 0x00 }, /* 450 450 0.0 */ - { 0xB, 0x73, 0x36, 0x00, 0x09 }, /* 450 650 3.2 */ - { 0x6, 0x7F, 0x31, 0x00, 0x0E }, /* 450 850 5.5 */ - { 0xB, 0x73, 0x3F, 0x00, 0x00 }, /* 650 650 0.0 ALS */ - { 0x6, 0x7F, 0x37, 0x00, 0x08 }, /* 650 850 2.3 */ - { 0x6, 0x7F, 0x3F, 0x00, 0x00 }, /* 850 850 0.0 */ - { 0x6, 0x7F, 0x35, 0x00, 0x0A }, /* 600 850 3.0 */ -}; - -static const struct cnl_ddi_buf_trans ehl_combo_phy_ddi_translations_dp[] = { - /* NT mV Trans mV db */ - { 0xA, 0x33, 0x3F, 0x00, 0x00 }, /* 350 350 0.0 */ - { 0xA, 0x47, 0x36, 0x00, 0x09 }, /* 350 500 3.1 */ - { 0xC, 0x64, 0x34, 0x00, 0x0B }, /* 350 700 6.0 */ - { 0x6, 0x7F, 0x30, 0x00, 0x0F }, /* 350 900 8.2 */ - { 0xA, 0x46, 0x3F, 0x00, 0x00 }, /* 500 500 0.0 */ - { 0xC, 0x64, 0x38, 0x00, 0x07 }, /* 500 700 2.9 */ - { 0x6, 0x7F, 0x32, 0x00, 0x0D }, /* 500 900 5.1 */ - { 0xC, 0x61, 0x3F, 0x00, 0x00 }, /* 650 700 0.6 */ - { 0x6, 0x7F, 0x38, 0x00, 0x07 }, /* 600 900 3.5 */ - { 0x6, 0x7F, 0x3F, 0x00, 0x00 }, /* 900 900 0.0 */ -}; - -static const struct cnl_ddi_buf_trans jsl_combo_phy_ddi_translations_edp_hbr[] = { - /* NT mV Trans mV db */ - { 0x8, 0x7F, 0x3F, 0x00, 0x00 }, /* 200 200 0.0 */ - { 0x8, 0x7F, 0x38, 0x00, 0x07 }, /* 200 250 1.9 */ - { 0x1, 0x7F, 0x33, 0x00, 0x0C }, /* 200 300 3.5 */ - { 0xA, 0x35, 0x36, 0x00, 0x09 }, /* 200 350 4.9 */ - { 0x8, 0x7F, 0x3F, 0x00, 0x00 }, /* 250 250 0.0 */ - { 0x1, 0x7F, 0x38, 0x00, 0x07 }, /* 250 300 1.6 */ - { 0xA, 0x35, 0x35, 0x00, 0x0A }, /* 250 350 2.9 */ - { 0x1, 0x7F, 0x3F, 0x00, 0x00 }, /* 300 300 0.0 */ - { 0xA, 0x35, 0x38, 0x00, 0x07 }, /* 300 350 1.3 */ - { 0xA, 0x35, 0x3F, 0x00, 0x00 }, /* 350 350 0.0 */ -}; - -static const struct cnl_ddi_buf_trans jsl_combo_phy_ddi_translations_edp_hbr2[] = { - /* NT mV Trans mV db */ - { 0x8, 0x7F, 0x3F, 0x00, 0x00 }, /* 200 200 0.0 */ - { 0x8, 0x7F, 0x3F, 0x00, 0x00 }, /* 200 250 1.9 */ - { 0x1, 0x7F, 0x3D, 0x00, 0x02 }, /* 200 300 3.5 */ - { 0xA, 0x35, 0x38, 0x00, 0x07 }, /* 200 350 4.9 */ - { 0x8, 0x7F, 0x3F, 0x00, 0x00 }, /* 250 250 0.0 */ - { 0x1, 0x7F, 0x3F, 0x00, 0x00 }, /* 250 300 1.6 */ - { 0xA, 0x35, 0x3A, 0x00, 0x05 }, /* 250 350 2.9 */ - { 0x1, 0x7F, 0x3F, 0x00, 0x00 }, /* 300 300 0.0 */ - { 0xA, 0x35, 0x38, 0x00, 0x07 }, /* 300 350 1.3 */ - { 0xA, 0x35, 0x3F, 0x00, 0x00 }, /* 350 350 0.0 */ -}; - -static const struct cnl_ddi_buf_trans dg1_combo_phy_ddi_translations_dp_rbr_hbr[] = { - /* NT mV Trans mV db */ - { 0xA, 0x32, 0x3F, 0x00, 0x00 }, /* 350 350 0.0 */ - { 0xA, 0x48, 0x35, 0x00, 0x0A }, /* 350 500 3.1 */ - { 0xC, 0x63, 0x2F, 0x00, 0x10 }, /* 350 700 6.0 */ - { 0x6, 0x7F, 0x2C, 0x00, 0x13 }, /* 350 900 8.2 */ - { 0xA, 0x43, 0x3F, 0x00, 0x00 }, /* 500 500 0.0 */ - { 0xC, 0x60, 0x36, 0x00, 0x09 }, /* 500 700 2.9 */ - { 0x6, 0x7F, 0x30, 0x00, 0x0F }, /* 500 900 5.1 */ - { 0xC, 0x60, 0x3F, 0x00, 0x00 }, /* 650 700 0.6 */ - { 0x6, 0x7F, 0x37, 0x00, 0x08 }, /* 600 900 3.5 */ - { 0x6, 0x7F, 0x3F, 0x00, 0x00 }, /* 900 900 0.0 */ -}; - -static const struct cnl_ddi_buf_trans dg1_combo_phy_ddi_translations_dp_hbr2_hbr3[] = { - /* NT mV Trans mV db */ - { 0xA, 0x32, 0x3F, 0x00, 0x00 }, /* 350 350 0.0 */ - { 0xA, 0x48, 0x35, 0x00, 0x0A }, /* 350 500 3.1 */ - { 0xC, 0x63, 0x2F, 0x00, 0x10 }, /* 350 700 6.0 */ - { 0x6, 0x7F, 0x2C, 0x00, 0x13 }, /* 350 900 8.2 */ - { 0xA, 0x43, 0x3F, 0x00, 0x00 }, /* 500 500 0.0 */ - { 0xC, 0x60, 0x36, 0x00, 0x09 }, /* 500 700 2.9 */ - { 0x6, 0x7F, 0x30, 0x00, 0x0F }, /* 500 
900 5.1 */ - { 0xC, 0x58, 0x3F, 0x00, 0x00 }, /* 650 700 0.6 */ - { 0x6, 0x7F, 0x35, 0x00, 0x0A }, /* 600 900 3.5 */ - { 0x6, 0x7F, 0x3F, 0x00, 0x00 }, /* 900 900 0.0 */ -}; - -static const struct icl_mg_phy_ddi_buf_trans icl_mg_phy_ddi_translations_rbr_hbr[] = { - /* Voltage swing pre-emphasis */ - { 0x18, 0x00, 0x00 }, /* 0 0 */ - { 0x1D, 0x00, 0x05 }, /* 0 1 */ - { 0x24, 0x00, 0x0C }, /* 0 2 */ - { 0x2B, 0x00, 0x14 }, /* 0 3 */ - { 0x21, 0x00, 0x00 }, /* 1 0 */ - { 0x2B, 0x00, 0x08 }, /* 1 1 */ - { 0x30, 0x00, 0x0F }, /* 1 2 */ - { 0x31, 0x00, 0x03 }, /* 2 0 */ - { 0x34, 0x00, 0x0B }, /* 2 1 */ - { 0x3F, 0x00, 0x00 }, /* 3 0 */ -}; - -static const struct icl_mg_phy_ddi_buf_trans icl_mg_phy_ddi_translations_hbr2_hbr3[] = { - /* Voltage swing pre-emphasis */ - { 0x18, 0x00, 0x00 }, /* 0 0 */ - { 0x1D, 0x00, 0x05 }, /* 0 1 */ - { 0x24, 0x00, 0x0C }, /* 0 2 */ - { 0x2B, 0x00, 0x14 }, /* 0 3 */ - { 0x26, 0x00, 0x00 }, /* 1 0 */ - { 0x2C, 0x00, 0x07 }, /* 1 1 */ - { 0x33, 0x00, 0x0C }, /* 1 2 */ - { 0x2E, 0x00, 0x00 }, /* 2 0 */ - { 0x36, 0x00, 0x09 }, /* 2 1 */ - { 0x3F, 0x00, 0x00 }, /* 3 0 */ -}; - -static const struct icl_mg_phy_ddi_buf_trans icl_mg_phy_ddi_translations_hdmi[] = { - /* HDMI Preset VS Pre-emph */ - { 0x1A, 0x0, 0x0 }, /* 1 400mV 0dB */ - { 0x20, 0x0, 0x0 }, /* 2 500mV 0dB */ - { 0x29, 0x0, 0x0 }, /* 3 650mV 0dB */ - { 0x32, 0x0, 0x0 }, /* 4 800mV 0dB */ - { 0x3F, 0x0, 0x0 }, /* 5 1000mV 0dB */ - { 0x3A, 0x0, 0x5 }, /* 6 Full -1.5 dB */ - { 0x39, 0x0, 0x6 }, /* 7 Full -1.8 dB */ - { 0x38, 0x0, 0x7 }, /* 8 Full -2 dB */ - { 0x37, 0x0, 0x8 }, /* 9 Full -2.5 dB */ - { 0x36, 0x0, 0x9 }, /* 10 Full -3 dB */ -}; - -static const struct tgl_dkl_phy_ddi_buf_trans tgl_dkl_phy_dp_ddi_trans[] = { - /* VS pre-emp Non-trans mV Pre-emph dB */ - { 0x7, 0x0, 0x00 }, /* 0 0 400mV 0 dB */ - { 0x5, 0x0, 0x05 }, /* 0 1 400mV 3.5 dB */ - { 0x2, 0x0, 0x0B }, /* 0 2 400mV 6 dB */ - { 0x0, 0x0, 0x18 }, /* 0 3 400mV 9.5 dB */ - { 0x5, 0x0, 0x00 }, /* 1 0 600mV 0 dB */ - { 0x2, 0x0, 0x08 }, /* 1 1 600mV 3.5 dB */ - { 0x0, 0x0, 0x14 }, /* 1 2 600mV 6 dB */ - { 0x2, 0x0, 0x00 }, /* 2 0 800mV 0 dB */ - { 0x0, 0x0, 0x0B }, /* 2 1 800mV 3.5 dB */ - { 0x0, 0x0, 0x00 }, /* 3 0 1200mV 0 dB HDMI default */ -}; - -static const struct tgl_dkl_phy_ddi_buf_trans tgl_dkl_phy_dp_ddi_trans_hbr2[] = { - /* VS pre-emp Non-trans mV Pre-emph dB */ - { 0x7, 0x0, 0x00 }, /* 0 0 400mV 0 dB */ - { 0x5, 0x0, 0x05 }, /* 0 1 400mV 3.5 dB */ - { 0x2, 0x0, 0x0B }, /* 0 2 400mV 6 dB */ - { 0x0, 0x0, 0x19 }, /* 0 3 400mV 9.5 dB */ - { 0x5, 0x0, 0x00 }, /* 1 0 600mV 0 dB */ - { 0x2, 0x0, 0x08 }, /* 1 1 600mV 3.5 dB */ - { 0x0, 0x0, 0x14 }, /* 1 2 600mV 6 dB */ - { 0x2, 0x0, 0x00 }, /* 2 0 800mV 0 dB */ - { 0x0, 0x0, 0x0B }, /* 2 1 800mV 3.5 dB */ - { 0x0, 0x0, 0x00 }, /* 3 0 1200mV 0 dB HDMI default */ -}; - -static const struct tgl_dkl_phy_ddi_buf_trans tgl_dkl_phy_hdmi_ddi_trans[] = { - /* HDMI Preset VS Pre-emph */ - { 0x7, 0x0, 0x0 }, /* 1 400mV 0dB */ - { 0x6, 0x0, 0x0 }, /* 2 500mV 0dB */ - { 0x4, 0x0, 0x0 }, /* 3 650mV 0dB */ - { 0x2, 0x0, 0x0 }, /* 4 800mV 0dB */ - { 0x0, 0x0, 0x0 }, /* 5 1000mV 0dB */ - { 0x0, 0x0, 0x5 }, /* 6 Full -1.5 dB */ - { 0x0, 0x0, 0x6 }, /* 7 Full -1.8 dB */ - { 0x0, 0x0, 0x7 }, /* 8 Full -2 dB */ - { 0x0, 0x0, 0x8 }, /* 9 Full -2.5 dB */ - { 0x0, 0x0, 0xA }, /* 10 Full -3 dB */ -}; - -static const struct cnl_ddi_buf_trans tgl_combo_phy_ddi_translations_dp_hbr[] = { - /* NT mV Trans mV db */ - { 0xA, 0x32, 0x3F, 0x00, 0x00 }, /* 350 350 0.0 */ - { 0xA, 0x4F, 0x37, 0x00, 0x08 }, /* 350 500 3.1 */ - 
{ 0xC, 0x71, 0x2F, 0x00, 0x10 }, /* 350 700 6.0 */ - { 0x6, 0x7D, 0x2B, 0x00, 0x14 }, /* 350 900 8.2 */ - { 0xA, 0x4C, 0x3F, 0x00, 0x00 }, /* 500 500 0.0 */ - { 0xC, 0x73, 0x34, 0x00, 0x0B }, /* 500 700 2.9 */ - { 0x6, 0x7F, 0x2F, 0x00, 0x10 }, /* 500 900 5.1 */ - { 0xC, 0x6C, 0x3C, 0x00, 0x03 }, /* 650 700 0.6 */ - { 0x6, 0x7F, 0x35, 0x00, 0x0A }, /* 600 900 3.5 */ - { 0x6, 0x7F, 0x3F, 0x00, 0x00 }, /* 900 900 0.0 */ -}; - -static const struct cnl_ddi_buf_trans tgl_combo_phy_ddi_translations_dp_hbr2[] = { - /* NT mV Trans mV db */ - { 0xA, 0x35, 0x3F, 0x00, 0x00 }, /* 350 350 0.0 */ - { 0xA, 0x4F, 0x37, 0x00, 0x08 }, /* 350 500 3.1 */ - { 0xC, 0x63, 0x2F, 0x00, 0x10 }, /* 350 700 6.0 */ - { 0x6, 0x7F, 0x2B, 0x00, 0x14 }, /* 350 900 8.2 */ - { 0xA, 0x47, 0x3F, 0x00, 0x00 }, /* 500 500 0.0 */ - { 0xC, 0x63, 0x34, 0x00, 0x0B }, /* 500 700 2.9 */ - { 0x6, 0x7F, 0x2F, 0x00, 0x10 }, /* 500 900 5.1 */ - { 0xC, 0x61, 0x3C, 0x00, 0x03 }, /* 650 700 0.6 */ - { 0x6, 0x7B, 0x35, 0x00, 0x0A }, /* 600 900 3.5 */ - { 0x6, 0x7F, 0x3F, 0x00, 0x00 }, /* 900 900 0.0 */ -}; - -static const struct cnl_ddi_buf_trans tgl_uy_combo_phy_ddi_translations_dp_hbr2[] = { - /* NT mV Trans mV db */ - { 0xA, 0x35, 0x3F, 0x00, 0x00 }, /* 350 350 0.0 */ - { 0xA, 0x4F, 0x36, 0x00, 0x09 }, /* 350 500 3.1 */ - { 0xC, 0x60, 0x32, 0x00, 0x0D }, /* 350 700 6.0 */ - { 0xC, 0x7F, 0x2D, 0x00, 0x12 }, /* 350 900 8.2 */ - { 0xC, 0x47, 0x3F, 0x00, 0x00 }, /* 500 500 0.0 */ - { 0xC, 0x6F, 0x36, 0x00, 0x09 }, /* 500 700 2.9 */ - { 0x6, 0x7D, 0x32, 0x00, 0x0D }, /* 500 900 5.1 */ - { 0x6, 0x60, 0x3C, 0x00, 0x03 }, /* 650 700 0.6 */ - { 0x6, 0x7F, 0x34, 0x00, 0x0B }, /* 600 900 3.5 */ - { 0x6, 0x7F, 0x3F, 0x00, 0x00 }, /* 900 900 0.0 */ +static const union intel_ddi_buf_trans_entry _icl_combo_phy_ddi_translations_dp_hbr2_edp_hbr3[] = { + /* NT mV Trans mV db */ + { .icl = { 0xA, 0x35, 0x3F, 0x00, 0x00 } }, /* 350 350 0.0 */ + { .icl = { 0xA, 0x4F, 0x37, 0x00, 0x08 } }, /* 350 500 3.1 */ + { .icl = { 0xC, 0x71, 0x2F, 0x00, 0x10 } }, /* 350 700 6.0 */ + { .icl = { 0x6, 0x7F, 0x2B, 0x00, 0x14 } }, /* 350 900 8.2 */ + { .icl = { 0xA, 0x4C, 0x3F, 0x00, 0x00 } }, /* 500 500 0.0 */ + { .icl = { 0xC, 0x73, 0x34, 0x00, 0x0B } }, /* 500 700 2.9 */ + { .icl = { 0x6, 0x7F, 0x2F, 0x00, 0x10 } }, /* 500 900 5.1 */ + { .icl = { 0xC, 0x6C, 0x3C, 0x00, 0x03 } }, /* 650 700 0.6 */ + { .icl = { 0x6, 0x7F, 0x35, 0x00, 0x0A } }, /* 600 900 3.5 */ + { .icl = { 0x6, 0x7F, 0x3F, 0x00, 0x00 } }, /* 900 900 0.0 */ +}; + +static const struct intel_ddi_buf_trans icl_combo_phy_ddi_translations_dp_hbr2_edp_hbr3 = { + .entries = _icl_combo_phy_ddi_translations_dp_hbr2_edp_hbr3, + .num_entries = ARRAY_SIZE(_icl_combo_phy_ddi_translations_dp_hbr2_edp_hbr3), +}; + +static const union intel_ddi_buf_trans_entry _icl_combo_phy_ddi_translations_edp_hbr2[] = { + /* NT mV Trans mV db */ + { .icl = { 0x0, 0x7F, 0x3F, 0x00, 0x00 } }, /* 200 200 0.0 */ + { .icl = { 0x8, 0x7F, 0x38, 0x00, 0x07 } }, /* 200 250 1.9 */ + { .icl = { 0x1, 0x7F, 0x33, 0x00, 0x0C } }, /* 200 300 3.5 */ + { .icl = { 0x9, 0x7F, 0x31, 0x00, 0x0E } }, /* 200 350 4.9 */ + { .icl = { 0x8, 0x7F, 0x3F, 0x00, 0x00 } }, /* 250 250 0.0 */ + { .icl = { 0x1, 0x7F, 0x38, 0x00, 0x07 } }, /* 250 300 1.6 */ + { .icl = { 0x9, 0x7F, 0x35, 0x00, 0x0A } }, /* 250 350 2.9 */ + { .icl = { 0x1, 0x7F, 0x3F, 0x00, 0x00 } }, /* 300 300 0.0 */ + { .icl = { 0x9, 0x7F, 0x38, 0x00, 0x07 } }, /* 300 350 1.3 */ + { .icl = { 0x9, 0x7F, 0x3F, 0x00, 0x00 } }, /* 350 350 0.0 */ +}; + +static const struct intel_ddi_buf_trans 
icl_combo_phy_ddi_translations_edp_hbr2 = { + .entries = _icl_combo_phy_ddi_translations_edp_hbr2, + .num_entries = ARRAY_SIZE(_icl_combo_phy_ddi_translations_edp_hbr2), +}; + +static const union intel_ddi_buf_trans_entry _icl_combo_phy_ddi_translations_hdmi[] = { + /* NT mV Trans mV db */ + { .icl = { 0xA, 0x60, 0x3F, 0x00, 0x00 } }, /* 450 450 0.0 */ + { .icl = { 0xB, 0x73, 0x36, 0x00, 0x09 } }, /* 450 650 3.2 */ + { .icl = { 0x6, 0x7F, 0x31, 0x00, 0x0E } }, /* 450 850 5.5 */ + { .icl = { 0xB, 0x73, 0x3F, 0x00, 0x00 } }, /* 650 650 0.0 ALS */ + { .icl = { 0x6, 0x7F, 0x37, 0x00, 0x08 } }, /* 650 850 2.3 */ + { .icl = { 0x6, 0x7F, 0x3F, 0x00, 0x00 } }, /* 850 850 0.0 */ + { .icl = { 0x6, 0x7F, 0x35, 0x00, 0x0A } }, /* 600 850 3.0 */ +}; + +static const struct intel_ddi_buf_trans icl_combo_phy_ddi_translations_hdmi = { + .entries = _icl_combo_phy_ddi_translations_hdmi, + .num_entries = ARRAY_SIZE(_icl_combo_phy_ddi_translations_hdmi), + .hdmi_default_entry = ARRAY_SIZE(_icl_combo_phy_ddi_translations_hdmi) - 1, +}; + +static const union intel_ddi_buf_trans_entry _ehl_combo_phy_ddi_translations_dp[] = { + /* NT mV Trans mV db */ + { .icl = { 0xA, 0x33, 0x3F, 0x00, 0x00 } }, /* 350 350 0.0 */ + { .icl = { 0xA, 0x47, 0x36, 0x00, 0x09 } }, /* 350 500 3.1 */ + { .icl = { 0xC, 0x64, 0x34, 0x00, 0x0B } }, /* 350 700 6.0 */ + { .icl = { 0x6, 0x7F, 0x30, 0x00, 0x0F } }, /* 350 900 8.2 */ + { .icl = { 0xA, 0x46, 0x3F, 0x00, 0x00 } }, /* 500 500 0.0 */ + { .icl = { 0xC, 0x64, 0x38, 0x00, 0x07 } }, /* 500 700 2.9 */ + { .icl = { 0x6, 0x7F, 0x32, 0x00, 0x0D } }, /* 500 900 5.1 */ + { .icl = { 0xC, 0x61, 0x3F, 0x00, 0x00 } }, /* 650 700 0.6 */ + { .icl = { 0x6, 0x7F, 0x38, 0x00, 0x07 } }, /* 600 900 3.5 */ + { .icl = { 0x6, 0x7F, 0x3F, 0x00, 0x00 } }, /* 900 900 0.0 */ +}; + +static const struct intel_ddi_buf_trans ehl_combo_phy_ddi_translations_dp = { + .entries = _ehl_combo_phy_ddi_translations_dp, + .num_entries = ARRAY_SIZE(_ehl_combo_phy_ddi_translations_dp), +}; + +static const union intel_ddi_buf_trans_entry _ehl_combo_phy_ddi_translations_edp_hbr2[] = { + /* NT mV Trans mV db */ + { .icl = { 0x8, 0x7F, 0x3F, 0x00, 0x00 } }, /* 200 200 0.0 */ + { .icl = { 0x8, 0x7F, 0x3F, 0x00, 0x00 } }, /* 200 250 1.9 */ + { .icl = { 0x1, 0x7F, 0x3D, 0x00, 0x02 } }, /* 200 300 3.5 */ + { .icl = { 0xA, 0x35, 0x39, 0x00, 0x06 } }, /* 200 350 4.9 */ + { .icl = { 0x8, 0x7F, 0x3F, 0x00, 0x00 } }, /* 250 250 0.0 */ + { .icl = { 0x1, 0x7F, 0x3C, 0x00, 0x03 } }, /* 250 300 1.6 */ + { .icl = { 0xA, 0x35, 0x39, 0x00, 0x06 } }, /* 250 350 2.9 */ + { .icl = { 0x1, 0x7F, 0x3F, 0x00, 0x00 } }, /* 300 300 0.0 */ + { .icl = { 0xA, 0x35, 0x38, 0x00, 0x07 } }, /* 300 350 1.3 */ + { .icl = { 0xA, 0x35, 0x3F, 0x00, 0x00 } }, /* 350 350 0.0 */ +}; + +static const struct intel_ddi_buf_trans ehl_combo_phy_ddi_translations_edp_hbr2 = { + .entries = _ehl_combo_phy_ddi_translations_edp_hbr2, + .num_entries = ARRAY_SIZE(_ehl_combo_phy_ddi_translations_edp_hbr2), +}; + +static const union intel_ddi_buf_trans_entry _jsl_combo_phy_ddi_translations_edp_hbr[] = { + /* NT mV Trans mV db */ + { .icl = { 0x8, 0x7F, 0x3F, 0x00, 0x00 } }, /* 200 200 0.0 */ + { .icl = { 0x8, 0x7F, 0x38, 0x00, 0x07 } }, /* 200 250 1.9 */ + { .icl = { 0x1, 0x7F, 0x33, 0x00, 0x0C } }, /* 200 300 3.5 */ + { .icl = { 0xA, 0x35, 0x36, 0x00, 0x09 } }, /* 200 350 4.9 */ + { .icl = { 0x8, 0x7F, 0x3F, 0x00, 0x00 } }, /* 250 250 0.0 */ + { .icl = { 0x1, 0x7F, 0x38, 0x00, 0x07 } }, /* 250 300 1.6 */ + { .icl = { 0xA, 0x35, 0x35, 0x00, 0x0A } }, /* 250 350 2.9 */ + { .icl = { 
0x1, 0x7F, 0x3F, 0x00, 0x00 } }, /* 300 300 0.0 */ + { .icl = { 0xA, 0x35, 0x38, 0x00, 0x07 } }, /* 300 350 1.3 */ + { .icl = { 0xA, 0x35, 0x3F, 0x00, 0x00 } }, /* 350 350 0.0 */ +}; + +static const struct intel_ddi_buf_trans jsl_combo_phy_ddi_translations_edp_hbr = { + .entries = _jsl_combo_phy_ddi_translations_edp_hbr, + .num_entries = ARRAY_SIZE(_jsl_combo_phy_ddi_translations_edp_hbr), +}; + +static const union intel_ddi_buf_trans_entry _jsl_combo_phy_ddi_translations_edp_hbr2[] = { + /* NT mV Trans mV db */ + { .icl = { 0x8, 0x7F, 0x3F, 0x00, 0x00 } }, /* 200 200 0.0 */ + { .icl = { 0x8, 0x7F, 0x3F, 0x00, 0x00 } }, /* 200 250 1.9 */ + { .icl = { 0x1, 0x7F, 0x3D, 0x00, 0x02 } }, /* 200 300 3.5 */ + { .icl = { 0xA, 0x35, 0x38, 0x00, 0x07 } }, /* 200 350 4.9 */ + { .icl = { 0x8, 0x7F, 0x3F, 0x00, 0x00 } }, /* 250 250 0.0 */ + { .icl = { 0x1, 0x7F, 0x3F, 0x00, 0x00 } }, /* 250 300 1.6 */ + { .icl = { 0xA, 0x35, 0x3A, 0x00, 0x05 } }, /* 250 350 2.9 */ + { .icl = { 0x1, 0x7F, 0x3F, 0x00, 0x00 } }, /* 300 300 0.0 */ + { .icl = { 0xA, 0x35, 0x38, 0x00, 0x07 } }, /* 300 350 1.3 */ + { .icl = { 0xA, 0x35, 0x3F, 0x00, 0x00 } }, /* 350 350 0.0 */ +}; + +static const struct intel_ddi_buf_trans jsl_combo_phy_ddi_translations_edp_hbr2 = { + .entries = _jsl_combo_phy_ddi_translations_edp_hbr2, + .num_entries = ARRAY_SIZE(_jsl_combo_phy_ddi_translations_edp_hbr2), +}; + +static const union intel_ddi_buf_trans_entry _dg1_combo_phy_ddi_translations_dp_rbr_hbr[] = { + /* NT mV Trans mV db */ + { .icl = { 0xA, 0x32, 0x3F, 0x00, 0x00 } }, /* 350 350 0.0 */ + { .icl = { 0xA, 0x48, 0x35, 0x00, 0x0A } }, /* 350 500 3.1 */ + { .icl = { 0xC, 0x63, 0x2F, 0x00, 0x10 } }, /* 350 700 6.0 */ + { .icl = { 0x6, 0x7F, 0x2C, 0x00, 0x13 } }, /* 350 900 8.2 */ + { .icl = { 0xA, 0x43, 0x3F, 0x00, 0x00 } }, /* 500 500 0.0 */ + { .icl = { 0xC, 0x60, 0x36, 0x00, 0x09 } }, /* 500 700 2.9 */ + { .icl = { 0x6, 0x7F, 0x30, 0x00, 0x0F } }, /* 500 900 5.1 */ + { .icl = { 0xC, 0x60, 0x3F, 0x00, 0x00 } }, /* 650 700 0.6 */ + { .icl = { 0x6, 0x7F, 0x37, 0x00, 0x08 } }, /* 600 900 3.5 */ + { .icl = { 0x6, 0x7F, 0x3F, 0x00, 0x00 } }, /* 900 900 0.0 */ +}; + +static const struct intel_ddi_buf_trans dg1_combo_phy_ddi_translations_dp_rbr_hbr = { + .entries = _dg1_combo_phy_ddi_translations_dp_rbr_hbr, + .num_entries = ARRAY_SIZE(_dg1_combo_phy_ddi_translations_dp_rbr_hbr), +}; + +static const union intel_ddi_buf_trans_entry _dg1_combo_phy_ddi_translations_dp_hbr2_hbr3[] = { + /* NT mV Trans mV db */ + { .icl = { 0xA, 0x32, 0x3F, 0x00, 0x00 } }, /* 350 350 0.0 */ + { .icl = { 0xA, 0x48, 0x35, 0x00, 0x0A } }, /* 350 500 3.1 */ + { .icl = { 0xC, 0x63, 0x2F, 0x00, 0x10 } }, /* 350 700 6.0 */ + { .icl = { 0x6, 0x7F, 0x2C, 0x00, 0x13 } }, /* 350 900 8.2 */ + { .icl = { 0xA, 0x43, 0x3F, 0x00, 0x00 } }, /* 500 500 0.0 */ + { .icl = { 0xC, 0x60, 0x36, 0x00, 0x09 } }, /* 500 700 2.9 */ + { .icl = { 0x6, 0x7F, 0x30, 0x00, 0x0F } }, /* 500 900 5.1 */ + { .icl = { 0xC, 0x58, 0x3F, 0x00, 0x00 } }, /* 650 700 0.6 */ + { .icl = { 0x6, 0x7F, 0x35, 0x00, 0x0A } }, /* 600 900 3.5 */ + { .icl = { 0x6, 0x7F, 0x3F, 0x00, 0x00 } }, /* 900 900 0.0 */ +}; + +static const struct intel_ddi_buf_trans dg1_combo_phy_ddi_translations_dp_hbr2_hbr3 = { + .entries = _dg1_combo_phy_ddi_translations_dp_hbr2_hbr3, + .num_entries = ARRAY_SIZE(_dg1_combo_phy_ddi_translations_dp_hbr2_hbr3), +}; + +static const union intel_ddi_buf_trans_entry _icl_mg_phy_ddi_translations_rbr_hbr[] = { + /* Voltage swing pre-emphasis */ + { .mg = { 0x18, 0x00, 0x00 } }, /* 0 0 */ + { .mg = { 0x1D, 
0x00, 0x05 } }, /* 0 1 */ + { .mg = { 0x24, 0x00, 0x0C } }, /* 0 2 */ + { .mg = { 0x2B, 0x00, 0x14 } }, /* 0 3 */ + { .mg = { 0x21, 0x00, 0x00 } }, /* 1 0 */ + { .mg = { 0x2B, 0x00, 0x08 } }, /* 1 1 */ + { .mg = { 0x30, 0x00, 0x0F } }, /* 1 2 */ + { .mg = { 0x31, 0x00, 0x03 } }, /* 2 0 */ + { .mg = { 0x34, 0x00, 0x0B } }, /* 2 1 */ + { .mg = { 0x3F, 0x00, 0x00 } }, /* 3 0 */ +}; + +static const struct intel_ddi_buf_trans icl_mg_phy_ddi_translations_rbr_hbr = { + .entries = _icl_mg_phy_ddi_translations_rbr_hbr, + .num_entries = ARRAY_SIZE(_icl_mg_phy_ddi_translations_rbr_hbr), +}; + +static const union intel_ddi_buf_trans_entry _icl_mg_phy_ddi_translations_hbr2_hbr3[] = { + /* Voltage swing pre-emphasis */ + { .mg = { 0x18, 0x00, 0x00 } }, /* 0 0 */ + { .mg = { 0x1D, 0x00, 0x05 } }, /* 0 1 */ + { .mg = { 0x24, 0x00, 0x0C } }, /* 0 2 */ + { .mg = { 0x2B, 0x00, 0x14 } }, /* 0 3 */ + { .mg = { 0x26, 0x00, 0x00 } }, /* 1 0 */ + { .mg = { 0x2C, 0x00, 0x07 } }, /* 1 1 */ + { .mg = { 0x33, 0x00, 0x0C } }, /* 1 2 */ + { .mg = { 0x2E, 0x00, 0x00 } }, /* 2 0 */ + { .mg = { 0x36, 0x00, 0x09 } }, /* 2 1 */ + { .mg = { 0x3F, 0x00, 0x00 } }, /* 3 0 */ +}; + +static const struct intel_ddi_buf_trans icl_mg_phy_ddi_translations_hbr2_hbr3 = { + .entries = _icl_mg_phy_ddi_translations_hbr2_hbr3, + .num_entries = ARRAY_SIZE(_icl_mg_phy_ddi_translations_hbr2_hbr3), +}; + +static const union intel_ddi_buf_trans_entry _icl_mg_phy_ddi_translations_hdmi[] = { + /* HDMI Preset VS Pre-emph */ + { .mg = { 0x1A, 0x0, 0x0 } }, /* 1 400mV 0dB */ + { .mg = { 0x20, 0x0, 0x0 } }, /* 2 500mV 0dB */ + { .mg = { 0x29, 0x0, 0x0 } }, /* 3 650mV 0dB */ + { .mg = { 0x32, 0x0, 0x0 } }, /* 4 800mV 0dB */ + { .mg = { 0x3F, 0x0, 0x0 } }, /* 5 1000mV 0dB */ + { .mg = { 0x3A, 0x0, 0x5 } }, /* 6 Full -1.5 dB */ + { .mg = { 0x39, 0x0, 0x6 } }, /* 7 Full -1.8 dB */ + { .mg = { 0x38, 0x0, 0x7 } }, /* 8 Full -2 dB */ + { .mg = { 0x37, 0x0, 0x8 } }, /* 9 Full -2.5 dB */ + { .mg = { 0x36, 0x0, 0x9 } }, /* 10 Full -3 dB */ +}; + +static const struct intel_ddi_buf_trans icl_mg_phy_ddi_translations_hdmi = { + .entries = _icl_mg_phy_ddi_translations_hdmi, + .num_entries = ARRAY_SIZE(_icl_mg_phy_ddi_translations_hdmi), + .hdmi_default_entry = ARRAY_SIZE(_icl_mg_phy_ddi_translations_hdmi) - 1, +}; + +static const union intel_ddi_buf_trans_entry _tgl_dkl_phy_ddi_translations_dp_hbr[] = { + /* VS pre-emp Non-trans mV Pre-emph dB */ + { .dkl = { 0x7, 0x0, 0x00 } }, /* 0 0 400mV 0 dB */ + { .dkl = { 0x5, 0x0, 0x05 } }, /* 0 1 400mV 3.5 dB */ + { .dkl = { 0x2, 0x0, 0x0B } }, /* 0 2 400mV 6 dB */ + { .dkl = { 0x0, 0x0, 0x18 } }, /* 0 3 400mV 9.5 dB */ + { .dkl = { 0x5, 0x0, 0x00 } }, /* 1 0 600mV 0 dB */ + { .dkl = { 0x2, 0x0, 0x08 } }, /* 1 1 600mV 3.5 dB */ + { .dkl = { 0x0, 0x0, 0x14 } }, /* 1 2 600mV 6 dB */ + { .dkl = { 0x2, 0x0, 0x00 } }, /* 2 0 800mV 0 dB */ + { .dkl = { 0x0, 0x0, 0x0B } }, /* 2 1 800mV 3.5 dB */ + { .dkl = { 0x0, 0x0, 0x00 } }, /* 3 0 1200mV 0 dB HDMI default */ +}; + +static const struct intel_ddi_buf_trans tgl_dkl_phy_ddi_translations_dp_hbr = { + .entries = _tgl_dkl_phy_ddi_translations_dp_hbr, + .num_entries = ARRAY_SIZE(_tgl_dkl_phy_ddi_translations_dp_hbr), +}; + +static const union intel_ddi_buf_trans_entry _tgl_dkl_phy_ddi_translations_dp_hbr2[] = { + /* VS pre-emp Non-trans mV Pre-emph dB */ + { .dkl = { 0x7, 0x0, 0x00 } }, /* 0 0 400mV 0 dB */ + { .dkl = { 0x5, 0x0, 0x05 } }, /* 0 1 400mV 3.5 dB */ + { .dkl = { 0x2, 0x0, 0x0B } }, /* 0 2 400mV 6 dB */ + { .dkl = { 0x0, 0x0, 0x19 } }, /* 0 3 400mV 9.5 dB */ + { .dkl = { 
0x5, 0x0, 0x00 } }, /* 1 0 600mV 0 dB */ + { .dkl = { 0x2, 0x0, 0x08 } }, /* 1 1 600mV 3.5 dB */ + { .dkl = { 0x0, 0x0, 0x14 } }, /* 1 2 600mV 6 dB */ + { .dkl = { 0x2, 0x0, 0x00 } }, /* 2 0 800mV 0 dB */ + { .dkl = { 0x0, 0x0, 0x0B } }, /* 2 1 800mV 3.5 dB */ + { .dkl = { 0x0, 0x0, 0x00 } }, /* 3 0 1200mV 0 dB HDMI default */ +}; + +static const struct intel_ddi_buf_trans tgl_dkl_phy_ddi_translations_dp_hbr2 = { + .entries = _tgl_dkl_phy_ddi_translations_dp_hbr2, + .num_entries = ARRAY_SIZE(_tgl_dkl_phy_ddi_translations_dp_hbr2), +}; + +static const union intel_ddi_buf_trans_entry _tgl_dkl_phy_ddi_translations_hdmi[] = { + /* HDMI Preset VS Pre-emph */ + { .dkl = { 0x7, 0x0, 0x0 } }, /* 1 400mV 0dB */ + { .dkl = { 0x6, 0x0, 0x0 } }, /* 2 500mV 0dB */ + { .dkl = { 0x4, 0x0, 0x0 } }, /* 3 650mV 0dB */ + { .dkl = { 0x2, 0x0, 0x0 } }, /* 4 800mV 0dB */ + { .dkl = { 0x0, 0x0, 0x0 } }, /* 5 1000mV 0dB */ + { .dkl = { 0x0, 0x0, 0x5 } }, /* 6 Full -1.5 dB */ + { .dkl = { 0x0, 0x0, 0x6 } }, /* 7 Full -1.8 dB */ + { .dkl = { 0x0, 0x0, 0x7 } }, /* 8 Full -2 dB */ + { .dkl = { 0x0, 0x0, 0x8 } }, /* 9 Full -2.5 dB */ + { .dkl = { 0x0, 0x0, 0xA } }, /* 10 Full -3 dB */ +}; + +static const struct intel_ddi_buf_trans tgl_dkl_phy_ddi_translations_hdmi = { + .entries = _tgl_dkl_phy_ddi_translations_hdmi, + .num_entries = ARRAY_SIZE(_tgl_dkl_phy_ddi_translations_hdmi), + .hdmi_default_entry = ARRAY_SIZE(_tgl_dkl_phy_ddi_translations_hdmi) - 1, +}; + +static const union intel_ddi_buf_trans_entry _tgl_combo_phy_ddi_translations_dp_hbr[] = { + /* NT mV Trans mV db */ + { .icl = { 0xA, 0x32, 0x3F, 0x00, 0x00 } }, /* 350 350 0.0 */ + { .icl = { 0xA, 0x4F, 0x37, 0x00, 0x08 } }, /* 350 500 3.1 */ + { .icl = { 0xC, 0x71, 0x2F, 0x00, 0x10 } }, /* 350 700 6.0 */ + { .icl = { 0x6, 0x7D, 0x2B, 0x00, 0x14 } }, /* 350 900 8.2 */ + { .icl = { 0xA, 0x4C, 0x3F, 0x00, 0x00 } }, /* 500 500 0.0 */ + { .icl = { 0xC, 0x73, 0x34, 0x00, 0x0B } }, /* 500 700 2.9 */ + { .icl = { 0x6, 0x7F, 0x2F, 0x00, 0x10 } }, /* 500 900 5.1 */ + { .icl = { 0xC, 0x6C, 0x3C, 0x00, 0x03 } }, /* 650 700 0.6 */ + { .icl = { 0x6, 0x7F, 0x35, 0x00, 0x0A } }, /* 600 900 3.5 */ + { .icl = { 0x6, 0x7F, 0x3F, 0x00, 0x00 } }, /* 900 900 0.0 */ +}; + +static const struct intel_ddi_buf_trans tgl_combo_phy_ddi_translations_dp_hbr = { + .entries = _tgl_combo_phy_ddi_translations_dp_hbr, + .num_entries = ARRAY_SIZE(_tgl_combo_phy_ddi_translations_dp_hbr), +}; + +static const union intel_ddi_buf_trans_entry _tgl_combo_phy_ddi_translations_dp_hbr2[] = { + /* NT mV Trans mV db */ + { .icl = { 0xA, 0x35, 0x3F, 0x00, 0x00 } }, /* 350 350 0.0 */ + { .icl = { 0xA, 0x4F, 0x37, 0x00, 0x08 } }, /* 350 500 3.1 */ + { .icl = { 0xC, 0x63, 0x2F, 0x00, 0x10 } }, /* 350 700 6.0 */ + { .icl = { 0x6, 0x7F, 0x2B, 0x00, 0x14 } }, /* 350 900 8.2 */ + { .icl = { 0xA, 0x47, 0x3F, 0x00, 0x00 } }, /* 500 500 0.0 */ + { .icl = { 0xC, 0x63, 0x34, 0x00, 0x0B } }, /* 500 700 2.9 */ + { .icl = { 0x6, 0x7F, 0x2F, 0x00, 0x10 } }, /* 500 900 5.1 */ + { .icl = { 0xC, 0x61, 0x3C, 0x00, 0x03 } }, /* 650 700 0.6 */ + { .icl = { 0x6, 0x7B, 0x35, 0x00, 0x0A } }, /* 600 900 3.5 */ + { .icl = { 0x6, 0x7F, 0x3F, 0x00, 0x00 } }, /* 900 900 0.0 */ +}; + +static const struct intel_ddi_buf_trans tgl_combo_phy_ddi_translations_dp_hbr2 = { + .entries = _tgl_combo_phy_ddi_translations_dp_hbr2, + .num_entries = ARRAY_SIZE(_tgl_combo_phy_ddi_translations_dp_hbr2), +}; + +static const union intel_ddi_buf_trans_entry _tgl_uy_combo_phy_ddi_translations_dp_hbr2[] = { + /* NT mV Trans mV db */ + { .icl = { 0xA, 0x35, 
0x3F, 0x00, 0x00 } }, /* 350 350 0.0 */ + { .icl = { 0xA, 0x4F, 0x36, 0x00, 0x09 } }, /* 350 500 3.1 */ + { .icl = { 0xC, 0x60, 0x32, 0x00, 0x0D } }, /* 350 700 6.0 */ + { .icl = { 0xC, 0x7F, 0x2D, 0x00, 0x12 } }, /* 350 900 8.2 */ + { .icl = { 0xC, 0x47, 0x3F, 0x00, 0x00 } }, /* 500 500 0.0 */ + { .icl = { 0xC, 0x6F, 0x36, 0x00, 0x09 } }, /* 500 700 2.9 */ + { .icl = { 0x6, 0x7D, 0x32, 0x00, 0x0D } }, /* 500 900 5.1 */ + { .icl = { 0x6, 0x60, 0x3C, 0x00, 0x03 } }, /* 650 700 0.6 */ + { .icl = { 0x6, 0x7F, 0x34, 0x00, 0x0B } }, /* 600 900 3.5 */ + { .icl = { 0x6, 0x7F, 0x3F, 0x00, 0x00 } }, /* 900 900 0.0 */ +}; + +static const struct intel_ddi_buf_trans tgl_uy_combo_phy_ddi_translations_dp_hbr2 = { + .entries = _tgl_uy_combo_phy_ddi_translations_dp_hbr2, + .num_entries = ARRAY_SIZE(_tgl_uy_combo_phy_ddi_translations_dp_hbr2), }; /* * Cloned the HOBL entry to comply with the voltage and pre-emphasis entries * that DisplayPort specification requires */ -static const struct cnl_ddi_buf_trans tgl_combo_phy_ddi_translations_edp_hbr2_hobl[] = { - /* VS pre-emp */ - { 0x6, 0x7F, 0x3F, 0x00, 0x00 }, /* 0 0 */ - { 0x6, 0x7F, 0x3F, 0x00, 0x00 }, /* 0 1 */ - { 0x6, 0x7F, 0x3F, 0x00, 0x00 }, /* 0 2 */ - { 0x6, 0x7F, 0x3F, 0x00, 0x00 }, /* 0 3 */ - { 0x6, 0x7F, 0x3F, 0x00, 0x00 }, /* 1 0 */ - { 0x6, 0x7F, 0x3F, 0x00, 0x00 }, /* 1 1 */ - { 0x6, 0x7F, 0x3F, 0x00, 0x00 }, /* 1 2 */ - { 0x6, 0x7F, 0x3F, 0x00, 0x00 }, /* 2 0 */ - { 0x6, 0x7F, 0x3F, 0x00, 0x00 }, /* 2 1 */ -}; - -static const struct cnl_ddi_buf_trans rkl_combo_phy_ddi_translations_dp_hbr[] = { - /* NT mV Trans mV db */ - { 0xA, 0x2F, 0x3F, 0x00, 0x00 }, /* 350 350 0.0 */ - { 0xA, 0x4F, 0x37, 0x00, 0x08 }, /* 350 500 3.1 */ - { 0xC, 0x63, 0x2F, 0x00, 0x10 }, /* 350 700 6.0 */ - { 0x6, 0x7D, 0x2A, 0x00, 0x15 }, /* 350 900 8.2 */ - { 0xA, 0x4C, 0x3F, 0x00, 0x00 }, /* 500 500 0.0 */ - { 0xC, 0x73, 0x34, 0x00, 0x0B }, /* 500 700 2.9 */ - { 0x6, 0x7F, 0x2F, 0x00, 0x10 }, /* 500 900 5.1 */ - { 0xC, 0x6E, 0x3E, 0x00, 0x01 }, /* 650 700 0.6 */ - { 0x6, 0x7F, 0x35, 0x00, 0x0A }, /* 600 900 3.5 */ - { 0x6, 0x7F, 0x3F, 0x00, 0x00 }, /* 900 900 0.0 */ -}; - -static const struct cnl_ddi_buf_trans rkl_combo_phy_ddi_translations_dp_hbr2_hbr3[] = { - /* NT mV Trans mV db */ - { 0xA, 0x35, 0x3F, 0x00, 0x00 }, /* 350 350 0.0 */ - { 0xA, 0x50, 0x38, 0x00, 0x07 }, /* 350 500 3.1 */ - { 0xC, 0x61, 0x33, 0x00, 0x0C }, /* 350 700 6.0 */ - { 0x6, 0x7F, 0x2E, 0x00, 0x11 }, /* 350 900 8.2 */ - { 0xA, 0x47, 0x3F, 0x00, 0x00 }, /* 500 500 0.0 */ - { 0xC, 0x5F, 0x38, 0x00, 0x07 }, /* 500 700 2.9 */ - { 0x6, 0x7F, 0x2F, 0x00, 0x10 }, /* 500 900 5.1 */ - { 0xC, 0x5F, 0x3F, 0x00, 0x00 }, /* 650 700 0.6 */ - { 0x6, 0x7E, 0x36, 0x00, 0x09 }, /* 600 900 3.5 */ - { 0x6, 0x7F, 0x3F, 0x00, 0x00 }, /* 900 900 0.0 */ -}; - -static const struct tgl_dkl_phy_ddi_buf_trans adlp_dkl_phy_dp_ddi_trans_hbr[] = { - /* VS pre-emp Non-trans mV Pre-emph dB */ - { 0x7, 0x0, 0x01 }, /* 0 0 400mV 0 dB */ - { 0x5, 0x0, 0x06 }, /* 0 1 400mV 3.5 dB */ - { 0x2, 0x0, 0x0B }, /* 0 2 400mV 6 dB */ - { 0x0, 0x0, 0x17 }, /* 0 3 400mV 9.5 dB */ - { 0x5, 0x0, 0x00 }, /* 1 0 600mV 0 dB */ - { 0x2, 0x0, 0x08 }, /* 1 1 600mV 3.5 dB */ - { 0x0, 0x0, 0x14 }, /* 1 2 600mV 6 dB */ - { 0x2, 0x0, 0x00 }, /* 2 0 800mV 0 dB */ - { 0x0, 0x0, 0x0B }, /* 2 1 800mV 3.5 dB */ - { 0x0, 0x0, 0x00 }, /* 3 0 1200mV 0 dB */ -}; - -static const struct tgl_dkl_phy_ddi_buf_trans adlp_dkl_phy_dp_ddi_trans_hbr2_hbr3[] = { - /* VS pre-emp Non-trans mV Pre-emph dB */ - { 0x7, 0x0, 0x00 }, /* 0 0 400mV 0 dB */ - { 0x5, 0x0, 0x04 }, /* 
0 1 400mV 3.5 dB */ - { 0x2, 0x0, 0x0A }, /* 0 2 400mV 6 dB */ - { 0x0, 0x0, 0x18 }, /* 0 3 400mV 9.5 dB */ - { 0x5, 0x0, 0x00 }, /* 1 0 600mV 0 dB */ - { 0x2, 0x0, 0x06 }, /* 1 1 600mV 3.5 dB */ - { 0x0, 0x0, 0x14 }, /* 1 2 600mV 6 dB */ - { 0x2, 0x0, 0x00 }, /* 2 0 800mV 0 dB */ - { 0x0, 0x0, 0x09 }, /* 2 1 800mV 3.5 dB */ - { 0x0, 0x0, 0x00 }, /* 3 0 1200mV 0 dB */ -}; - -bool is_hobl_buf_trans(const struct cnl_ddi_buf_trans *table) -{ - return table == tgl_combo_phy_ddi_translations_edp_hbr2_hobl; -} +static const union intel_ddi_buf_trans_entry _tgl_combo_phy_ddi_translations_edp_hbr2_hobl[] = { + /* VS pre-emp */ + { .icl = { 0x6, 0x7F, 0x3F, 0x00, 0x00 } }, /* 0 0 */ + { .icl = { 0x6, 0x7F, 0x3F, 0x00, 0x00 } }, /* 0 1 */ + { .icl = { 0x6, 0x7F, 0x3F, 0x00, 0x00 } }, /* 0 2 */ + { .icl = { 0x6, 0x7F, 0x3F, 0x00, 0x00 } }, /* 0 3 */ + { .icl = { 0x6, 0x7F, 0x3F, 0x00, 0x00 } }, /* 1 0 */ + { .icl = { 0x6, 0x7F, 0x3F, 0x00, 0x00 } }, /* 1 1 */ + { .icl = { 0x6, 0x7F, 0x3F, 0x00, 0x00 } }, /* 1 2 */ + { .icl = { 0x6, 0x7F, 0x3F, 0x00, 0x00 } }, /* 2 0 */ + { .icl = { 0x6, 0x7F, 0x3F, 0x00, 0x00 } }, /* 2 1 */ +}; -static const struct ddi_buf_trans * -bdw_get_buf_trans_edp(struct intel_encoder *encoder, int *n_entries) -{ - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); +static const struct intel_ddi_buf_trans tgl_combo_phy_ddi_translations_edp_hbr2_hobl = { + .entries = _tgl_combo_phy_ddi_translations_edp_hbr2_hobl, + .num_entries = ARRAY_SIZE(_tgl_combo_phy_ddi_translations_edp_hbr2_hobl), +}; - if (dev_priv->vbt.edp.low_vswing) { - *n_entries = ARRAY_SIZE(bdw_ddi_translations_edp); - return bdw_ddi_translations_edp; - } else { - *n_entries = ARRAY_SIZE(bdw_ddi_translations_dp); - return bdw_ddi_translations_dp; - } -} +static const union intel_ddi_buf_trans_entry _rkl_combo_phy_ddi_translations_dp_hbr[] = { + /* NT mV Trans mV db */ + { .icl = { 0xA, 0x2F, 0x3F, 0x00, 0x00 } }, /* 350 350 0.0 */ + { .icl = { 0xA, 0x4F, 0x37, 0x00, 0x08 } }, /* 350 500 3.1 */ + { .icl = { 0xC, 0x63, 0x2F, 0x00, 0x10 } }, /* 350 700 6.0 */ + { .icl = { 0x6, 0x7D, 0x2A, 0x00, 0x15 } }, /* 350 900 8.2 */ + { .icl = { 0xA, 0x4C, 0x3F, 0x00, 0x00 } }, /* 500 500 0.0 */ + { .icl = { 0xC, 0x73, 0x34, 0x00, 0x0B } }, /* 500 700 2.9 */ + { .icl = { 0x6, 0x7F, 0x2F, 0x00, 0x10 } }, /* 500 900 5.1 */ + { .icl = { 0xC, 0x6E, 0x3E, 0x00, 0x01 } }, /* 650 700 0.6 */ + { .icl = { 0x6, 0x7F, 0x35, 0x00, 0x0A } }, /* 600 900 3.5 */ + { .icl = { 0x6, 0x7F, 0x3F, 0x00, 0x00 } }, /* 900 900 0.0 */ +}; -static const struct ddi_buf_trans * -skl_get_buf_trans_dp(struct intel_encoder *encoder, int *n_entries) -{ - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); +static const struct intel_ddi_buf_trans rkl_combo_phy_ddi_translations_dp_hbr = { + .entries = _rkl_combo_phy_ddi_translations_dp_hbr, + .num_entries = ARRAY_SIZE(_rkl_combo_phy_ddi_translations_dp_hbr), +}; - if (IS_SKL_ULX(dev_priv)) { - *n_entries = ARRAY_SIZE(skl_y_ddi_translations_dp); - return skl_y_ddi_translations_dp; - } else if (IS_SKL_ULT(dev_priv)) { - *n_entries = ARRAY_SIZE(skl_u_ddi_translations_dp); - return skl_u_ddi_translations_dp; - } else { - *n_entries = ARRAY_SIZE(skl_ddi_translations_dp); - return skl_ddi_translations_dp; - } -} +static const union intel_ddi_buf_trans_entry _rkl_combo_phy_ddi_translations_dp_hbr2_hbr3[] = { + /* NT mV Trans mV db */ + { .icl = { 0xA, 0x35, 0x3F, 0x00, 0x00 } }, /* 350 350 0.0 */ + { .icl = { 0xA, 0x50, 0x38, 0x00, 0x07 } }, /* 350 500 3.1 */ + { .icl = { 0xC, 0x61, 0x33, 0x00, 
0x0C } }, /* 350 700 6.0 */ + { .icl = { 0x6, 0x7F, 0x2E, 0x00, 0x11 } }, /* 350 900 8.2 */ + { .icl = { 0xA, 0x47, 0x3F, 0x00, 0x00 } }, /* 500 500 0.0 */ + { .icl = { 0xC, 0x5F, 0x38, 0x00, 0x07 } }, /* 500 700 2.9 */ + { .icl = { 0x6, 0x7F, 0x2F, 0x00, 0x10 } }, /* 500 900 5.1 */ + { .icl = { 0xC, 0x5F, 0x3F, 0x00, 0x00 } }, /* 650 700 0.6 */ + { .icl = { 0x6, 0x7E, 0x36, 0x00, 0x09 } }, /* 600 900 3.5 */ + { .icl = { 0x6, 0x7F, 0x3F, 0x00, 0x00 } }, /* 900 900 0.0 */ +}; -static const struct ddi_buf_trans * -kbl_get_buf_trans_dp(struct intel_encoder *encoder, int *n_entries) -{ - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); +static const struct intel_ddi_buf_trans rkl_combo_phy_ddi_translations_dp_hbr2_hbr3 = { + .entries = _rkl_combo_phy_ddi_translations_dp_hbr2_hbr3, + .num_entries = ARRAY_SIZE(_rkl_combo_phy_ddi_translations_dp_hbr2_hbr3), +}; - if (IS_KBL_ULX(dev_priv) || - IS_CFL_ULX(dev_priv) || - IS_CML_ULX(dev_priv)) { - *n_entries = ARRAY_SIZE(kbl_y_ddi_translations_dp); - return kbl_y_ddi_translations_dp; - } else if (IS_KBL_ULT(dev_priv) || - IS_CFL_ULT(dev_priv) || - IS_CML_ULT(dev_priv)) { - *n_entries = ARRAY_SIZE(kbl_u_ddi_translations_dp); - return kbl_u_ddi_translations_dp; - } else { - *n_entries = ARRAY_SIZE(kbl_ddi_translations_dp); - return kbl_ddi_translations_dp; - } -} +static const union intel_ddi_buf_trans_entry _adls_combo_phy_ddi_translations_dp_hbr2_hbr3[] = { + /* NT mV Trans mV db */ + { .icl = { 0xA, 0x35, 0x3F, 0x00, 0x00 } }, /* 350 350 0.0 */ + { .icl = { 0xA, 0x4F, 0x37, 0x00, 0x08 } }, /* 350 500 3.1 */ + { .icl = { 0xC, 0x63, 0x31, 0x00, 0x0E } }, /* 350 700 6.0 */ + { .icl = { 0x6, 0x7F, 0x2C, 0x00, 0x13 } }, /* 350 900 8.2 */ + { .icl = { 0xA, 0x47, 0x3F, 0x00, 0x00 } }, /* 500 500 0.0 */ + { .icl = { 0xC, 0x63, 0x37, 0x00, 0x08 } }, /* 500 700 2.9 */ + { .icl = { 0x6, 0x73, 0x32, 0x00, 0x0D } }, /* 500 900 5.1 */ + { .icl = { 0xC, 0x58, 0x3F, 0x00, 0x00 } }, /* 650 700 0.6 */ + { .icl = { 0x6, 0x7F, 0x35, 0x00, 0x0A } }, /* 600 900 3.5 */ + { .icl = { 0x6, 0x7F, 0x3F, 0x00, 0x00 } }, /* 900 900 0.0 */ +}; -static const struct ddi_buf_trans * -skl_get_buf_trans_edp(struct intel_encoder *encoder, int *n_entries) -{ - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); +static const struct intel_ddi_buf_trans adls_combo_phy_ddi_translations_dp_hbr2_hbr3 = { + .entries = _adls_combo_phy_ddi_translations_dp_hbr2_hbr3, + .num_entries = ARRAY_SIZE(_adls_combo_phy_ddi_translations_dp_hbr2_hbr3), +}; - if (dev_priv->vbt.edp.low_vswing) { - if (IS_SKL_ULX(dev_priv) || - IS_KBL_ULX(dev_priv) || - IS_CFL_ULX(dev_priv) || - IS_CML_ULX(dev_priv)) { - *n_entries = ARRAY_SIZE(skl_y_ddi_translations_edp); - return skl_y_ddi_translations_edp; - } else if (IS_SKL_ULT(dev_priv) || - IS_KBL_ULT(dev_priv) || - IS_CFL_ULT(dev_priv) || - IS_CML_ULT(dev_priv)) { - *n_entries = ARRAY_SIZE(skl_u_ddi_translations_edp); - return skl_u_ddi_translations_edp; - } else { - *n_entries = ARRAY_SIZE(skl_ddi_translations_edp); - return skl_ddi_translations_edp; - } - } +static const union intel_ddi_buf_trans_entry _adls_combo_phy_ddi_translations_edp_hbr2[] = { + /* NT mV Trans mV db */ + { .icl = { 0x9, 0x73, 0x3D, 0x00, 0x02 } }, /* 200 200 0.0 */ + { .icl = { 0x9, 0x7A, 0x3C, 0x00, 0x03 } }, /* 200 250 1.9 */ + { .icl = { 0x9, 0x7F, 0x3B, 0x00, 0x04 } }, /* 200 300 3.5 */ + { .icl = { 0x4, 0x6C, 0x33, 0x00, 0x0C } }, /* 200 350 4.9 */ + { .icl = { 0x2, 0x73, 0x3A, 0x00, 0x05 } }, /* 250 250 0.0 */ + { .icl = { 0x2, 0x7C, 0x38, 0x00, 0x07 } }, /* 
250 300 1.6 */ + { .icl = { 0x4, 0x5A, 0x36, 0x00, 0x09 } }, /* 250 350 2.9 */ + { .icl = { 0x4, 0x57, 0x3D, 0x00, 0x02 } }, /* 300 300 0.0 */ + { .icl = { 0x4, 0x65, 0x38, 0x00, 0x07 } }, /* 300 350 1.3 */ + { .icl = { 0x4, 0x6C, 0x3A, 0x00, 0x05 } }, /* 350 350 0.0 */ +}; - if (IS_KABYLAKE(dev_priv) || - IS_COFFEELAKE(dev_priv) || - IS_COMETLAKE(dev_priv)) - return kbl_get_buf_trans_dp(encoder, n_entries); - else - return skl_get_buf_trans_dp(encoder, n_entries); -} +static const struct intel_ddi_buf_trans adls_combo_phy_ddi_translations_edp_hbr2 = { + .entries = _adls_combo_phy_ddi_translations_edp_hbr2, + .num_entries = ARRAY_SIZE(_adls_combo_phy_ddi_translations_edp_hbr2), +}; -static const struct ddi_buf_trans * -skl_get_buf_trans_hdmi(struct drm_i915_private *dev_priv, int *n_entries) -{ - if (IS_SKL_ULX(dev_priv) || - IS_KBL_ULX(dev_priv) || - IS_CFL_ULX(dev_priv) || - IS_CML_ULX(dev_priv)) { - *n_entries = ARRAY_SIZE(skl_y_ddi_translations_hdmi); - return skl_y_ddi_translations_hdmi; - } else { - *n_entries = ARRAY_SIZE(skl_ddi_translations_hdmi); - return skl_ddi_translations_hdmi; - } -} +static const union intel_ddi_buf_trans_entry _adls_combo_phy_ddi_translations_edp_hbr3[] = { + /* NT mV Trans mV db */ + { .icl = { 0xA, 0x35, 0x3F, 0x00, 0x00 } }, /* 350 350 0.0 */ + { .icl = { 0xA, 0x4F, 0x37, 0x00, 0x08 } }, /* 350 500 3.1 */ + { .icl = { 0xC, 0x63, 0x31, 0x00, 0x0E } }, /* 350 700 6.0 */ + { .icl = { 0x6, 0x7F, 0x2C, 0x00, 0x13 } }, /* 350 900 8.2 */ + { .icl = { 0xA, 0x47, 0x3F, 0x00, 0x00 } }, /* 500 500 0.0 */ + { .icl = { 0xC, 0x63, 0x37, 0x00, 0x08 } }, /* 500 700 2.9 */ + { .icl = { 0x6, 0x73, 0x32, 0x00, 0x0D } }, /* 500 900 5.1 */ + { .icl = { 0xC, 0x58, 0x3F, 0x00, 0x00 } }, /* 650 700 0.6 */ + { .icl = { 0x6, 0x7F, 0x35, 0x00, 0x0A } }, /* 600 900 3.5 */ + { .icl = { 0x6, 0x7F, 0x3F, 0x00, 0x00 } }, /* 900 900 0.0 */ +}; -static int skl_buf_trans_num_entries(enum port port, int n_entries) -{ - /* Only DDIA and DDIE can select the 10th register with DP */ - if (port == PORT_A || port == PORT_E) - return min(n_entries, 10); - else - return min(n_entries, 9); -} +static const struct intel_ddi_buf_trans adls_combo_phy_ddi_translations_edp_hbr3 = { + .entries = _adls_combo_phy_ddi_translations_edp_hbr3, + .num_entries = ARRAY_SIZE(_adls_combo_phy_ddi_translations_edp_hbr3), +}; -const struct ddi_buf_trans * -intel_ddi_get_buf_trans_dp(struct intel_encoder *encoder, int *n_entries) -{ - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); +static const union intel_ddi_buf_trans_entry _adlp_combo_phy_ddi_translations_hdmi[] = { + /* NT mV Trans mV db */ + { .icl = { 0x6, 0x60, 0x3F, 0x00, 0x00 } }, /* 400 400 0.0 */ + { .icl = { 0x6, 0x68, 0x3F, 0x00, 0x00 } }, /* 500 500 0.0 */ + { .icl = { 0xA, 0x73, 0x3F, 0x00, 0x00 } }, /* 650 650 0.0 ALS */ + { .icl = { 0xA, 0x78, 0x3F, 0x00, 0x00 } }, /* 800 800 0.0 */ + { .icl = { 0xB, 0x7F, 0x3F, 0x00, 0x00 } }, /* 1000 1000 0.0 Re-timer */ + { .icl = { 0xB, 0x7F, 0x3B, 0x00, 0x04 } }, /* Full Red -1.5 */ + { .icl = { 0xB, 0x7F, 0x39, 0x00, 0x06 } }, /* Full Red -1.8 */ + { .icl = { 0xB, 0x7F, 0x37, 0x00, 0x08 } }, /* Full Red -2.0 CRLS */ + { .icl = { 0xB, 0x7F, 0x35, 0x00, 0x0A } }, /* Full Red -2.5 */ + { .icl = { 0xB, 0x7F, 0x33, 0x00, 0x0C } }, /* Full Red -3.0 */ +}; - if (IS_KABYLAKE(dev_priv) || - IS_COFFEELAKE(dev_priv) || - IS_COMETLAKE(dev_priv)) { - const struct ddi_buf_trans *ddi_translations = - kbl_get_buf_trans_dp(encoder, n_entries); - *n_entries = skl_buf_trans_num_entries(encoder->port, *n_entries); 
- return ddi_translations; - } else if (IS_SKYLAKE(dev_priv)) { - const struct ddi_buf_trans *ddi_translations = - skl_get_buf_trans_dp(encoder, n_entries); - *n_entries = skl_buf_trans_num_entries(encoder->port, *n_entries); - return ddi_translations; - } else if (IS_BROADWELL(dev_priv)) { - *n_entries = ARRAY_SIZE(bdw_ddi_translations_dp); - return bdw_ddi_translations_dp; - } else if (IS_HASWELL(dev_priv)) { - *n_entries = ARRAY_SIZE(hsw_ddi_translations_dp); - return hsw_ddi_translations_dp; - } +static const struct intel_ddi_buf_trans adlp_combo_phy_ddi_translations_hdmi = { + .entries = _adlp_combo_phy_ddi_translations_hdmi, + .num_entries = ARRAY_SIZE(_adlp_combo_phy_ddi_translations_hdmi), + .hdmi_default_entry = ARRAY_SIZE(_adlp_combo_phy_ddi_translations_hdmi) - 1, +}; - *n_entries = 0; - return NULL; -} +static const union intel_ddi_buf_trans_entry _adlp_combo_phy_ddi_translations_dp_hbr[] = { + /* NT mV Trans mV db */ + { .icl = { 0xA, 0x35, 0x3F, 0x00, 0x00 } }, /* 350 350 0.0 */ + { .icl = { 0xA, 0x4F, 0x37, 0x00, 0x08 } }, /* 350 500 3.1 */ + { .icl = { 0xC, 0x71, 0x31, 0x00, 0x0E } }, /* 350 700 6.0 */ + { .icl = { 0x6, 0x7F, 0x2C, 0x00, 0x13 } }, /* 350 900 8.2 */ + { .icl = { 0xA, 0x4C, 0x3F, 0x00, 0x00 } }, /* 500 500 0.0 */ + { .icl = { 0xC, 0x73, 0x34, 0x00, 0x0B } }, /* 500 700 2.9 */ + { .icl = { 0x6, 0x7F, 0x2F, 0x00, 0x10 } }, /* 500 900 5.1 */ + { .icl = { 0xC, 0x73, 0x3E, 0x00, 0x01 } }, /* 650 700 0.6 */ + { .icl = { 0x6, 0x7F, 0x35, 0x00, 0x0A } }, /* 600 900 3.5 */ + { .icl = { 0x6, 0x7F, 0x3F, 0x00, 0x00 } }, /* 900 900 0.0 */ +}; -const struct ddi_buf_trans * -intel_ddi_get_buf_trans_edp(struct intel_encoder *encoder, int *n_entries) -{ - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); +static const struct intel_ddi_buf_trans adlp_combo_phy_ddi_translations_dp_hbr = { + .entries = _adlp_combo_phy_ddi_translations_dp_hbr, + .num_entries = ARRAY_SIZE(_adlp_combo_phy_ddi_translations_dp_hbr), +}; - if (DISPLAY_VER(dev_priv) == 9 && !IS_BROXTON(dev_priv)) { - const struct ddi_buf_trans *ddi_translations = - skl_get_buf_trans_edp(encoder, n_entries); - *n_entries = skl_buf_trans_num_entries(encoder->port, *n_entries); - return ddi_translations; - } else if (IS_BROADWELL(dev_priv)) { - return bdw_get_buf_trans_edp(encoder, n_entries); - } else if (IS_HASWELL(dev_priv)) { - *n_entries = ARRAY_SIZE(hsw_ddi_translations_dp); - return hsw_ddi_translations_dp; - } +static const union intel_ddi_buf_trans_entry _adlp_combo_phy_ddi_translations_dp_hbr2_hbr3[] = { + /* NT mV Trans mV db */ + { .icl = { 0xA, 0x35, 0x3F, 0x00, 0x00 } }, /* 350 350 0.0 */ + { .icl = { 0xA, 0x4F, 0x37, 0x00, 0x08 } }, /* 350 500 3.1 */ + { .icl = { 0xC, 0x71, 0x2F, 0x00, 0x10 } }, /* 350 700 6.0 */ + { .icl = { 0x6, 0x7F, 0x2B, 0x00, 0x14 } }, /* 350 900 8.2 */ + { .icl = { 0xA, 0x4C, 0x3F, 0x00, 0x00 } }, /* 500 500 0.0 */ + { .icl = { 0xC, 0x73, 0x34, 0x00, 0x0B } }, /* 500 700 2.9 */ + { .icl = { 0x6, 0x7F, 0x30, 0x00, 0x0F } }, /* 500 900 5.1 */ + { .icl = { 0xC, 0x63, 0x3F, 0x00, 0x00 } }, /* 650 700 0.6 */ + { .icl = { 0x6, 0x7F, 0x38, 0x00, 0x07 } }, /* 600 900 3.5 */ + { .icl = { 0x6, 0x7F, 0x3F, 0x00, 0x00 } }, /* 900 900 0.0 */ +}; - *n_entries = 0; - return NULL; -} +static const struct intel_ddi_buf_trans adlp_combo_phy_ddi_translations_dp_hbr2_hbr3 = { + .entries = _adlp_combo_phy_ddi_translations_dp_hbr2_hbr3, + .num_entries = ARRAY_SIZE(_adlp_combo_phy_ddi_translations_dp_hbr2_hbr3), +}; -const struct ddi_buf_trans * -intel_ddi_get_buf_trans_fdi(struct 
drm_i915_private *dev_priv, - int *n_entries) -{ - if (IS_BROADWELL(dev_priv)) { - *n_entries = ARRAY_SIZE(bdw_ddi_translations_fdi); - return bdw_ddi_translations_fdi; - } else if (IS_HASWELL(dev_priv)) { - *n_entries = ARRAY_SIZE(hsw_ddi_translations_fdi); - return hsw_ddi_translations_fdi; - } +static const struct intel_ddi_buf_trans adlp_combo_phy_ddi_translations_edp_hbr3 = { + .entries = _icl_combo_phy_ddi_translations_dp_hbr2_edp_hbr3, + .num_entries = ARRAY_SIZE(_icl_combo_phy_ddi_translations_dp_hbr2_edp_hbr3), +}; + +static const struct intel_ddi_buf_trans adlp_combo_phy_ddi_translations_edp_up_to_hbr2 = { + .entries = _icl_combo_phy_ddi_translations_edp_hbr2, + .num_entries = ARRAY_SIZE(_icl_combo_phy_ddi_translations_edp_hbr2), +}; - *n_entries = 0; - return NULL; +static const union intel_ddi_buf_trans_entry _adlp_dkl_phy_ddi_translations_dp_hbr[] = { + /* VS pre-emp Non-trans mV Pre-emph dB */ + { .dkl = { 0x7, 0x0, 0x01 } }, /* 0 0 400mV 0 dB */ + { .dkl = { 0x5, 0x0, 0x06 } }, /* 0 1 400mV 3.5 dB */ + { .dkl = { 0x2, 0x0, 0x0B } }, /* 0 2 400mV 6 dB */ + { .dkl = { 0x0, 0x0, 0x17 } }, /* 0 3 400mV 9.5 dB */ + { .dkl = { 0x5, 0x0, 0x00 } }, /* 1 0 600mV 0 dB */ + { .dkl = { 0x2, 0x0, 0x08 } }, /* 1 1 600mV 3.5 dB */ + { .dkl = { 0x0, 0x0, 0x14 } }, /* 1 2 600mV 6 dB */ + { .dkl = { 0x2, 0x0, 0x00 } }, /* 2 0 800mV 0 dB */ + { .dkl = { 0x0, 0x0, 0x0B } }, /* 2 1 800mV 3.5 dB */ + { .dkl = { 0x0, 0x0, 0x00 } }, /* 3 0 1200mV 0 dB */ +}; + +static const struct intel_ddi_buf_trans adlp_dkl_phy_ddi_translations_dp_hbr = { + .entries = _adlp_dkl_phy_ddi_translations_dp_hbr, + .num_entries = ARRAY_SIZE(_adlp_dkl_phy_ddi_translations_dp_hbr), +}; + +static const union intel_ddi_buf_trans_entry _adlp_dkl_phy_ddi_translations_dp_hbr2_hbr3[] = { + /* VS pre-emp Non-trans mV Pre-emph dB */ + { .dkl = { 0x7, 0x0, 0x00 } }, /* 0 0 400mV 0 dB */ + { .dkl = { 0x5, 0x0, 0x04 } }, /* 0 1 400mV 3.5 dB */ + { .dkl = { 0x2, 0x0, 0x0A } }, /* 0 2 400mV 6 dB */ + { .dkl = { 0x0, 0x0, 0x18 } }, /* 0 3 400mV 9.5 dB */ + { .dkl = { 0x5, 0x0, 0x00 } }, /* 1 0 600mV 0 dB */ + { .dkl = { 0x2, 0x0, 0x06 } }, /* 1 1 600mV 3.5 dB */ + { .dkl = { 0x0, 0x0, 0x14 } }, /* 1 2 600mV 6 dB */ + { .dkl = { 0x2, 0x0, 0x00 } }, /* 2 0 800mV 0 dB */ + { .dkl = { 0x0, 0x0, 0x09 } }, /* 2 1 800mV 3.5 dB */ + { .dkl = { 0x0, 0x0, 0x00 } }, /* 3 0 1200mV 0 dB */ +}; + +static const struct intel_ddi_buf_trans adlp_dkl_phy_ddi_translations_dp_hbr2_hbr3 = { + .entries = _adlp_dkl_phy_ddi_translations_dp_hbr2_hbr3, + .num_entries = ARRAY_SIZE(_adlp_dkl_phy_ddi_translations_dp_hbr2_hbr3), +}; + +bool is_hobl_buf_trans(const struct intel_ddi_buf_trans *table) +{ + return table == &tgl_combo_phy_ddi_translations_edp_hbr2_hobl; } -const struct ddi_buf_trans * -intel_ddi_get_buf_trans_hdmi(struct intel_encoder *encoder, - int *n_entries) +static const struct intel_ddi_buf_trans * +intel_get_buf_trans(const struct intel_ddi_buf_trans *ddi_translations, int *num_entries) { - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + *num_entries = ddi_translations->num_entries; + return ddi_translations; +} - if (DISPLAY_VER(dev_priv) == 9 && !IS_BROXTON(dev_priv)) { - return skl_get_buf_trans_hdmi(dev_priv, n_entries); - } else if (IS_BROADWELL(dev_priv)) { - *n_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi); - return bdw_ddi_translations_hdmi; - } else if (IS_HASWELL(dev_priv)) { - *n_entries = ARRAY_SIZE(hsw_ddi_translations_hdmi); - return hsw_ddi_translations_hdmi; - } +static const struct intel_ddi_buf_trans * 
+hsw_get_buf_trans(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + int *n_entries) +{ + if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_ANALOG)) + return intel_get_buf_trans(&hsw_ddi_translations_fdi, n_entries); + else if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) + return intel_get_buf_trans(&hsw_ddi_translations_hdmi, n_entries); + else + return intel_get_buf_trans(&hsw_ddi_translations_dp, n_entries); +} - *n_entries = 0; - return NULL; +static const struct intel_ddi_buf_trans * +bdw_get_buf_trans(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + int *n_entries) +{ + struct drm_i915_private *i915 = to_i915(encoder->base.dev); + + if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_ANALOG)) + return intel_get_buf_trans(&bdw_ddi_translations_fdi, n_entries); + else if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) + return intel_get_buf_trans(&bdw_ddi_translations_hdmi, n_entries); + else if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_EDP) && + i915->vbt.edp.low_vswing) + return intel_get_buf_trans(&bdw_ddi_translations_edp, n_entries); + else + return intel_get_buf_trans(&bdw_ddi_translations_dp, n_entries); } -static const struct bxt_ddi_buf_trans * -bxt_get_buf_trans_dp(struct intel_encoder *encoder, int *n_entries) +static int skl_buf_trans_num_entries(enum port port, int n_entries) { - *n_entries = ARRAY_SIZE(bxt_ddi_translations_dp); - return bxt_ddi_translations_dp; + /* Only DDIA and DDIE can select the 10th register with DP */ + if (port == PORT_A || port == PORT_E) + return min(n_entries, 10); + else + return min(n_entries, 9); } -static const struct bxt_ddi_buf_trans * -bxt_get_buf_trans_edp(struct intel_encoder *encoder, int *n_entries) +static const struct intel_ddi_buf_trans * +_skl_get_buf_trans_dp(struct intel_encoder *encoder, + const struct intel_ddi_buf_trans *ddi_translations, + int *n_entries) { - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + ddi_translations = intel_get_buf_trans(ddi_translations, n_entries); + *n_entries = skl_buf_trans_num_entries(encoder->port, *n_entries); + return ddi_translations; +} - if (dev_priv->vbt.edp.low_vswing) { - *n_entries = ARRAY_SIZE(bxt_ddi_translations_edp); - return bxt_ddi_translations_edp; - } +static const struct intel_ddi_buf_trans * +skl_y_get_buf_trans(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + int *n_entries) +{ + struct drm_i915_private *i915 = to_i915(encoder->base.dev); - return bxt_get_buf_trans_dp(encoder, n_entries); + if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) + return intel_get_buf_trans(&skl_y_ddi_translations_hdmi, n_entries); + else if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_EDP) && + i915->vbt.edp.low_vswing) + return _skl_get_buf_trans_dp(encoder, &skl_y_ddi_translations_edp, n_entries); + else + return _skl_get_buf_trans_dp(encoder, &skl_y_ddi_translations_dp, n_entries); } -static const struct bxt_ddi_buf_trans * -bxt_get_buf_trans_hdmi(struct intel_encoder *encoder, int *n_entries) +static const struct intel_ddi_buf_trans * +skl_u_get_buf_trans(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + int *n_entries) { - *n_entries = ARRAY_SIZE(bxt_ddi_translations_hdmi); - return bxt_ddi_translations_hdmi; + struct drm_i915_private *i915 = to_i915(encoder->base.dev); + + if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) + return intel_get_buf_trans(&skl_ddi_translations_hdmi, n_entries); + else if (intel_crtc_has_type(crtc_state, 
INTEL_OUTPUT_EDP) && + i915->vbt.edp.low_vswing) + return _skl_get_buf_trans_dp(encoder, &skl_u_ddi_translations_edp, n_entries); + else + return _skl_get_buf_trans_dp(encoder, &skl_u_ddi_translations_dp, n_entries); } -const struct bxt_ddi_buf_trans * -bxt_get_buf_trans(struct intel_encoder *encoder, +static const struct intel_ddi_buf_trans * +skl_get_buf_trans(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, int *n_entries) { + struct drm_i915_private *i915 = to_i915(encoder->base.dev); + if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) - return bxt_get_buf_trans_hdmi(encoder, n_entries); - if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_EDP)) - return bxt_get_buf_trans_edp(encoder, n_entries); - return bxt_get_buf_trans_dp(encoder, n_entries); + return intel_get_buf_trans(&skl_ddi_translations_hdmi, n_entries); + else if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_EDP) && + i915->vbt.edp.low_vswing) + return _skl_get_buf_trans_dp(encoder, &skl_ddi_translations_edp, n_entries); + else + return _skl_get_buf_trans_dp(encoder, &skl_ddi_translations_dp, n_entries); } -static const struct cnl_ddi_buf_trans * -cnl_get_buf_trans_hdmi(struct intel_encoder *encoder, int *n_entries) +static const struct intel_ddi_buf_trans * +kbl_y_get_buf_trans(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + int *n_entries) { - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - u32 voltage = intel_de_read(dev_priv, CNL_PORT_COMP_DW3) & VOLTAGE_INFO_MASK; - - if (voltage == VOLTAGE_INFO_0_85V) { - *n_entries = ARRAY_SIZE(cnl_ddi_translations_hdmi_0_85V); - return cnl_ddi_translations_hdmi_0_85V; - } else if (voltage == VOLTAGE_INFO_0_95V) { - *n_entries = ARRAY_SIZE(cnl_ddi_translations_hdmi_0_95V); - return cnl_ddi_translations_hdmi_0_95V; - } else if (voltage == VOLTAGE_INFO_1_05V) { - *n_entries = ARRAY_SIZE(cnl_ddi_translations_hdmi_1_05V); - return cnl_ddi_translations_hdmi_1_05V; - } else { - *n_entries = 1; /* shut up gcc */ - MISSING_CASE(voltage); - } - return NULL; -} + struct drm_i915_private *i915 = to_i915(encoder->base.dev); -static const struct cnl_ddi_buf_trans * -cnl_get_buf_trans_dp(struct intel_encoder *encoder, int *n_entries) -{ - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - u32 voltage = intel_de_read(dev_priv, CNL_PORT_COMP_DW3) & VOLTAGE_INFO_MASK; - - if (voltage == VOLTAGE_INFO_0_85V) { - *n_entries = ARRAY_SIZE(cnl_ddi_translations_dp_0_85V); - return cnl_ddi_translations_dp_0_85V; - } else if (voltage == VOLTAGE_INFO_0_95V) { - *n_entries = ARRAY_SIZE(cnl_ddi_translations_dp_0_95V); - return cnl_ddi_translations_dp_0_95V; - } else if (voltage == VOLTAGE_INFO_1_05V) { - *n_entries = ARRAY_SIZE(cnl_ddi_translations_dp_1_05V); - return cnl_ddi_translations_dp_1_05V; - } else { - *n_entries = 1; /* shut up gcc */ - MISSING_CASE(voltage); - } - return NULL; + if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) + return intel_get_buf_trans(&skl_y_ddi_translations_hdmi, n_entries); + else if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_EDP) && + i915->vbt.edp.low_vswing) + return _skl_get_buf_trans_dp(encoder, &skl_y_ddi_translations_edp, n_entries); + else + return _skl_get_buf_trans_dp(encoder, &kbl_y_ddi_translations_dp, n_entries); } -static const struct cnl_ddi_buf_trans * -cnl_get_buf_trans_edp(struct intel_encoder *encoder, int *n_entries) +static const struct intel_ddi_buf_trans * +kbl_u_get_buf_trans(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + int 
*n_entries) { - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - u32 voltage = intel_de_read(dev_priv, CNL_PORT_COMP_DW3) & VOLTAGE_INFO_MASK; - - if (dev_priv->vbt.edp.low_vswing) { - if (voltage == VOLTAGE_INFO_0_85V) { - *n_entries = ARRAY_SIZE(cnl_ddi_translations_edp_0_85V); - return cnl_ddi_translations_edp_0_85V; - } else if (voltage == VOLTAGE_INFO_0_95V) { - *n_entries = ARRAY_SIZE(cnl_ddi_translations_edp_0_95V); - return cnl_ddi_translations_edp_0_95V; - } else if (voltage == VOLTAGE_INFO_1_05V) { - *n_entries = ARRAY_SIZE(cnl_ddi_translations_edp_1_05V); - return cnl_ddi_translations_edp_1_05V; - } else { - *n_entries = 1; /* shut up gcc */ - MISSING_CASE(voltage); - } - return NULL; - } else { - return cnl_get_buf_trans_dp(encoder, n_entries); - } + struct drm_i915_private *i915 = to_i915(encoder->base.dev); + + if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) + return intel_get_buf_trans(&skl_ddi_translations_hdmi, n_entries); + else if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_EDP) && + i915->vbt.edp.low_vswing) + return _skl_get_buf_trans_dp(encoder, &skl_u_ddi_translations_edp, n_entries); + else + return _skl_get_buf_trans_dp(encoder, &kbl_u_ddi_translations_dp, n_entries); } -const struct cnl_ddi_buf_trans * -cnl_get_buf_trans(struct intel_encoder *encoder, +static const struct intel_ddi_buf_trans * +kbl_get_buf_trans(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, int *n_entries) { + struct drm_i915_private *i915 = to_i915(encoder->base.dev); + if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) - return cnl_get_buf_trans_hdmi(encoder, n_entries); - if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_EDP)) - return cnl_get_buf_trans_edp(encoder, n_entries); - return cnl_get_buf_trans_dp(encoder, n_entries); + return intel_get_buf_trans(&skl_ddi_translations_hdmi, n_entries); + else if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_EDP) && + i915->vbt.edp.low_vswing) + return _skl_get_buf_trans_dp(encoder, &skl_ddi_translations_edp, n_entries); + else + return _skl_get_buf_trans_dp(encoder, &kbl_ddi_translations_dp, n_entries); } -static const struct cnl_ddi_buf_trans * -icl_get_combo_buf_trans_hdmi(struct intel_encoder *encoder, - const struct intel_crtc_state *crtc_state, - int *n_entries) +static const struct intel_ddi_buf_trans * +bxt_get_buf_trans(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + int *n_entries) { - *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_hdmi); - return icl_combo_phy_ddi_translations_hdmi; + struct drm_i915_private *i915 = to_i915(encoder->base.dev); + + if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) + return intel_get_buf_trans(&bxt_ddi_translations_hdmi, n_entries); + else if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_EDP) && + i915->vbt.edp.low_vswing) + return intel_get_buf_trans(&bxt_ddi_translations_edp, n_entries); + else + return intel_get_buf_trans(&bxt_ddi_translations_dp, n_entries); } -static const struct cnl_ddi_buf_trans * +static const struct intel_ddi_buf_trans * icl_get_combo_buf_trans_dp(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, int *n_entries) { - *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_dp_hbr2); - return icl_combo_phy_ddi_translations_dp_hbr2; + return intel_get_buf_trans(&icl_combo_phy_ddi_translations_dp_hbr2_edp_hbr3, + n_entries); } -static const struct cnl_ddi_buf_trans * +static const struct intel_ddi_buf_trans * icl_get_combo_buf_trans_edp(struct intel_encoder 
*encoder, const struct intel_crtc_state *crtc_state, int *n_entries) @@ -1109,294 +1174,391 @@ icl_get_combo_buf_trans_edp(struct intel_encoder *encoder, struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); if (crtc_state->port_clock > 540000) { - *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_edp_hbr3); - return icl_combo_phy_ddi_translations_edp_hbr3; + return intel_get_buf_trans(&icl_combo_phy_ddi_translations_dp_hbr2_edp_hbr3, + n_entries); } else if (dev_priv->vbt.edp.low_vswing) { - *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_edp_hbr2); - return icl_combo_phy_ddi_translations_edp_hbr2; - } else if (IS_DG1(dev_priv) && crtc_state->port_clock > 270000) { - *n_entries = ARRAY_SIZE(dg1_combo_phy_ddi_translations_dp_hbr2_hbr3); - return dg1_combo_phy_ddi_translations_dp_hbr2_hbr3; - } else if (IS_DG1(dev_priv)) { - *n_entries = ARRAY_SIZE(dg1_combo_phy_ddi_translations_dp_rbr_hbr); - return dg1_combo_phy_ddi_translations_dp_rbr_hbr; + return intel_get_buf_trans(&icl_combo_phy_ddi_translations_edp_hbr2, + n_entries); } return icl_get_combo_buf_trans_dp(encoder, crtc_state, n_entries); } -const struct cnl_ddi_buf_trans * +static const struct intel_ddi_buf_trans * icl_get_combo_buf_trans(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, int *n_entries) { if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) - return icl_get_combo_buf_trans_hdmi(encoder, crtc_state, n_entries); + return intel_get_buf_trans(&icl_combo_phy_ddi_translations_hdmi, n_entries); else if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_EDP)) return icl_get_combo_buf_trans_edp(encoder, crtc_state, n_entries); else return icl_get_combo_buf_trans_dp(encoder, crtc_state, n_entries); } -static const struct icl_mg_phy_ddi_buf_trans * -icl_get_mg_buf_trans_hdmi(struct intel_encoder *encoder, - const struct intel_crtc_state *crtc_state, - int *n_entries) -{ - *n_entries = ARRAY_SIZE(icl_mg_phy_ddi_translations_hdmi); - return icl_mg_phy_ddi_translations_hdmi; -} - -static const struct icl_mg_phy_ddi_buf_trans * +static const struct intel_ddi_buf_trans * icl_get_mg_buf_trans_dp(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, int *n_entries) { if (crtc_state->port_clock > 270000) { - *n_entries = ARRAY_SIZE(icl_mg_phy_ddi_translations_hbr2_hbr3); - return icl_mg_phy_ddi_translations_hbr2_hbr3; + return intel_get_buf_trans(&icl_mg_phy_ddi_translations_hbr2_hbr3, + n_entries); } else { - *n_entries = ARRAY_SIZE(icl_mg_phy_ddi_translations_rbr_hbr); - return icl_mg_phy_ddi_translations_rbr_hbr; + return intel_get_buf_trans(&icl_mg_phy_ddi_translations_rbr_hbr, + n_entries); } } -const struct icl_mg_phy_ddi_buf_trans * +static const struct intel_ddi_buf_trans * icl_get_mg_buf_trans(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, int *n_entries) { if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) - return icl_get_mg_buf_trans_hdmi(encoder, crtc_state, n_entries); + return intel_get_buf_trans(&icl_mg_phy_ddi_translations_hdmi, n_entries); else return icl_get_mg_buf_trans_dp(encoder, crtc_state, n_entries); } -static const struct cnl_ddi_buf_trans * -ehl_get_combo_buf_trans_hdmi(struct intel_encoder *encoder, - const struct intel_crtc_state *crtc_state, - int *n_entries) +static const struct intel_ddi_buf_trans * +ehl_get_combo_buf_trans_edp(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + int *n_entries) +{ + if (crtc_state->port_clock > 270000) + return 
intel_get_buf_trans(&ehl_combo_phy_ddi_translations_edp_hbr2, n_entries); + else + return intel_get_buf_trans(&icl_combo_phy_ddi_translations_edp_hbr2, n_entries); +} + +static const struct intel_ddi_buf_trans * +ehl_get_combo_buf_trans(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + int *n_entries) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + + if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) + return intel_get_buf_trans(&icl_combo_phy_ddi_translations_hdmi, n_entries); + else if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_EDP) && + dev_priv->vbt.edp.low_vswing) + return ehl_get_combo_buf_trans_edp(encoder, crtc_state, n_entries); + else + return intel_get_buf_trans(&ehl_combo_phy_ddi_translations_dp, n_entries); +} + +static const struct intel_ddi_buf_trans * +jsl_get_combo_buf_trans_edp(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + int *n_entries) { - *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_hdmi); - return icl_combo_phy_ddi_translations_hdmi; + if (crtc_state->port_clock > 270000) + return intel_get_buf_trans(&jsl_combo_phy_ddi_translations_edp_hbr2, n_entries); + else + return intel_get_buf_trans(&jsl_combo_phy_ddi_translations_edp_hbr, n_entries); +} + +static const struct intel_ddi_buf_trans * +jsl_get_combo_buf_trans(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + int *n_entries) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + + if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) + return intel_get_buf_trans(&icl_combo_phy_ddi_translations_hdmi, n_entries); + else if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_EDP) && + dev_priv->vbt.edp.low_vswing) + return jsl_get_combo_buf_trans_edp(encoder, crtc_state, n_entries); + else + return intel_get_buf_trans(&icl_combo_phy_ddi_translations_dp_hbr2_edp_hbr3, n_entries); } -static const struct cnl_ddi_buf_trans * -ehl_get_combo_buf_trans_dp(struct intel_encoder *encoder, +static const struct intel_ddi_buf_trans * +tgl_get_combo_buf_trans_dp(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, int *n_entries) { - *n_entries = ARRAY_SIZE(ehl_combo_phy_ddi_translations_dp); - return ehl_combo_phy_ddi_translations_dp; + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + + if (crtc_state->port_clock > 270000) { + if (IS_TGL_U(dev_priv) || IS_TGL_Y(dev_priv)) { + return intel_get_buf_trans(&tgl_uy_combo_phy_ddi_translations_dp_hbr2, + n_entries); + } else { + return intel_get_buf_trans(&tgl_combo_phy_ddi_translations_dp_hbr2, + n_entries); + } + } else { + return intel_get_buf_trans(&tgl_combo_phy_ddi_translations_dp_hbr, + n_entries); + } } -static const struct cnl_ddi_buf_trans * -ehl_get_combo_buf_trans_edp(struct intel_encoder *encoder, +static const struct intel_ddi_buf_trans * +tgl_get_combo_buf_trans_edp(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, int *n_entries) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); - if (dev_priv->vbt.edp.low_vswing) { - *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_edp_hbr2); - return icl_combo_phy_ddi_translations_edp_hbr2; + if (crtc_state->port_clock > 540000) { + return intel_get_buf_trans(&icl_combo_phy_ddi_translations_dp_hbr2_edp_hbr3, + n_entries); + } else if (dev_priv->vbt.edp.hobl && !intel_dp->hobl_failed) { + return intel_get_buf_trans(&tgl_combo_phy_ddi_translations_edp_hbr2_hobl, 
+ n_entries); + } else if (dev_priv->vbt.edp.low_vswing) { + return intel_get_buf_trans(&icl_combo_phy_ddi_translations_edp_hbr2, + n_entries); } - return ehl_get_combo_buf_trans_dp(encoder, crtc_state, n_entries); + return tgl_get_combo_buf_trans_dp(encoder, crtc_state, n_entries); } -const struct cnl_ddi_buf_trans * -ehl_get_combo_buf_trans(struct intel_encoder *encoder, +static const struct intel_ddi_buf_trans * +tgl_get_combo_buf_trans(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, int *n_entries) { if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) - return ehl_get_combo_buf_trans_hdmi(encoder, crtc_state, n_entries); + return intel_get_buf_trans(&icl_combo_phy_ddi_translations_hdmi, n_entries); else if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_EDP)) - return ehl_get_combo_buf_trans_edp(encoder, crtc_state, n_entries); + return tgl_get_combo_buf_trans_edp(encoder, crtc_state, n_entries); else - return ehl_get_combo_buf_trans_dp(encoder, crtc_state, n_entries); + return tgl_get_combo_buf_trans_dp(encoder, crtc_state, n_entries); } -static const struct cnl_ddi_buf_trans * -jsl_get_combo_buf_trans_hdmi(struct intel_encoder *encoder, - const struct intel_crtc_state *crtc_state, - int *n_entries) +static const struct intel_ddi_buf_trans * +dg1_get_combo_buf_trans_dp(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + int *n_entries) { - *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_hdmi); - return icl_combo_phy_ddi_translations_hdmi; + if (crtc_state->port_clock > 270000) + return intel_get_buf_trans(&dg1_combo_phy_ddi_translations_dp_hbr2_hbr3, + n_entries); + else + return intel_get_buf_trans(&dg1_combo_phy_ddi_translations_dp_rbr_hbr, + n_entries); } -static const struct cnl_ddi_buf_trans * -jsl_get_combo_buf_trans_dp(struct intel_encoder *encoder, +static const struct intel_ddi_buf_trans * +dg1_get_combo_buf_trans_edp(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + int *n_entries) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); + + if (crtc_state->port_clock > 540000) + return intel_get_buf_trans(&icl_combo_phy_ddi_translations_dp_hbr2_edp_hbr3, + n_entries); + else if (dev_priv->vbt.edp.hobl && !intel_dp->hobl_failed) + return intel_get_buf_trans(&tgl_combo_phy_ddi_translations_edp_hbr2_hobl, + n_entries); + else if (dev_priv->vbt.edp.low_vswing) + return intel_get_buf_trans(&icl_combo_phy_ddi_translations_edp_hbr2, + n_entries); + else + return dg1_get_combo_buf_trans_dp(encoder, crtc_state, n_entries); +} + +static const struct intel_ddi_buf_trans * +dg1_get_combo_buf_trans(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + int *n_entries) +{ + if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) + return intel_get_buf_trans(&icl_combo_phy_ddi_translations_hdmi, n_entries); + else if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_EDP)) + return dg1_get_combo_buf_trans_edp(encoder, crtc_state, n_entries); + else + return dg1_get_combo_buf_trans_dp(encoder, crtc_state, n_entries); +} + +static const struct intel_ddi_buf_trans * +rkl_get_combo_buf_trans_dp(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, int *n_entries) { - *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_dp_hbr2); - return icl_combo_phy_ddi_translations_dp_hbr2; + if (crtc_state->port_clock > 270000) + return intel_get_buf_trans(&rkl_combo_phy_ddi_translations_dp_hbr2_hbr3, 
n_entries); + else + return intel_get_buf_trans(&rkl_combo_phy_ddi_translations_dp_hbr, n_entries); } -static const struct cnl_ddi_buf_trans * -jsl_get_combo_buf_trans_edp(struct intel_encoder *encoder, +static const struct intel_ddi_buf_trans * +rkl_get_combo_buf_trans_edp(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, int *n_entries) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); - if (dev_priv->vbt.edp.low_vswing) { - if (crtc_state->port_clock > 270000) { - *n_entries = ARRAY_SIZE(jsl_combo_phy_ddi_translations_edp_hbr2); - return jsl_combo_phy_ddi_translations_edp_hbr2; - } else { - *n_entries = ARRAY_SIZE(jsl_combo_phy_ddi_translations_edp_hbr); - return jsl_combo_phy_ddi_translations_edp_hbr; - } + if (crtc_state->port_clock > 540000) { + return intel_get_buf_trans(&icl_combo_phy_ddi_translations_dp_hbr2_edp_hbr3, + n_entries); + } else if (dev_priv->vbt.edp.hobl && !intel_dp->hobl_failed) { + return intel_get_buf_trans(&tgl_combo_phy_ddi_translations_edp_hbr2_hobl, + n_entries); + } else if (dev_priv->vbt.edp.low_vswing) { + return intel_get_buf_trans(&icl_combo_phy_ddi_translations_edp_hbr2, + n_entries); } - return jsl_get_combo_buf_trans_dp(encoder, crtc_state, n_entries); + return rkl_get_combo_buf_trans_dp(encoder, crtc_state, n_entries); } -const struct cnl_ddi_buf_trans * -jsl_get_combo_buf_trans(struct intel_encoder *encoder, +static const struct intel_ddi_buf_trans * +rkl_get_combo_buf_trans(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, int *n_entries) { if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) - return jsl_get_combo_buf_trans_hdmi(encoder, crtc_state, n_entries); + return intel_get_buf_trans(&icl_combo_phy_ddi_translations_hdmi, n_entries); else if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_EDP)) - return jsl_get_combo_buf_trans_edp(encoder, crtc_state, n_entries); + return rkl_get_combo_buf_trans_edp(encoder, crtc_state, n_entries); else - return jsl_get_combo_buf_trans_dp(encoder, crtc_state, n_entries); + return rkl_get_combo_buf_trans_dp(encoder, crtc_state, n_entries); } -static const struct cnl_ddi_buf_trans * -tgl_get_combo_buf_trans_hdmi(struct intel_encoder *encoder, +static const struct intel_ddi_buf_trans * +adls_get_combo_buf_trans_dp(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + int *n_entries) +{ + if (crtc_state->port_clock > 270000) + return intel_get_buf_trans(&adls_combo_phy_ddi_translations_dp_hbr2_hbr3, n_entries); + else + return intel_get_buf_trans(&tgl_combo_phy_ddi_translations_dp_hbr, n_entries); +} + +static const struct intel_ddi_buf_trans * +adls_get_combo_buf_trans_edp(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, int *n_entries) { - *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_hdmi); - return icl_combo_phy_ddi_translations_hdmi; + struct drm_i915_private *i915 = to_i915(encoder->base.dev); + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); + + if (crtc_state->port_clock > 540000) + return intel_get_buf_trans(&adls_combo_phy_ddi_translations_edp_hbr3, n_entries); + else if (i915->vbt.edp.hobl && !intel_dp->hobl_failed) + return intel_get_buf_trans(&tgl_combo_phy_ddi_translations_edp_hbr2_hobl, n_entries); + else if (i915->vbt.edp.low_vswing) + return intel_get_buf_trans(&adls_combo_phy_ddi_translations_edp_hbr2, n_entries); + else + return adls_get_combo_buf_trans_dp(encoder, crtc_state, n_entries); } -static const struct 
cnl_ddi_buf_trans * -tgl_get_combo_buf_trans_dp(struct intel_encoder *encoder, - const struct intel_crtc_state *crtc_state, - int *n_entries) +static const struct intel_ddi_buf_trans * +adls_get_combo_buf_trans(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + int *n_entries) { - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - - if (crtc_state->port_clock > 270000) { - if (IS_ROCKETLAKE(dev_priv)) { - *n_entries = ARRAY_SIZE(rkl_combo_phy_ddi_translations_dp_hbr2_hbr3); - return rkl_combo_phy_ddi_translations_dp_hbr2_hbr3; - } else if (IS_TGL_U(dev_priv) || IS_TGL_Y(dev_priv)) { - *n_entries = ARRAY_SIZE(tgl_uy_combo_phy_ddi_translations_dp_hbr2); - return tgl_uy_combo_phy_ddi_translations_dp_hbr2; - } else { - *n_entries = ARRAY_SIZE(tgl_combo_phy_ddi_translations_dp_hbr2); - return tgl_combo_phy_ddi_translations_dp_hbr2; - } - } else { - if (IS_ROCKETLAKE(dev_priv)) { - *n_entries = ARRAY_SIZE(rkl_combo_phy_ddi_translations_dp_hbr); - return rkl_combo_phy_ddi_translations_dp_hbr; - } else { - *n_entries = ARRAY_SIZE(tgl_combo_phy_ddi_translations_dp_hbr); - return tgl_combo_phy_ddi_translations_dp_hbr; - } - } + if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) + return intel_get_buf_trans(&icl_combo_phy_ddi_translations_hdmi, n_entries); + else if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_EDP)) + return adls_get_combo_buf_trans_edp(encoder, crtc_state, n_entries); + else + return adls_get_combo_buf_trans_dp(encoder, crtc_state, n_entries); } -static const struct cnl_ddi_buf_trans * -tgl_get_combo_buf_trans_edp(struct intel_encoder *encoder, +static const struct intel_ddi_buf_trans * +adlp_get_combo_buf_trans_dp(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, int *n_entries) { + if (crtc_state->port_clock > 270000) + return intel_get_buf_trans(&adlp_combo_phy_ddi_translations_dp_hbr2_hbr3, n_entries); + else + return intel_get_buf_trans(&adlp_combo_phy_ddi_translations_dp_hbr, n_entries); +} + +static const struct intel_ddi_buf_trans * +adlp_get_combo_buf_trans_edp(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + int *n_entries) +{ struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dp *intel_dp = enc_to_intel_dp(encoder); if (crtc_state->port_clock > 540000) { - *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_edp_hbr3); - return icl_combo_phy_ddi_translations_edp_hbr3; + return intel_get_buf_trans(&adlp_combo_phy_ddi_translations_edp_hbr3, + n_entries); } else if (dev_priv->vbt.edp.hobl && !intel_dp->hobl_failed) { - *n_entries = ARRAY_SIZE(tgl_combo_phy_ddi_translations_edp_hbr2_hobl); - return tgl_combo_phy_ddi_translations_edp_hbr2_hobl; + return intel_get_buf_trans(&tgl_combo_phy_ddi_translations_edp_hbr2_hobl, + n_entries); } else if (dev_priv->vbt.edp.low_vswing) { - *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_edp_hbr2); - return icl_combo_phy_ddi_translations_edp_hbr2; + return intel_get_buf_trans(&adlp_combo_phy_ddi_translations_edp_up_to_hbr2, + n_entries); } - return tgl_get_combo_buf_trans_dp(encoder, crtc_state, n_entries); + return adlp_get_combo_buf_trans_dp(encoder, crtc_state, n_entries); } -const struct cnl_ddi_buf_trans * -tgl_get_combo_buf_trans(struct intel_encoder *encoder, - const struct intel_crtc_state *crtc_state, - int *n_entries) +static const struct intel_ddi_buf_trans * +adlp_get_combo_buf_trans(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + int *n_entries) { if 
(intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) - return tgl_get_combo_buf_trans_hdmi(encoder, crtc_state, n_entries); + return intel_get_buf_trans(&adlp_combo_phy_ddi_translations_hdmi, n_entries); else if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_EDP)) - return tgl_get_combo_buf_trans_edp(encoder, crtc_state, n_entries); + return adlp_get_combo_buf_trans_edp(encoder, crtc_state, n_entries); else - return tgl_get_combo_buf_trans_dp(encoder, crtc_state, n_entries); + return adlp_get_combo_buf_trans_dp(encoder, crtc_state, n_entries); } -static const struct tgl_dkl_phy_ddi_buf_trans * -tgl_get_dkl_buf_trans_hdmi(struct intel_encoder *encoder, - const struct intel_crtc_state *crtc_state, - int *n_entries) -{ - *n_entries = ARRAY_SIZE(tgl_dkl_phy_hdmi_ddi_trans); - return tgl_dkl_phy_hdmi_ddi_trans; -} - -static const struct tgl_dkl_phy_ddi_buf_trans * +static const struct intel_ddi_buf_trans * tgl_get_dkl_buf_trans_dp(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, int *n_entries) { if (crtc_state->port_clock > 270000) { - *n_entries = ARRAY_SIZE(tgl_dkl_phy_dp_ddi_trans_hbr2); - return tgl_dkl_phy_dp_ddi_trans_hbr2; + return intel_get_buf_trans(&tgl_dkl_phy_ddi_translations_dp_hbr2, + n_entries); } else { - *n_entries = ARRAY_SIZE(tgl_dkl_phy_dp_ddi_trans); - return tgl_dkl_phy_dp_ddi_trans; + return intel_get_buf_trans(&tgl_dkl_phy_ddi_translations_dp_hbr, + n_entries); } } -const struct tgl_dkl_phy_ddi_buf_trans * +static const struct intel_ddi_buf_trans * tgl_get_dkl_buf_trans(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, int *n_entries) { if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) - return tgl_get_dkl_buf_trans_hdmi(encoder, crtc_state, n_entries); + return intel_get_buf_trans(&tgl_dkl_phy_ddi_translations_hdmi, n_entries); else return tgl_get_dkl_buf_trans_dp(encoder, crtc_state, n_entries); } -static const struct tgl_dkl_phy_ddi_buf_trans * +static const struct intel_ddi_buf_trans * adlp_get_dkl_buf_trans_dp(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, int *n_entries) { if (crtc_state->port_clock > 270000) { - *n_entries = ARRAY_SIZE(adlp_dkl_phy_dp_ddi_trans_hbr2_hbr3); - return adlp_dkl_phy_dp_ddi_trans_hbr2_hbr3; + return intel_get_buf_trans(&adlp_dkl_phy_ddi_translations_dp_hbr2_hbr3, + n_entries); + } else { + return intel_get_buf_trans(&adlp_dkl_phy_ddi_translations_dp_hbr, + n_entries); } - - *n_entries = ARRAY_SIZE(adlp_dkl_phy_dp_ddi_trans_hbr); - return adlp_dkl_phy_dp_ddi_trans_hbr; } -const struct tgl_dkl_phy_ddi_buf_trans * +static const struct intel_ddi_buf_trans * adlp_get_dkl_buf_trans(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, int *n_entries) { if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) - return tgl_get_dkl_buf_trans_hdmi(encoder, crtc_state, n_entries); + return intel_get_buf_trans(&tgl_dkl_phy_ddi_translations_hdmi, n_entries); else return adlp_get_dkl_buf_trans_dp(encoder, crtc_state, n_entries); } @@ -1406,43 +1568,68 @@ int intel_ddi_hdmi_num_entries(struct intel_encoder *encoder, int *default_entry) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - enum phy phy = intel_port_to_phy(dev_priv, encoder->port); + const struct intel_ddi_buf_trans *ddi_translations; int n_entries; - if (DISPLAY_VER(dev_priv) >= 12) { - if (intel_phy_is_combo(dev_priv, phy)) - tgl_get_combo_buf_trans_hdmi(encoder, crtc_state, &n_entries); - else - tgl_get_dkl_buf_trans_hdmi(encoder, crtc_state, &n_entries); - 
*default_entry = n_entries - 1; - } else if (DISPLAY_VER(dev_priv) == 11) { - if (intel_phy_is_combo(dev_priv, phy)) - icl_get_combo_buf_trans_hdmi(encoder, crtc_state, &n_entries); - else - icl_get_mg_buf_trans_hdmi(encoder, crtc_state, &n_entries); - *default_entry = n_entries - 1; - } else if (IS_CANNONLAKE(dev_priv)) { - cnl_get_buf_trans_hdmi(encoder, &n_entries); - *default_entry = n_entries - 1; - } else if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) { - bxt_get_buf_trans_hdmi(encoder, &n_entries); - *default_entry = n_entries - 1; - } else if (DISPLAY_VER(dev_priv) == 9) { - intel_ddi_get_buf_trans_hdmi(encoder, &n_entries); - *default_entry = 8; - } else if (IS_BROADWELL(dev_priv)) { - intel_ddi_get_buf_trans_hdmi(encoder, &n_entries); - *default_entry = 7; - } else if (IS_HASWELL(dev_priv)) { - intel_ddi_get_buf_trans_hdmi(encoder, &n_entries); - *default_entry = 6; - } else { - drm_WARN(&dev_priv->drm, 1, "ddi translation table missing\n"); + ddi_translations = encoder->get_buf_trans(encoder, crtc_state, &n_entries); + + if (drm_WARN_ON(&dev_priv->drm, !ddi_translations)) { + *default_entry = 0; return 0; } - if (drm_WARN_ON_ONCE(&dev_priv->drm, n_entries == 0)) - return 0; + *default_entry = ddi_translations->hdmi_default_entry; return n_entries; } + +void intel_ddi_buf_trans_init(struct intel_encoder *encoder) +{ + struct drm_i915_private *i915 = to_i915(encoder->base.dev); + enum phy phy = intel_port_to_phy(i915, encoder->port); + + if (IS_ALDERLAKE_P(i915)) { + if (intel_phy_is_combo(i915, phy)) + encoder->get_buf_trans = adlp_get_combo_buf_trans; + else + encoder->get_buf_trans = adlp_get_dkl_buf_trans; + } else if (IS_ALDERLAKE_S(i915)) { + encoder->get_buf_trans = adls_get_combo_buf_trans; + } else if (IS_ROCKETLAKE(i915)) { + encoder->get_buf_trans = rkl_get_combo_buf_trans; + } else if (IS_DG1(i915)) { + encoder->get_buf_trans = dg1_get_combo_buf_trans; + } else if (DISPLAY_VER(i915) >= 12) { + if (intel_phy_is_combo(i915, phy)) + encoder->get_buf_trans = tgl_get_combo_buf_trans; + else + encoder->get_buf_trans = tgl_get_dkl_buf_trans; + } else if (DISPLAY_VER(i915) == 11) { + if (IS_PLATFORM(i915, INTEL_JASPERLAKE)) + encoder->get_buf_trans = jsl_get_combo_buf_trans; + else if (IS_PLATFORM(i915, INTEL_ELKHARTLAKE)) + encoder->get_buf_trans = ehl_get_combo_buf_trans; + else if (intel_phy_is_combo(i915, phy)) + encoder->get_buf_trans = icl_get_combo_buf_trans; + else + encoder->get_buf_trans = icl_get_mg_buf_trans; + } else if (IS_GEMINILAKE(i915) || IS_BROXTON(i915)) { + encoder->get_buf_trans = bxt_get_buf_trans; + } else if (IS_CML_ULX(i915) || IS_CFL_ULX(i915) || IS_KBL_ULX(i915)) { + encoder->get_buf_trans = kbl_y_get_buf_trans; + } else if (IS_CML_ULT(i915) || IS_CFL_ULT(i915) || IS_KBL_ULT(i915)) { + encoder->get_buf_trans = kbl_u_get_buf_trans; + } else if (IS_COMETLAKE(i915) || IS_COFFEELAKE(i915) || IS_KABYLAKE(i915)) { + encoder->get_buf_trans = kbl_get_buf_trans; + } else if (IS_SKL_ULX(i915)) { + encoder->get_buf_trans = skl_y_get_buf_trans; + } else if (IS_SKL_ULT(i915)) { + encoder->get_buf_trans = skl_u_get_buf_trans; + } else if (IS_SKYLAKE(i915)) { + encoder->get_buf_trans = skl_get_buf_trans; + } else if (IS_BROADWELL(i915)) { + encoder->get_buf_trans = bdw_get_buf_trans; + } else { + encoder->get_buf_trans = hsw_get_buf_trans; + } +} diff --git a/drivers/gpu/drm/i915/display/intel_ddi_buf_trans.h b/drivers/gpu/drm/i915/display/intel_ddi_buf_trans.h index 4c2efab38642..2acd720f9d4f 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi_buf_trans.h 
+++ b/drivers/gpu/drm/i915/display/intel_ddi_buf_trans.h @@ -12,7 +12,7 @@ struct drm_i915_private; struct intel_encoder; struct intel_crtc_state; -struct ddi_buf_trans { +struct hsw_ddi_buf_trans { u32 trans1; /* balance leg enable, de-emph level */ u32 trans2; /* vref sel, vswing */ u8 i_boost; /* SKL: I_boost; valid: 0x0, 0x1, 0x3, 0x7 */ @@ -25,7 +25,7 @@ struct bxt_ddi_buf_trans { u8 deemphasis; }; -struct cnl_ddi_buf_trans { +struct icl_ddi_buf_trans { u8 dw2_swing_sel; u8 dw7_n_scalar; u8 dw4_cursor_coeff; @@ -45,60 +45,26 @@ struct tgl_dkl_phy_ddi_buf_trans { u32 dkl_de_emphasis_control; }; -bool is_hobl_buf_trans(const struct cnl_ddi_buf_trans *table); +union intel_ddi_buf_trans_entry { + struct hsw_ddi_buf_trans hsw; + struct bxt_ddi_buf_trans bxt; + struct icl_ddi_buf_trans icl; + struct icl_mg_phy_ddi_buf_trans mg; + struct tgl_dkl_phy_ddi_buf_trans dkl; +}; + +struct intel_ddi_buf_trans { + const union intel_ddi_buf_trans_entry *entries; + u8 num_entries; + u8 hdmi_default_entry; +}; + +bool is_hobl_buf_trans(const struct intel_ddi_buf_trans *table); int intel_ddi_hdmi_num_entries(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, int *default_entry); -const struct ddi_buf_trans * -intel_ddi_get_buf_trans_edp(struct intel_encoder *encoder, int *n_entries); -const struct ddi_buf_trans * -intel_ddi_get_buf_trans_fdi(struct drm_i915_private *dev_priv, - int *n_entries); -const struct ddi_buf_trans * -intel_ddi_get_buf_trans_hdmi(struct intel_encoder *encoder, - int *n_entries); -const struct ddi_buf_trans * -intel_ddi_get_buf_trans_dp(struct intel_encoder *encoder, int *n_entries); - -const struct bxt_ddi_buf_trans * -bxt_get_buf_trans(struct intel_encoder *encoder, - const struct intel_crtc_state *crtc_state, - int *n_entries); - -const struct tgl_dkl_phy_ddi_buf_trans * -adlp_get_dkl_buf_trans(struct intel_encoder *encoder, - const struct intel_crtc_state *crtc_state, - int *n_entries); -const struct cnl_ddi_buf_trans * -tgl_get_combo_buf_trans(struct intel_encoder *encoder, - const struct intel_crtc_state *crtc_state, - int *n_entries); -const struct tgl_dkl_phy_ddi_buf_trans * -tgl_get_dkl_buf_trans(struct intel_encoder *encoder, - const struct intel_crtc_state *crtc_state, - int *n_entries); -const struct cnl_ddi_buf_trans * -jsl_get_combo_buf_trans(struct intel_encoder *encoder, - const struct intel_crtc_state *crtc_state, - int *n_entries); -const struct cnl_ddi_buf_trans * -ehl_get_combo_buf_trans(struct intel_encoder *encoder, - const struct intel_crtc_state *crtc_state, - int *n_entries); -const struct cnl_ddi_buf_trans * -icl_get_combo_buf_trans(struct intel_encoder *encoder, - const struct intel_crtc_state *crtc_state, - int *n_entries); -const struct icl_mg_phy_ddi_buf_trans * -icl_get_mg_buf_trans(struct intel_encoder *encoder, - const struct intel_crtc_state *crtc_state, - int *n_entries); - -const struct cnl_ddi_buf_trans * -cnl_get_buf_trans(struct intel_encoder *encoder, - const struct intel_crtc_state *crtc_state, - int *n_entries); +void intel_ddi_buf_trans_init(struct intel_encoder *encoder); #endif diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 0a8a2395c8ac..134a6acbd8fb 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -59,6 +59,7 @@ #include "display/intel_hdmi.h" #include "display/intel_lvds.h" #include "display/intel_sdvo.h" +#include "display/intel_snps_phy.h" #include "display/intel_tv.h" #include 
"display/intel_vdsc.h" #include "display/intel_vrr.h" @@ -975,7 +976,7 @@ void intel_enable_pipe(const struct intel_crtc_state *new_crtc_state) /* FIXME: assert CPU port conditions for SNB+ */ } - /* Wa_22012358565:adlp */ + /* Wa_22012358565:adl-p */ if (DISPLAY_VER(dev_priv) == 13) intel_de_rmw(dev_priv, PIPE_ARB_CTL(pipe), 0, PIPE_ARB_USE_PROG_SLOTS); @@ -1035,6 +1036,10 @@ void intel_disable_pipe(const struct intel_crtc_state *old_crtc_state) if (!IS_I830(dev_priv)) val &= ~PIPECONF_ENABLE; + if (DISPLAY_VER(dev_priv) >= 12) + intel_de_rmw(dev_priv, CHICKEN_TRANS(cpu_transcoder), + FECSTALL_DIS_DPTSTREAM_DPTTG, 0); + intel_de_write(dev_priv, reg, val); if ((val & PIPECONF_ENABLE) == 0) intel_wait_for_pipe_off(old_crtc_state); @@ -1331,6 +1336,9 @@ retry: ret = i915_gem_object_lock(obj, &ww); if (!ret && phys_cursor) ret = i915_gem_object_attach_phys(obj, alignment); + else if (!ret && HAS_LMEM(dev_priv)) + ret = i915_gem_object_migrate(obj, &ww, INTEL_REGION_LMEM); + /* TODO: Do we need to sync when migration becomes async? */ if (!ret) ret = i915_gem_object_pin_pages(obj); if (ret) @@ -1914,20 +1922,50 @@ static void intel_dpt_unpin(struct i915_address_space *vm) i915_vma_put(dpt->vma); } +static bool +intel_reuse_initial_plane_obj(struct drm_i915_private *i915, + const struct intel_initial_plane_config *plane_config, + struct drm_framebuffer **fb, + struct i915_vma **vma) +{ + struct intel_crtc *crtc; + + for_each_intel_crtc(&i915->drm, crtc) { + struct intel_crtc_state *crtc_state = + to_intel_crtc_state(crtc->base.state); + struct intel_plane *plane = + to_intel_plane(crtc->base.primary); + struct intel_plane_state *plane_state = + to_intel_plane_state(plane->base.state); + + if (!crtc_state->uapi.active) + continue; + + if (!plane_state->ggtt_vma) + continue; + + if (intel_plane_ggtt_offset(plane_state) == plane_config->base) { + *fb = plane_state->hw.fb; + *vma = plane_state->ggtt_vma; + return true; + } + } + + return false; +} + static void -intel_find_initial_plane_obj(struct intel_crtc *intel_crtc, +intel_find_initial_plane_obj(struct intel_crtc *crtc, struct intel_initial_plane_config *plane_config) { - struct drm_device *dev = intel_crtc->base.dev; + struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct drm_crtc *c; - struct drm_plane *primary = intel_crtc->base.primary; - struct drm_plane_state *plane_state = primary->state; - struct intel_plane *intel_plane = to_intel_plane(primary); - struct intel_plane_state *intel_state = - to_intel_plane_state(plane_state); struct intel_crtc_state *crtc_state = - to_intel_crtc_state(intel_crtc->base.state); + to_intel_crtc_state(crtc->base.state); + struct intel_plane *plane = + to_intel_plane(crtc->base.primary); + struct intel_plane_state *plane_state = + to_intel_plane_state(plane->base.state); struct drm_framebuffer *fb; struct i915_vma *vma; @@ -1939,7 +1977,7 @@ intel_find_initial_plane_obj(struct intel_crtc *intel_crtc, if (!plane_config->fb) return; - if (intel_alloc_initial_plane_obj(intel_crtc, plane_config)) { + if (intel_alloc_initial_plane_obj(crtc, plane_config)) { fb = &plane_config->fb->base; vma = plane_config->vma; goto valid_fb; @@ -1949,25 +1987,8 @@ intel_find_initial_plane_obj(struct intel_crtc *intel_crtc, * Failed to alloc the obj, check to see if we should share * an fb with another CRTC instead */ - for_each_crtc(dev, c) { - struct intel_plane_state *state; - - if (c == &intel_crtc->base) - continue; - - if (!to_intel_crtc_state(c->state)->uapi.active) - continue; 
- - state = to_intel_plane_state(c->primary->state); - if (!state->ggtt_vma) - continue; - - if (intel_plane_ggtt_offset(state) == plane_config->base) { - fb = state->hw.fb; - vma = state->ggtt_vma; - goto valid_fb; - } - } + if (intel_reuse_initial_plane_obj(dev_priv, plane_config, &fb, &vma)) + goto valid_fb; /* * We've failed to reconstruct the BIOS FB. Current display state @@ -1976,7 +1997,7 @@ intel_find_initial_plane_obj(struct intel_crtc *intel_crtc, * simplest solution is to just disable the primary plane now and * pretend the BIOS never had it enabled. */ - intel_plane_disable_noatomic(intel_crtc, intel_plane); + intel_plane_disable_noatomic(crtc, plane); if (crtc_state->bigjoiner) { struct intel_crtc *slave = crtc_state->bigjoiner_linked_crtc; @@ -1986,40 +2007,38 @@ intel_find_initial_plane_obj(struct intel_crtc *intel_crtc, return; valid_fb: - plane_state->rotation = plane_config->rotation; - intel_fb_fill_view(to_intel_framebuffer(fb), plane_state->rotation, - &intel_state->view); + plane_state->uapi.rotation = plane_config->rotation; + intel_fb_fill_view(to_intel_framebuffer(fb), + plane_state->uapi.rotation, &plane_state->view); __i915_vma_pin(vma); - intel_state->ggtt_vma = i915_vma_get(vma); - if (intel_plane_uses_fence(intel_state) && i915_vma_pin_fence(vma) == 0) - if (vma->fence) - intel_state->flags |= PLANE_HAS_FENCE; + plane_state->ggtt_vma = i915_vma_get(vma); + if (intel_plane_uses_fence(plane_state) && + i915_vma_pin_fence(vma) == 0 && vma->fence) + plane_state->flags |= PLANE_HAS_FENCE; - plane_state->src_x = 0; - plane_state->src_y = 0; - plane_state->src_w = fb->width << 16; - plane_state->src_h = fb->height << 16; + plane_state->uapi.src_x = 0; + plane_state->uapi.src_y = 0; + plane_state->uapi.src_w = fb->width << 16; + plane_state->uapi.src_h = fb->height << 16; - plane_state->crtc_x = 0; - plane_state->crtc_y = 0; - plane_state->crtc_w = fb->width; - plane_state->crtc_h = fb->height; + plane_state->uapi.crtc_x = 0; + plane_state->uapi.crtc_y = 0; + plane_state->uapi.crtc_w = fb->width; + plane_state->uapi.crtc_h = fb->height; if (plane_config->tiling) dev_priv->preserve_bios_swizzle = true; - plane_state->fb = fb; + plane_state->uapi.fb = fb; drm_framebuffer_get(fb); - plane_state->crtc = &intel_crtc->base; - intel_plane_copy_uapi_to_hw_state(intel_state, intel_state, - intel_crtc); + plane_state->uapi.crtc = &crtc->base; + intel_plane_copy_uapi_to_hw_state(plane_state, plane_state, crtc); intel_frontbuffer_flush(to_intel_frontbuffer(fb), ORIGIN_DIRTYFB); - atomic_or(to_intel_plane(primary)->frontbuffer_bit, - &to_intel_frontbuffer(fb)->bits); + atomic_or(plane->frontbuffer_bit, &to_intel_frontbuffer(fb)->bits); } unsigned int @@ -2193,8 +2212,29 @@ unlock: clear_bit_unlock(I915_RESET_MODESET, &dev_priv->gt.reset.flags); } -static void icl_set_pipe_chicken(struct intel_crtc *crtc) +static bool underrun_recovery_supported(const struct intel_crtc_state *crtc_state) { + if (crtc_state->pch_pfit.enabled && + (crtc_state->pipe_src_w > drm_rect_width(&crtc_state->pch_pfit.dst) || + crtc_state->pipe_src_h > drm_rect_height(&crtc_state->pch_pfit.dst) || + crtc_state->output_format == INTEL_OUTPUT_FORMAT_YCBCR420)) + return false; + + if (crtc_state->dsc.compression_enable) + return false; + + if (crtc_state->has_psr2) + return false; + + if (crtc_state->splitter.enable) + return false; + + return true; +} + +static void icl_set_pipe_chicken(const struct intel_crtc_state *crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct 
drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum pipe pipe = crtc->pipe; u32 tmp; @@ -2215,19 +2255,19 @@ static void icl_set_pipe_chicken(struct intel_crtc *crtc) */ tmp |= PIXEL_ROUNDING_TRUNC_FB_PASSTHRU; - /* - * "The underrun recovery mechanism should be disabled - * when the following is enabled for this pipe: - * WiDi - * Downscaling (this includes YUV420 fullblend) - * COG - * DSC - * PSR2" - * - * FIXME: enable whenever possible... - */ - if (IS_ALDERLAKE_P(dev_priv)) - tmp |= UNDERRUN_RECOVERY_DISABLE; + if (IS_DG2(dev_priv)) { + /* + * Underrun recovery must always be disabled on DG2. However + * the chicken bit meaning is inverted compared to other + * platforms. + */ + tmp &= ~UNDERRUN_RECOVERY_ENABLE_DG2; + } else if (DISPLAY_VER(dev_priv) >= 13) { + if (underrun_recovery_supported(crtc_state)) + tmp &= ~UNDERRUN_RECOVERY_DISABLE_ADLP; + else + tmp |= UNDERRUN_RECOVERY_DISABLE_ADLP; + } intel_de_write(dev_priv, PIPE_CHICKEN(pipe), tmp); } @@ -2706,10 +2746,10 @@ void hsw_disable_ips(const struct intel_crtc_state *crtc_state) intel_wait_for_vblank(dev_priv, crtc->pipe); } -static void intel_crtc_dpms_overlay_disable(struct intel_crtc *intel_crtc) +static void intel_crtc_dpms_overlay_disable(struct intel_crtc *crtc) { - if (intel_crtc->overlay) - (void) intel_overlay_switch_off(intel_crtc->overlay); + if (crtc->overlay) + (void) intel_overlay_switch_off(crtc->overlay); /* Let userspace switch the overlay on again. In most cases userspace * has to recompute where to put it anyway. @@ -3177,6 +3217,28 @@ static void intel_encoders_enable(struct intel_atomic_state *state, } } +static void intel_encoders_pre_disable(struct intel_atomic_state *state, + struct intel_crtc *crtc) +{ + const struct intel_crtc_state *old_crtc_state = + intel_atomic_get_old_crtc_state(state, crtc); + const struct drm_connector_state *old_conn_state; + struct drm_connector *conn; + int i; + + for_each_old_connector_in_state(&state->base, conn, old_conn_state, i) { + struct intel_encoder *encoder = + to_intel_encoder(old_conn_state->best_encoder); + + if (old_conn_state->crtc != &crtc->base) + continue; + + if (encoder->pre_disable) + encoder->pre_disable(state, encoder, old_crtc_state, + old_conn_state); + } +} + static void intel_encoders_disable(struct intel_atomic_state *state, struct intel_crtc *crtc) { @@ -3386,13 +3448,17 @@ static void glk_pipe_scaler_clock_gating_wa(struct drm_i915_private *dev_priv, intel_de_write(dev_priv, CLKGATE_DIS_PSL(pipe), val); } -static void icl_pipe_mbus_enable(struct intel_crtc *crtc) +static void icl_pipe_mbus_enable(struct intel_crtc *crtc, bool joined_mbus) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum pipe pipe = crtc->pipe; u32 val; - val = MBUS_DBOX_A_CREDIT(2); + /* Wa_22010947358:adl-p */ + if (IS_ALDERLAKE_P(dev_priv)) + val = joined_mbus ? 
MBUS_DBOX_A_CREDIT(6) : MBUS_DBOX_A_CREDIT(4); + else + val = MBUS_DBOX_A_CREDIT(2); if (DISPLAY_VER(dev_priv) >= 12) { val |= MBUS_DBOX_BW_CREDIT(2); @@ -3460,7 +3526,8 @@ static void icl_ddi_bigjoiner_pre_enable(struct intel_atomic_state *state, * Enable sequence steps 1-7 on bigjoiner master */ intel_encoders_pre_pll_enable(state, master); - intel_enable_shared_dpll(master_crtc_state); + if (master_crtc_state->shared_dpll) + intel_enable_shared_dpll(master_crtc_state); intel_encoders_pre_enable(state, master); /* and DSC on slave */ @@ -3518,7 +3585,7 @@ static void hsw_crtc_enable(struct intel_atomic_state *state, crtc->active = true; - /* Display WA #1180: WaDisableScalarClockGating: glk, cnl */ + /* Display WA #1180: WaDisableScalarClockGating: glk */ psl_clkgate_wa = DISPLAY_VER(dev_priv) == 10 && new_crtc_state->pch_pfit.enabled; if (psl_clkgate_wa) @@ -3542,13 +3609,17 @@ static void hsw_crtc_enable(struct intel_atomic_state *state, hsw_set_linetime_wm(new_crtc_state); if (DISPLAY_VER(dev_priv) >= 11) - icl_set_pipe_chicken(crtc); + icl_set_pipe_chicken(new_crtc_state); if (dev_priv->display.initial_watermarks) dev_priv->display.initial_watermarks(state, crtc); - if (DISPLAY_VER(dev_priv) >= 11) - icl_pipe_mbus_enable(crtc); + if (DISPLAY_VER(dev_priv) >= 11) { + const struct intel_dbuf_state *dbuf_state = + intel_atomic_get_new_dbuf_state(state); + + icl_pipe_mbus_enable(crtc, dbuf_state->joined_mbus); + } if (new_crtc_state->bigjoiner_slave) intel_crtc_vblank_on(new_crtc_state); @@ -3682,6 +3753,13 @@ bool intel_phy_is_combo(struct drm_i915_private *dev_priv, enum phy phy) { if (phy == PHY_NONE) return false; + else if (IS_DG2(dev_priv)) + /* + * DG2 outputs labelled as "combo PHY" in the bspec use + * SNPS PHYs with completely different programming, + * hence we always return false here. + */ + return false; else if (IS_ALDERLAKE_S(dev_priv)) return phy <= PHY_E; else if (IS_DG1(dev_priv) || IS_ROCKETLAKE(dev_priv)) @@ -3696,7 +3774,10 @@ bool intel_phy_is_combo(struct drm_i915_private *dev_priv, enum phy phy) bool intel_phy_is_tc(struct drm_i915_private *dev_priv, enum phy phy) { - if (IS_ALDERLAKE_P(dev_priv)) + if (IS_DG2(dev_priv)) + /* DG2's "TC1" output uses a SNPS PHY */ + return false; + else if (IS_ALDERLAKE_P(dev_priv)) return phy >= PHY_F && phy <= PHY_I; else if (IS_TIGERLAKE(dev_priv)) return phy >= PHY_D && phy <= PHY_I; @@ -3706,6 +3787,20 @@ bool intel_phy_is_tc(struct drm_i915_private *dev_priv, enum phy phy) return false; } +bool intel_phy_is_snps(struct drm_i915_private *dev_priv, enum phy phy) +{ + if (phy == PHY_NONE) + return false; + else if (IS_DG2(dev_priv)) + /* + * All four "combo" ports and the TC1 port (PHY E) use + * Synopsis PHYs. 
+ */ + return phy <= PHY_E; + + return false; +} + enum phy intel_port_to_phy(struct drm_i915_private *i915, enum port port) { if (DISPLAY_VER(i915) >= 13 && port >= PORT_D_XELPD) @@ -3850,7 +3945,7 @@ static u64 get_crtc_power_domains(struct intel_crtc_state *crtc_state) } if (HAS_DDI(dev_priv) && crtc_state->has_audio) - mask |= BIT_ULL(POWER_DOMAIN_AUDIO); + mask |= BIT_ULL(POWER_DOMAIN_AUDIO_MMIO); if (crtc_state->shared_dpll) mask |= BIT_ULL(POWER_DOMAIN_DISPLAY_CORE); @@ -6487,23 +6582,21 @@ int intel_get_load_detect_pipe(struct drm_connector *connector, struct intel_load_detect_pipe *old, struct drm_modeset_acquire_ctx *ctx) { - struct intel_crtc *intel_crtc; - struct intel_encoder *intel_encoder = + struct intel_encoder *encoder = intel_attached_encoder(to_intel_connector(connector)); - struct drm_crtc *possible_crtc; - struct drm_encoder *encoder = &intel_encoder->base; - struct drm_crtc *crtc = NULL; - struct drm_device *dev = encoder->dev; + struct intel_crtc *possible_crtc; + struct intel_crtc *crtc = NULL; + struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); struct drm_mode_config *config = &dev->mode_config; struct drm_atomic_state *state = NULL, *restore_state = NULL; struct drm_connector_state *connector_state; struct intel_crtc_state *crtc_state; - int ret, i = -1; + int ret; drm_dbg_kms(&dev_priv->drm, "[CONNECTOR:%d:%s], [ENCODER:%d:%s]\n", connector->base.id, connector->name, - encoder->base.id, encoder->name); + encoder->base.base.id, encoder->base.name); old->restore_state = NULL; @@ -6521,9 +6614,9 @@ int intel_get_load_detect_pipe(struct drm_connector *connector, /* See if we already have a CRTC for this connector */ if (connector->state->crtc) { - crtc = connector->state->crtc; + crtc = to_intel_crtc(connector->state->crtc); - ret = drm_modeset_lock(&crtc->mutex, ctx); + ret = drm_modeset_lock(&crtc->base.mutex, ctx); if (ret) goto fail; @@ -6532,17 +6625,17 @@ int intel_get_load_detect_pipe(struct drm_connector *connector, } /* Find an unused one (if possible) */ - for_each_crtc(dev, possible_crtc) { - i++; - if (!(encoder->possible_crtcs & (1 << i))) + for_each_intel_crtc(dev, possible_crtc) { + if (!(encoder->base.possible_crtcs & + drm_crtc_mask(&possible_crtc->base))) continue; - ret = drm_modeset_lock(&possible_crtc->mutex, ctx); + ret = drm_modeset_lock(&possible_crtc->base.mutex, ctx); if (ret) goto fail; - if (possible_crtc->state->enable) { - drm_modeset_unlock(&possible_crtc->mutex); + if (possible_crtc->base.state->enable) { + drm_modeset_unlock(&possible_crtc->base.mutex); continue; } @@ -6561,8 +6654,6 @@ int intel_get_load_detect_pipe(struct drm_connector *connector, } found: - intel_crtc = to_intel_crtc(crtc); - state = drm_atomic_state_alloc(dev); restore_state = drm_atomic_state_alloc(dev); if (!state || !restore_state) { @@ -6579,11 +6670,11 @@ found: goto fail; } - ret = drm_atomic_set_crtc_for_connector(connector_state, crtc); + ret = drm_atomic_set_crtc_for_connector(connector_state, &crtc->base); if (ret) goto fail; - crtc_state = intel_atomic_get_crtc_state(state, intel_crtc); + crtc_state = intel_atomic_get_crtc_state(state, crtc); if (IS_ERR(crtc_state)) { ret = PTR_ERR(crtc_state); goto fail; @@ -6596,15 +6687,15 @@ found: if (ret) goto fail; - ret = intel_modeset_disable_planes(state, crtc); + ret = intel_modeset_disable_planes(state, &crtc->base); if (ret) goto fail; ret = PTR_ERR_OR_ZERO(drm_atomic_get_connector_state(restore_state, connector)); if (!ret) - ret = 
PTR_ERR_OR_ZERO(drm_atomic_get_crtc_state(restore_state, crtc)); + ret = PTR_ERR_OR_ZERO(drm_atomic_get_crtc_state(restore_state, &crtc->base)); if (!ret) - ret = drm_atomic_add_affected_planes(restore_state, crtc); + ret = drm_atomic_add_affected_planes(restore_state, &crtc->base); if (ret) { drm_dbg_kms(&dev_priv->drm, "Failed to create a copy of old state to restore: %i\n", @@ -6623,7 +6714,7 @@ found: drm_atomic_state_put(state); /* let the connector get through one full cycle before testing */ - intel_wait_for_vblank(dev_priv, intel_crtc->pipe); + intel_wait_for_vblank(dev_priv, crtc->pipe); return true; fail: @@ -7295,12 +7386,13 @@ static int intel_crtc_atomic_check(struct intel_atomic_state *state, } if (dev_priv->display.compute_pipe_wm) { - ret = dev_priv->display.compute_pipe_wm(crtc_state); + ret = dev_priv->display.compute_pipe_wm(state, crtc); if (ret) { drm_dbg_kms(&dev_priv->drm, "Target pipe watermarks are invalid\n"); return ret; } + } if (dev_priv->display.compute_intermediate_wm) { @@ -7313,7 +7405,7 @@ static int intel_crtc_atomic_check(struct intel_atomic_state *state, * old state and the new state. We can program these * immediately. */ - ret = dev_priv->display.compute_intermediate_wm(crtc_state); + ret = dev_priv->display.compute_intermediate_wm(state, crtc); if (ret) { drm_dbg_kms(&dev_priv->drm, "No valid intermediate pipe watermarks are possible\n"); @@ -8636,10 +8728,11 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, PIPE_CONF_CHECK_BOOL(double_wide); - PIPE_CONF_CHECK_P(shared_dpll); + if (dev_priv->dpll.mgr) + PIPE_CONF_CHECK_P(shared_dpll); /* FIXME do the readout properly and get rid of this quirk */ - if (!PIPE_CONF_QUIRK(PIPE_CONFIG_QUIRK_BIGJOINER_SLAVE)) { + if (dev_priv->dpll.mgr && !PIPE_CONF_QUIRK(PIPE_CONFIG_QUIRK_BIGJOINER_SLAVE)) { PIPE_CONF_CHECK_X(dpll_hw_state.dpll); PIPE_CONF_CHECK_X(dpll_hw_state.dpll_md); PIPE_CONF_CHECK_X(dpll_hw_state.fp0); @@ -8671,7 +8764,9 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, PIPE_CONF_CHECK_X(dpll_hw_state.mg_pll_ssc); PIPE_CONF_CHECK_X(dpll_hw_state.mg_pll_bias); PIPE_CONF_CHECK_X(dpll_hw_state.mg_pll_tdc_coldst_bias); + } + if (!PIPE_CONF_QUIRK(PIPE_CONFIG_QUIRK_BIGJOINER_SLAVE)) { PIPE_CONF_CHECK_X(dsi_pll.ctrl); PIPE_CONF_CHECK_X(dsi_pll.div); @@ -9009,6 +9104,10 @@ verify_crtc_state(struct intel_crtc *crtc, if (!new_crtc_state->hw.active) return; + if (new_crtc_state->bigjoiner_slave) + /* No PLLs set for slave */ + pipe_config->shared_dpll = NULL; + intel_pipe_config_sanity_check(dev_priv, pipe_config); if (!intel_pipe_config_compare(new_crtc_state, @@ -9112,6 +9211,55 @@ verify_shared_dpll_state(struct intel_crtc *crtc, } static void +verify_mpllb_state(struct intel_atomic_state *state, + struct intel_crtc_state *new_crtc_state) +{ + struct drm_i915_private *i915 = to_i915(state->base.dev); + struct intel_mpllb_state mpllb_hw_state = { 0 }; + struct intel_mpllb_state *mpllb_sw_state = &new_crtc_state->mpllb_state; + struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->uapi.crtc); + struct intel_encoder *encoder; + + if (!IS_DG2(i915)) + return; + + if (!new_crtc_state->hw.active) + return; + + if (new_crtc_state->bigjoiner_slave) + return; + + encoder = intel_get_crtc_new_encoder(state, new_crtc_state); + intel_mpllb_readout_hw_state(encoder, &mpllb_hw_state); + +#define MPLLB_CHECK(name) do { \ + if (mpllb_sw_state->name != mpllb_hw_state.name) { \ + pipe_config_mismatch(false, crtc, "MPLLB:" __stringify(name), \ + "(expected 0x%08x, found 
0x%08x)", \ + mpllb_sw_state->name, \ + mpllb_hw_state.name); \ + } \ +} while (0) + + MPLLB_CHECK(mpllb_cp); + MPLLB_CHECK(mpllb_div); + MPLLB_CHECK(mpllb_div2); + MPLLB_CHECK(mpllb_fracn1); + MPLLB_CHECK(mpllb_fracn2); + MPLLB_CHECK(mpllb_sscen); + MPLLB_CHECK(mpllb_sscstep); + + /* + * ref_control is handled by the hardware/firemware and never + * programmed by the software, but the proper values are supplied + * in the bspec for verification purposes. + */ + MPLLB_CHECK(ref_control); + +#undef MPLLB_CHECK +} + +static void intel_modeset_verify_crtc(struct intel_crtc *crtc, struct intel_atomic_state *state, struct intel_crtc_state *old_crtc_state, @@ -9124,6 +9272,7 @@ intel_modeset_verify_crtc(struct intel_crtc *crtc, verify_connector_state(state, crtc); verify_crtc_state(crtc, old_crtc_state, new_crtc_state); verify_shared_dpll_state(crtc, old_crtc_state, new_crtc_state); + verify_mpllb_state(state, new_crtc_state); } static void @@ -9749,7 +9898,7 @@ static int intel_atomic_check_async(struct intel_atomic_state *state) /* * FIXME: This check is kept generic for all platforms. - * Need to verify this for all gen9 and gen10 platforms to enable + * Need to verify this for all gen9 platforms to enable * this selectively if required. */ switch (new_plane_state->hw.fb->modifier) { @@ -10160,7 +10309,7 @@ static void intel_pipe_fastset(const struct intel_crtc_state *old_crtc_state, hsw_set_linetime_wm(new_crtc_state); if (DISPLAY_VER(dev_priv) >= 11) - icl_set_pipe_chicken(crtc); + icl_set_pipe_chicken(new_crtc_state); } static void commit_pipe_pre_planes(struct intel_atomic_state *state, @@ -10294,6 +10443,8 @@ static void intel_old_crtc_state_disables(struct intel_atomic_state *state, drm_WARN_ON(&dev_priv->drm, old_crtc_state->bigjoiner_slave); + intel_encoders_pre_disable(state, crtc); + intel_crtc_disable_planes(state, crtc); /* @@ -11328,7 +11479,12 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) if (!HAS_DISPLAY(dev_priv)) return; - if (IS_ALDERLAKE_P(dev_priv)) { + if (IS_DG2(dev_priv)) { + intel_ddi_init(dev_priv, PORT_A); + intel_ddi_init(dev_priv, PORT_B); + intel_ddi_init(dev_priv, PORT_C); + intel_ddi_init(dev_priv, PORT_D_XELPD); + } else if (IS_ALDERLAKE_P(dev_priv)) { intel_ddi_init(dev_priv, PORT_A); intel_ddi_init(dev_priv, PORT_B); intel_ddi_init(dev_priv, PORT_TC1); @@ -11375,13 +11531,6 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) intel_ddi_init(dev_priv, PORT_B); intel_ddi_init(dev_priv, PORT_C); vlv_dsi_init(dev_priv); - } else if (DISPLAY_VER(dev_priv) == 10) { - intel_ddi_init(dev_priv, PORT_A); - intel_ddi_init(dev_priv, PORT_B); - intel_ddi_init(dev_priv, PORT_C); - intel_ddi_init(dev_priv, PORT_D); - intel_ddi_init(dev_priv, PORT_E); - intel_ddi_init(dev_priv, PORT_F); } else if (DISPLAY_VER(dev_priv) >= 9) { intel_ddi_init(dev_priv, PORT_A); intel_ddi_init(dev_priv, PORT_B); @@ -11790,7 +11939,7 @@ intel_user_framebuffer_create(struct drm_device *dev, /* object is backed with LMEM for discrete */ i915 = to_i915(obj->base.dev); - if (HAS_LMEM(i915) && !i915_gem_object_is_lmem(obj)) { + if (HAS_LMEM(i915) && !i915_gem_object_can_migrate(obj, INTEL_REGION_LMEM)) { /* object is "remote", not in local memory */ i915_gem_object_put(obj); return ERR_PTR(-EREMOTE); @@ -13136,7 +13285,7 @@ get_encoder_power_domains(struct drm_i915_private *dev_priv) static void intel_early_display_was(struct drm_i915_private *dev_priv) { /* - * Display WA #1185 WaDisableDARBFClkGating:cnl,glk,icl,ehl,tgl + * Display WA #1185 
WaDisableDARBFClkGating:glk,icl,ehl,tgl * Also known as Wa_14010480278. */ if (IS_DISPLAY_VER(dev_priv, 10, 12)) diff --git a/drivers/gpu/drm/i915/display/intel_display.h b/drivers/gpu/drm/i915/display/intel_display.h index c9dbaf074d77..284936f0ddab 100644 --- a/drivers/gpu/drm/i915/display/intel_display.h +++ b/drivers/gpu/drm/i915/display/intel_display.h @@ -561,6 +561,7 @@ struct drm_display_mode * intel_encoder_current_mode(struct intel_encoder *encoder); bool intel_phy_is_combo(struct drm_i915_private *dev_priv, enum phy phy); bool intel_phy_is_tc(struct drm_i915_private *dev_priv, enum phy phy); +bool intel_phy_is_snps(struct drm_i915_private *dev_priv, enum phy phy); enum tc_port intel_port_to_tc(struct drm_i915_private *dev_priv, enum port port); int intel_get_pipe_from_crtc_id_ioctl(struct drm_device *dev, void *data, diff --git a/drivers/gpu/drm/i915/display/intel_display_debugfs.c b/drivers/gpu/drm/i915/display/intel_display_debugfs.c index 88bb05d5c483..8fdacb252bb1 100644 --- a/drivers/gpu/drm/i915/display/intel_display_debugfs.c +++ b/drivers/gpu/drm/i915/display/intel_display_debugfs.c @@ -544,6 +544,11 @@ static int i915_dmc_info(struct seq_file *m, void *unused) seq_printf(m, "fw loaded: %s\n", yesno(intel_dmc_has_payload(dev_priv))); seq_printf(m, "path: %s\n", dmc->fw_path); + seq_printf(m, "Pipe A fw support: %s\n", + yesno(GRAPHICS_VER(dev_priv) >= 12)); + seq_printf(m, "Pipe A fw loaded: %s\n", yesno(dmc->dmc_info[DMC_FW_PIPEA].payload)); + seq_printf(m, "Pipe B fw support: %s\n", yesno(IS_ALDERLAKE_P(dev_priv))); + seq_printf(m, "Pipe B fw loaded: %s\n", yesno(dmc->dmc_info[DMC_FW_PIPEB].payload)); if (!intel_dmc_has_payload(dev_priv)) goto out; @@ -582,7 +587,7 @@ static int i915_dmc_info(struct seq_file *m, void *unused) out: seq_printf(m, "program base: 0x%08x\n", - intel_de_read(dev_priv, DMC_PROGRAM(0))); + intel_de_read(dev_priv, DMC_PROGRAM(dmc->dmc_info[DMC_FW_MAIN].start_mmioaddr, 0))); seq_printf(m, "ssp base: 0x%08x\n", intel_de_read(dev_priv, DMC_SSP_BASE)); seq_printf(m, "htp: 0x%08x\n", intel_de_read(dev_priv, DMC_HTP_SKL)); @@ -1225,7 +1230,7 @@ static int i915_ddb_info(struct seq_file *m, void *unused) static void drrs_status_per_crtc(struct seq_file *m, struct drm_device *dev, - struct intel_crtc *intel_crtc) + struct intel_crtc *crtc) { struct drm_i915_private *dev_priv = to_i915(dev); struct i915_drrs *drrs = &dev_priv->drrs; @@ -1237,7 +1242,7 @@ static void drrs_status_per_crtc(struct seq_file *m, drm_for_each_connector_iter(connector, &conn_iter) { bool supported = false; - if (connector->state->crtc != &intel_crtc->base) + if (connector->state->crtc != &crtc->base) continue; seq_printf(m, "%s:\n", connector->name); @@ -1252,7 +1257,7 @@ static void drrs_status_per_crtc(struct seq_file *m, seq_puts(m, "\n"); - if (to_intel_crtc_state(intel_crtc->base.state)->has_drrs) { + if (to_intel_crtc_state(crtc->base.state)->has_drrs) { struct intel_panel *panel; mutex_lock(&drrs->mutex); @@ -1298,16 +1303,16 @@ static int i915_drrs_status(struct seq_file *m, void *unused) { struct drm_i915_private *dev_priv = node_to_i915(m->private); struct drm_device *dev = &dev_priv->drm; - struct intel_crtc *intel_crtc; + struct intel_crtc *crtc; int active_crtc_cnt = 0; drm_modeset_lock_all(dev); - for_each_intel_crtc(dev, intel_crtc) { - if (intel_crtc->base.state->active) { + for_each_intel_crtc(dev, crtc) { + if (crtc->base.state->active) { active_crtc_cnt++; seq_printf(m, "\nCRTC %d: ", active_crtc_cnt); - drrs_status_per_crtc(m, dev, intel_crtc); + 
drrs_status_per_crtc(m, dev, crtc); } } drm_modeset_unlock_all(dev); @@ -2064,7 +2069,7 @@ i915_fifo_underrun_reset_write(struct file *filp, size_t cnt, loff_t *ppos) { struct drm_i915_private *dev_priv = filp->private_data; - struct intel_crtc *intel_crtc; + struct intel_crtc *crtc; struct drm_device *dev = &dev_priv->drm; int ret; bool reset; @@ -2076,15 +2081,15 @@ i915_fifo_underrun_reset_write(struct file *filp, if (!reset) return cnt; - for_each_intel_crtc(dev, intel_crtc) { + for_each_intel_crtc(dev, crtc) { struct drm_crtc_commit *commit; struct intel_crtc_state *crtc_state; - ret = drm_modeset_lock_single_interruptible(&intel_crtc->base.mutex); + ret = drm_modeset_lock_single_interruptible(&crtc->base.mutex); if (ret) return ret; - crtc_state = to_intel_crtc_state(intel_crtc->base.state); + crtc_state = to_intel_crtc_state(crtc->base.state); commit = crtc_state->uapi.commit; if (commit) { ret = wait_for_completion_interruptible(&commit->hw_done); @@ -2095,12 +2100,12 @@ i915_fifo_underrun_reset_write(struct file *filp, if (!ret && crtc_state->hw.active) { drm_dbg_kms(&dev_priv->drm, "Re-arming FIFO underruns on pipe %c\n", - pipe_name(intel_crtc->pipe)); + pipe_name(crtc->pipe)); - intel_crtc_arm_fifo_underrun(intel_crtc, crtc_state); + intel_crtc_arm_fifo_underrun(crtc, crtc_state); } - drm_modeset_unlock(&intel_crtc->base.mutex); + drm_modeset_unlock(&crtc->base.mutex); if (ret) return ret; @@ -2251,6 +2256,11 @@ static int i915_lpsp_capability_show(struct seq_file *m, void *data) if (connector->status != connector_status_connected) return -ENODEV; + if (DISPLAY_VER(i915) >= 13) { + LPSP_CAPABLE(encoder->port <= PORT_B); + return 0; + } + switch (DISPLAY_VER(i915)) { case 12: /* @@ -2385,6 +2395,73 @@ static const struct file_operations i915_dsc_fec_support_fops = { .write = i915_dsc_fec_support_write }; +static int i915_dsc_bpp_show(struct seq_file *m, void *data) +{ + struct drm_connector *connector = m->private; + struct drm_device *dev = connector->dev; + struct drm_crtc *crtc; + struct intel_crtc_state *crtc_state; + struct intel_encoder *encoder = intel_attached_encoder(to_intel_connector(connector)); + int ret; + + if (!encoder) + return -ENODEV; + + ret = drm_modeset_lock_single_interruptible(&dev->mode_config.connection_mutex); + if (ret) + return ret; + + crtc = connector->state->crtc; + if (connector->status != connector_status_connected || !crtc) { + ret = -ENODEV; + goto out; + } + + crtc_state = to_intel_crtc_state(crtc->state); + seq_printf(m, "Compressed_BPP: %d\n", crtc_state->dsc.compressed_bpp); + +out: drm_modeset_unlock(&dev->mode_config.connection_mutex); + + return ret; +} + +static ssize_t i915_dsc_bpp_write(struct file *file, + const char __user *ubuf, + size_t len, loff_t *offp) +{ + struct drm_connector *connector = + ((struct seq_file *)file->private_data)->private; + struct intel_encoder *encoder = intel_attached_encoder(to_intel_connector(connector)); + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); + int dsc_bpp = 0; + int ret; + + ret = kstrtoint_from_user(ubuf, len, 0, &dsc_bpp); + if (ret < 0) + return ret; + + intel_dp->force_dsc_bpp = dsc_bpp; + *offp += len; + + return len; +} + +static int i915_dsc_bpp_open(struct inode *inode, + struct file *file) +{ + return single_open(file, i915_dsc_bpp_show, + inode->i_private); +} + +static const struct file_operations i915_dsc_bpp_fops = { + .owner = THIS_MODULE, + .open = i915_dsc_bpp_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .write = i915_dsc_bpp_write 
+}; + /** * intel_connector_debugfs_add - add i915 specific connector debugfs files * @connector: pointer to a registered drm_connector @@ -2423,10 +2500,17 @@ int intel_connector_debugfs_add(struct drm_connector *connector) connector, &i915_hdcp_sink_capability_fops); } - if ((DISPLAY_VER(dev_priv) >= 11 || IS_CANNONLAKE(dev_priv)) && ((connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort && !to_intel_connector(connector)->mst_port) || connector->connector_type == DRM_MODE_CONNECTOR_eDP)) - debugfs_create_file("i915_dsc_fec_support", S_IRUGO, root, + if (DISPLAY_VER(dev_priv) >= 11 && + ((connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort && + !to_intel_connector(connector)->mst_port) || + connector->connector_type == DRM_MODE_CONNECTOR_eDP)) { + debugfs_create_file("i915_dsc_fec_support", 0644, root, connector, &i915_dsc_fec_support_fops); + debugfs_create_file("i915_dsc_bpp", 0644, root, + connector, &i915_dsc_bpp_fops); + } + /* Legacy panels doesn't lpsp on any platform */ if ((DISPLAY_VER(dev_priv) >= 9 || IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) && diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c index 86b7ac7b65ec..cce1a926fcc1 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.c +++ b/drivers/gpu/drm/i915/display/intel_display_power.c @@ -18,6 +18,7 @@ #include "intel_pm.h" #include "intel_pps.h" #include "intel_sideband.h" +#include "intel_snps_phy.h" #include "intel_tc.h" #include "intel_vga.h" @@ -106,8 +107,10 @@ intel_display_power_domain_str(enum intel_display_power_domain domain) return "PORT_OTHER"; case POWER_DOMAIN_VGA: return "VGA"; - case POWER_DOMAIN_AUDIO: - return "AUDIO"; + case POWER_DOMAIN_AUDIO_MMIO: + return "AUDIO_MMIO"; + case POWER_DOMAIN_AUDIO_PLAYBACK: + return "AUDIO_PLAYBACK"; case POWER_DOMAIN_AUX_A: return "AUX_A"; case POWER_DOMAIN_AUX_B: @@ -341,6 +344,17 @@ static void hsw_wait_for_power_well_enable(struct drm_i915_private *dev_priv, { const struct i915_power_well_regs *regs = power_well->desc->hsw.regs; int pw_idx = power_well->desc->hsw.idx; + int enable_delay = power_well->desc->hsw.fixed_enable_delay; + + /* + * For some power wells we're not supposed to watch the status bit for + * an ack, but rather just wait a fixed amount of time and then + * proceed. This is only used on DG2. + */ + if (IS_DG2(dev_priv) && enable_delay) { + usleep_range(enable_delay, 2 * enable_delay); + return; + } /* Timeout for PW1:10 us, AUX:not specified, other PWs:20 us. 
*/ if (intel_de_wait_for_set(dev_priv, regs->driver, @@ -436,17 +450,6 @@ static void hsw_power_well_enable(struct drm_i915_private *dev_priv, hsw_wait_for_power_well_enable(dev_priv, power_well, false); - /* Display WA #1178: cnl */ - if (IS_CANNONLAKE(dev_priv) && - pw_idx >= GLK_PW_CTL_IDX_AUX_B && - pw_idx <= CNL_PW_CTL_IDX_AUX_F) { - u32 val; - - val = intel_de_read(dev_priv, CNL_AUX_ANAOVRD1(pw_idx)); - val |= CNL_AUX_ANAOVRD1_ENABLE | CNL_AUX_ANAOVRD1_LDO_BYPASS; - intel_de_write(dev_priv, CNL_AUX_ANAOVRD1(pw_idx), val); - } - if (power_well->desc->hsw.has_fuses) { enum skl_power_gate pg; @@ -961,8 +964,9 @@ static void bxt_disable_dc9(struct drm_i915_private *dev_priv) static void assert_dmc_loaded(struct drm_i915_private *dev_priv) { drm_WARN_ONCE(&dev_priv->drm, - !intel_de_read(dev_priv, DMC_PROGRAM(0)), - "DMC program storage start is NULL\n"); + !intel_de_read(dev_priv, + DMC_PROGRAM(dev_priv->dmc.dmc_info[DMC_FW_MAIN].start_mmioaddr, 0)), + "DMC program storage start is NULL\n"); drm_WARN_ONCE(&dev_priv->drm, !intel_de_read(dev_priv, DMC_SSP_BASE), "DMC SSP Base Not fine\n"); drm_WARN_ONCE(&dev_priv->drm, !intel_de_read(dev_priv, DMC_HTP_SKL), @@ -2507,7 +2511,8 @@ intel_display_power_put_mask_in_set(struct drm_i915_private *i915, BIT_ULL(POWER_DOMAIN_PORT_DSI) | \ BIT_ULL(POWER_DOMAIN_PORT_CRT) | \ BIT_ULL(POWER_DOMAIN_VGA) | \ - BIT_ULL(POWER_DOMAIN_AUDIO) | \ + BIT_ULL(POWER_DOMAIN_AUDIO_MMIO) | \ + BIT_ULL(POWER_DOMAIN_AUDIO_PLAYBACK) | \ BIT_ULL(POWER_DOMAIN_AUX_B) | \ BIT_ULL(POWER_DOMAIN_AUX_C) | \ BIT_ULL(POWER_DOMAIN_GMBUS) | \ @@ -2557,7 +2562,8 @@ intel_display_power_put_mask_in_set(struct drm_i915_private *i915, BIT_ULL(POWER_DOMAIN_PORT_DDI_D_LANES) | \ BIT_ULL(POWER_DOMAIN_PORT_DSI) | \ BIT_ULL(POWER_DOMAIN_VGA) | \ - BIT_ULL(POWER_DOMAIN_AUDIO) | \ + BIT_ULL(POWER_DOMAIN_AUDIO_MMIO) | \ + BIT_ULL(POWER_DOMAIN_AUDIO_PLAYBACK) | \ BIT_ULL(POWER_DOMAIN_AUX_B) | \ BIT_ULL(POWER_DOMAIN_AUX_C) | \ BIT_ULL(POWER_DOMAIN_AUX_D) | \ @@ -2590,7 +2596,8 @@ intel_display_power_put_mask_in_set(struct drm_i915_private *i915, BIT_ULL(POWER_DOMAIN_PORT_DDI_D_LANES) | \ BIT_ULL(POWER_DOMAIN_PORT_CRT) | /* DDI E */ \ BIT_ULL(POWER_DOMAIN_VGA) | \ - BIT_ULL(POWER_DOMAIN_AUDIO) | \ + BIT_ULL(POWER_DOMAIN_AUDIO_MMIO) | \ + BIT_ULL(POWER_DOMAIN_AUDIO_PLAYBACK) | \ BIT_ULL(POWER_DOMAIN_INIT)) #define BDW_DISPLAY_POWER_DOMAINS ( \ @@ -2606,7 +2613,8 @@ intel_display_power_put_mask_in_set(struct drm_i915_private *i915, BIT_ULL(POWER_DOMAIN_PORT_DDI_D_LANES) | \ BIT_ULL(POWER_DOMAIN_PORT_CRT) | /* DDI E */ \ BIT_ULL(POWER_DOMAIN_VGA) | \ - BIT_ULL(POWER_DOMAIN_AUDIO) | \ + BIT_ULL(POWER_DOMAIN_AUDIO_MMIO) | \ + BIT_ULL(POWER_DOMAIN_AUDIO_PLAYBACK) | \ BIT_ULL(POWER_DOMAIN_INIT)) #define SKL_DISPLAY_POWERWELL_2_POWER_DOMAINS ( \ @@ -2624,7 +2632,8 @@ intel_display_power_put_mask_in_set(struct drm_i915_private *i915, BIT_ULL(POWER_DOMAIN_AUX_B) | \ BIT_ULL(POWER_DOMAIN_AUX_C) | \ BIT_ULL(POWER_DOMAIN_AUX_D) | \ - BIT_ULL(POWER_DOMAIN_AUDIO) | \ + BIT_ULL(POWER_DOMAIN_AUDIO_MMIO) | \ + BIT_ULL(POWER_DOMAIN_AUDIO_PLAYBACK) | \ BIT_ULL(POWER_DOMAIN_VGA) | \ BIT_ULL(POWER_DOMAIN_INIT)) #define SKL_DISPLAY_DDI_IO_A_E_POWER_DOMAINS ( \ @@ -2659,7 +2668,8 @@ intel_display_power_put_mask_in_set(struct drm_i915_private *i915, BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) | \ BIT_ULL(POWER_DOMAIN_AUX_B) | \ BIT_ULL(POWER_DOMAIN_AUX_C) | \ - BIT_ULL(POWER_DOMAIN_AUDIO) | \ + BIT_ULL(POWER_DOMAIN_AUDIO_MMIO) | \ + BIT_ULL(POWER_DOMAIN_AUDIO_PLAYBACK) | \ BIT_ULL(POWER_DOMAIN_VGA) | \ 
BIT_ULL(POWER_DOMAIN_INIT)) #define BXT_DISPLAY_DC_OFF_POWER_DOMAINS ( \ @@ -2692,7 +2702,8 @@ intel_display_power_put_mask_in_set(struct drm_i915_private *i915, BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) | \ BIT_ULL(POWER_DOMAIN_AUX_B) | \ BIT_ULL(POWER_DOMAIN_AUX_C) | \ - BIT_ULL(POWER_DOMAIN_AUDIO) | \ + BIT_ULL(POWER_DOMAIN_AUDIO_MMIO) | \ + BIT_ULL(POWER_DOMAIN_AUDIO_PLAYBACK) | \ BIT_ULL(POWER_DOMAIN_VGA) | \ BIT_ULL(POWER_DOMAIN_INIT)) #define GLK_DISPLAY_DDI_IO_A_POWER_DOMAINS ( \ @@ -2731,63 +2742,6 @@ intel_display_power_put_mask_in_set(struct drm_i915_private *i915, BIT_ULL(POWER_DOMAIN_GMBUS) | \ BIT_ULL(POWER_DOMAIN_INIT)) -#define CNL_DISPLAY_POWERWELL_2_POWER_DOMAINS ( \ - BIT_ULL(POWER_DOMAIN_TRANSCODER_A) | \ - BIT_ULL(POWER_DOMAIN_PIPE_B) | \ - BIT_ULL(POWER_DOMAIN_TRANSCODER_B) | \ - BIT_ULL(POWER_DOMAIN_PIPE_C) | \ - BIT_ULL(POWER_DOMAIN_TRANSCODER_C) | \ - BIT_ULL(POWER_DOMAIN_PIPE_B_PANEL_FITTER) | \ - BIT_ULL(POWER_DOMAIN_PIPE_C_PANEL_FITTER) | \ - BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) | \ - BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) | \ - BIT_ULL(POWER_DOMAIN_PORT_DDI_D_LANES) | \ - BIT_ULL(POWER_DOMAIN_PORT_DDI_F_LANES) | \ - BIT_ULL(POWER_DOMAIN_AUX_B) | \ - BIT_ULL(POWER_DOMAIN_AUX_C) | \ - BIT_ULL(POWER_DOMAIN_AUX_D) | \ - BIT_ULL(POWER_DOMAIN_AUX_F) | \ - BIT_ULL(POWER_DOMAIN_AUDIO) | \ - BIT_ULL(POWER_DOMAIN_VGA) | \ - BIT_ULL(POWER_DOMAIN_INIT)) -#define CNL_DISPLAY_DDI_A_IO_POWER_DOMAINS ( \ - BIT_ULL(POWER_DOMAIN_PORT_DDI_A_IO) | \ - BIT_ULL(POWER_DOMAIN_INIT)) -#define CNL_DISPLAY_DDI_B_IO_POWER_DOMAINS ( \ - BIT_ULL(POWER_DOMAIN_PORT_DDI_B_IO) | \ - BIT_ULL(POWER_DOMAIN_INIT)) -#define CNL_DISPLAY_DDI_C_IO_POWER_DOMAINS ( \ - BIT_ULL(POWER_DOMAIN_PORT_DDI_C_IO) | \ - BIT_ULL(POWER_DOMAIN_INIT)) -#define CNL_DISPLAY_DDI_D_IO_POWER_DOMAINS ( \ - BIT_ULL(POWER_DOMAIN_PORT_DDI_D_IO) | \ - BIT_ULL(POWER_DOMAIN_INIT)) -#define CNL_DISPLAY_AUX_A_POWER_DOMAINS ( \ - BIT_ULL(POWER_DOMAIN_AUX_A) | \ - BIT_ULL(POWER_DOMAIN_AUX_IO_A) | \ - BIT_ULL(POWER_DOMAIN_INIT)) -#define CNL_DISPLAY_AUX_B_POWER_DOMAINS ( \ - BIT_ULL(POWER_DOMAIN_AUX_B) | \ - BIT_ULL(POWER_DOMAIN_INIT)) -#define CNL_DISPLAY_AUX_C_POWER_DOMAINS ( \ - BIT_ULL(POWER_DOMAIN_AUX_C) | \ - BIT_ULL(POWER_DOMAIN_INIT)) -#define CNL_DISPLAY_AUX_D_POWER_DOMAINS ( \ - BIT_ULL(POWER_DOMAIN_AUX_D) | \ - BIT_ULL(POWER_DOMAIN_INIT)) -#define CNL_DISPLAY_AUX_F_POWER_DOMAINS ( \ - BIT_ULL(POWER_DOMAIN_AUX_F) | \ - BIT_ULL(POWER_DOMAIN_INIT)) -#define CNL_DISPLAY_DDI_F_IO_POWER_DOMAINS ( \ - BIT_ULL(POWER_DOMAIN_PORT_DDI_F_IO) | \ - BIT_ULL(POWER_DOMAIN_INIT)) -#define CNL_DISPLAY_DC_OFF_POWER_DOMAINS ( \ - CNL_DISPLAY_POWERWELL_2_POWER_DOMAINS | \ - BIT_ULL(POWER_DOMAIN_GT_IRQ) | \ - BIT_ULL(POWER_DOMAIN_MODESET) | \ - BIT_ULL(POWER_DOMAIN_AUX_A) | \ - BIT_ULL(POWER_DOMAIN_INIT)) - /* * ICL PW_0/PG_0 domains (HW/DMC control): * - PCI @@ -2829,7 +2783,8 @@ intel_display_power_put_mask_in_set(struct drm_i915_private *i915, BIT_ULL(POWER_DOMAIN_AUX_E_TBT) | \ BIT_ULL(POWER_DOMAIN_AUX_F_TBT) | \ BIT_ULL(POWER_DOMAIN_VGA) | \ - BIT_ULL(POWER_DOMAIN_AUDIO) | \ + BIT_ULL(POWER_DOMAIN_AUDIO_MMIO) | \ + BIT_ULL(POWER_DOMAIN_AUDIO_PLAYBACK) | \ BIT_ULL(POWER_DOMAIN_INIT)) /* * - transcoder WD @@ -2921,7 +2876,8 @@ intel_display_power_put_mask_in_set(struct drm_i915_private *i915, BIT_ULL(POWER_DOMAIN_AUX_TBT5) | \ BIT_ULL(POWER_DOMAIN_AUX_TBT6) | \ BIT_ULL(POWER_DOMAIN_VGA) | \ - BIT_ULL(POWER_DOMAIN_AUDIO) | \ + BIT_ULL(POWER_DOMAIN_AUDIO_MMIO) | \ + BIT_ULL(POWER_DOMAIN_AUDIO_PLAYBACK) | \ BIT_ULL(POWER_DOMAIN_INIT)) #define 
TGL_PW_2_POWER_DOMAINS ( \ @@ -2991,7 +2947,8 @@ intel_display_power_put_mask_in_set(struct drm_i915_private *i915, RKL_PW_4_POWER_DOMAINS | \ BIT_ULL(POWER_DOMAIN_PIPE_B) | \ BIT_ULL(POWER_DOMAIN_PIPE_B_PANEL_FITTER) | \ - BIT_ULL(POWER_DOMAIN_AUDIO) | \ + BIT_ULL(POWER_DOMAIN_AUDIO_MMIO) | \ + BIT_ULL(POWER_DOMAIN_AUDIO_PLAYBACK) | \ BIT_ULL(POWER_DOMAIN_VGA) | \ BIT_ULL(POWER_DOMAIN_TRANSCODER_B) | \ BIT_ULL(POWER_DOMAIN_PORT_DDI_LANES_TC1) | \ @@ -3029,6 +2986,35 @@ intel_display_power_put_mask_in_set(struct drm_i915_private *i915, BIT_ULL(POWER_DOMAIN_INIT)) /* + * DG1 onwards Audio MMIO/VERBS lies in PG0 power well. + */ +#define DG1_PW_3_POWER_DOMAINS ( \ + TGL_PW_4_POWER_DOMAINS | \ + BIT_ULL(POWER_DOMAIN_PIPE_B) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_B) | \ + BIT_ULL(POWER_DOMAIN_PIPE_B_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_LANES_TC1) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_LANES_TC2) | \ + BIT_ULL(POWER_DOMAIN_AUX_USBC1) | \ + BIT_ULL(POWER_DOMAIN_AUX_USBC2) | \ + BIT_ULL(POWER_DOMAIN_VGA) | \ + BIT_ULL(POWER_DOMAIN_AUDIO_PLAYBACK) | \ + BIT_ULL(POWER_DOMAIN_INIT)) + +#define DG1_PW_2_POWER_DOMAINS ( \ + DG1_PW_3_POWER_DOMAINS | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_VDSC_PW2) | \ + BIT_ULL(POWER_DOMAIN_INIT)) + +#define DG1_DISPLAY_DC_OFF_POWER_DOMAINS ( \ + DG1_PW_3_POWER_DOMAINS | \ + BIT_ULL(POWER_DOMAIN_AUDIO_MMIO) | \ + BIT_ULL(POWER_DOMAIN_MODESET) | \ + BIT_ULL(POWER_DOMAIN_AUX_A) | \ + BIT_ULL(POWER_DOMAIN_AUX_B) | \ + BIT_ULL(POWER_DOMAIN_INIT)) + +/* * XE_LPD Power Domains * * Previous platforms required that PG(n-1) be enabled before PG(n). That @@ -3073,7 +3059,7 @@ intel_display_power_put_mask_in_set(struct drm_i915_private *i915, XELPD_PW_B_POWER_DOMAINS | \ XELPD_PW_C_POWER_DOMAINS | \ XELPD_PW_D_POWER_DOMAINS | \ - BIT_ULL(POWER_DOMAIN_AUDIO) | \ + BIT_ULL(POWER_DOMAIN_AUDIO_PLAYBACK) | \ BIT_ULL(POWER_DOMAIN_VGA) | \ BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) | \ BIT_ULL(POWER_DOMAIN_PORT_DDI_LANES_D_XELPD) | \ @@ -3114,6 +3100,7 @@ intel_display_power_put_mask_in_set(struct drm_i915_private *i915, #define XELPD_DISPLAY_DC_OFF_POWER_DOMAINS ( \ XELPD_PW_2_POWER_DOMAINS | \ + BIT_ULL(POWER_DOMAIN_AUDIO_MMIO) | \ BIT_ULL(POWER_DOMAIN_MODESET) | \ BIT_ULL(POWER_DOMAIN_AUX_A) | \ BIT_ULL(POWER_DOMAIN_AUX_B) | \ @@ -3694,148 +3681,6 @@ static const struct i915_power_well_desc glk_power_wells[] = { }, }; -static const struct i915_power_well_desc cnl_power_wells[] = { - { - .name = "always-on", - .always_on = true, - .domains = POWER_DOMAIN_MASK, - .ops = &i9xx_always_on_power_well_ops, - .id = DISP_PW_ID_NONE, - }, - { - .name = "power well 1", - /* Handled by the DMC firmware */ - .always_on = true, - .domains = 0, - .ops = &hsw_power_well_ops, - .id = SKL_DISP_PW_1, - { - .hsw.regs = &hsw_power_well_regs, - .hsw.idx = SKL_PW_CTL_IDX_PW_1, - .hsw.has_fuses = true, - }, - }, - { - .name = "AUX A", - .domains = CNL_DISPLAY_AUX_A_POWER_DOMAINS, - .ops = &hsw_power_well_ops, - .id = DISP_PW_ID_NONE, - { - .hsw.regs = &hsw_power_well_regs, - .hsw.idx = GLK_PW_CTL_IDX_AUX_A, - }, - }, - { - .name = "AUX B", - .domains = CNL_DISPLAY_AUX_B_POWER_DOMAINS, - .ops = &hsw_power_well_ops, - .id = DISP_PW_ID_NONE, - { - .hsw.regs = &hsw_power_well_regs, - .hsw.idx = GLK_PW_CTL_IDX_AUX_B, - }, - }, - { - .name = "AUX C", - .domains = CNL_DISPLAY_AUX_C_POWER_DOMAINS, - .ops = &hsw_power_well_ops, - .id = DISP_PW_ID_NONE, - { - .hsw.regs = &hsw_power_well_regs, - .hsw.idx = GLK_PW_CTL_IDX_AUX_C, - }, - }, - { - .name = "AUX D", - .domains = CNL_DISPLAY_AUX_D_POWER_DOMAINS, - .ops = 
&hsw_power_well_ops, - .id = DISP_PW_ID_NONE, - { - .hsw.regs = &hsw_power_well_regs, - .hsw.idx = CNL_PW_CTL_IDX_AUX_D, - }, - }, - { - .name = "DC off", - .domains = CNL_DISPLAY_DC_OFF_POWER_DOMAINS, - .ops = &gen9_dc_off_power_well_ops, - .id = SKL_DISP_DC_OFF, - }, - { - .name = "power well 2", - .domains = CNL_DISPLAY_POWERWELL_2_POWER_DOMAINS, - .ops = &hsw_power_well_ops, - .id = SKL_DISP_PW_2, - { - .hsw.regs = &hsw_power_well_regs, - .hsw.idx = SKL_PW_CTL_IDX_PW_2, - .hsw.irq_pipe_mask = BIT(PIPE_B) | BIT(PIPE_C), - .hsw.has_vga = true, - .hsw.has_fuses = true, - }, - }, - { - .name = "DDI A IO power well", - .domains = CNL_DISPLAY_DDI_A_IO_POWER_DOMAINS, - .ops = &hsw_power_well_ops, - .id = DISP_PW_ID_NONE, - { - .hsw.regs = &hsw_power_well_regs, - .hsw.idx = GLK_PW_CTL_IDX_DDI_A, - }, - }, - { - .name = "DDI B IO power well", - .domains = CNL_DISPLAY_DDI_B_IO_POWER_DOMAINS, - .ops = &hsw_power_well_ops, - .id = DISP_PW_ID_NONE, - { - .hsw.regs = &hsw_power_well_regs, - .hsw.idx = SKL_PW_CTL_IDX_DDI_B, - }, - }, - { - .name = "DDI C IO power well", - .domains = CNL_DISPLAY_DDI_C_IO_POWER_DOMAINS, - .ops = &hsw_power_well_ops, - .id = DISP_PW_ID_NONE, - { - .hsw.regs = &hsw_power_well_regs, - .hsw.idx = SKL_PW_CTL_IDX_DDI_C, - }, - }, - { - .name = "DDI D IO power well", - .domains = CNL_DISPLAY_DDI_D_IO_POWER_DOMAINS, - .ops = &hsw_power_well_ops, - .id = DISP_PW_ID_NONE, - { - .hsw.regs = &hsw_power_well_regs, - .hsw.idx = SKL_PW_CTL_IDX_DDI_D, - }, - }, - { - .name = "DDI F IO power well", - .domains = CNL_DISPLAY_DDI_F_IO_POWER_DOMAINS, - .ops = &hsw_power_well_ops, - .id = CNL_DISP_PW_DDI_F_IO, - { - .hsw.regs = &hsw_power_well_regs, - .hsw.idx = CNL_PW_CTL_IDX_DDI_F, - }, - }, - { - .name = "AUX F", - .domains = CNL_DISPLAY_AUX_F_POWER_DOMAINS, - .ops = &hsw_power_well_ops, - .id = CNL_DISP_PW_DDI_F_AUX, - { - .hsw.regs = &hsw_power_well_regs, - .hsw.idx = CNL_PW_CTL_IDX_AUX_F, - }, - }, -}; - static const struct i915_power_well_ops icl_aux_power_well_ops = { .sync_hw = hsw_power_well_sync_hw, .enable = icl_aux_power_well_enable, @@ -4642,6 +4487,165 @@ static const struct i915_power_well_desc rkl_power_wells[] = { }, }; +static const struct i915_power_well_desc dg1_power_wells[] = { + { + .name = "always-on", + .always_on = true, + .domains = POWER_DOMAIN_MASK, + .ops = &i9xx_always_on_power_well_ops, + .id = DISP_PW_ID_NONE, + }, + { + .name = "power well 1", + /* Handled by the DMC firmware */ + .always_on = true, + .domains = 0, + .ops = &hsw_power_well_ops, + .id = SKL_DISP_PW_1, + { + .hsw.regs = &hsw_power_well_regs, + .hsw.idx = ICL_PW_CTL_IDX_PW_1, + .hsw.has_fuses = true, + }, + }, + { + .name = "DC off", + .domains = DG1_DISPLAY_DC_OFF_POWER_DOMAINS, + .ops = &gen9_dc_off_power_well_ops, + .id = SKL_DISP_DC_OFF, + }, + { + .name = "power well 2", + .domains = DG1_PW_2_POWER_DOMAINS, + .ops = &hsw_power_well_ops, + .id = SKL_DISP_PW_2, + { + .hsw.regs = &hsw_power_well_regs, + .hsw.idx = ICL_PW_CTL_IDX_PW_2, + .hsw.has_fuses = true, + }, + }, + { + .name = "power well 3", + .domains = DG1_PW_3_POWER_DOMAINS, + .ops = &hsw_power_well_ops, + .id = ICL_DISP_PW_3, + { + .hsw.regs = &hsw_power_well_regs, + .hsw.idx = ICL_PW_CTL_IDX_PW_3, + .hsw.irq_pipe_mask = BIT(PIPE_B), + .hsw.has_vga = true, + .hsw.has_fuses = true, + }, + }, + { + .name = "DDI A IO", + .domains = ICL_DDI_IO_A_POWER_DOMAINS, + .ops = &hsw_power_well_ops, + .id = DISP_PW_ID_NONE, + { + .hsw.regs = &icl_ddi_power_well_regs, + .hsw.idx = ICL_PW_CTL_IDX_DDI_A, + } + }, + { + .name = "DDI B IO", + 
.domains = ICL_DDI_IO_B_POWER_DOMAINS, + .ops = &hsw_power_well_ops, + .id = DISP_PW_ID_NONE, + { + .hsw.regs = &icl_ddi_power_well_regs, + .hsw.idx = ICL_PW_CTL_IDX_DDI_B, + } + }, + { + .name = "DDI IO TC1", + .domains = TGL_DDI_IO_TC1_POWER_DOMAINS, + .ops = &hsw_power_well_ops, + .id = DISP_PW_ID_NONE, + { + .hsw.regs = &icl_ddi_power_well_regs, + .hsw.idx = TGL_PW_CTL_IDX_DDI_TC1, + }, + }, + { + .name = "DDI IO TC2", + .domains = TGL_DDI_IO_TC2_POWER_DOMAINS, + .ops = &hsw_power_well_ops, + .id = DISP_PW_ID_NONE, + { + .hsw.regs = &icl_ddi_power_well_regs, + .hsw.idx = TGL_PW_CTL_IDX_DDI_TC2, + }, + }, + { + .name = "AUX A", + .domains = TGL_AUX_A_IO_POWER_DOMAINS, + .ops = &icl_aux_power_well_ops, + .id = DISP_PW_ID_NONE, + { + .hsw.regs = &icl_aux_power_well_regs, + .hsw.idx = ICL_PW_CTL_IDX_AUX_A, + }, + }, + { + .name = "AUX B", + .domains = TGL_AUX_B_IO_POWER_DOMAINS, + .ops = &icl_aux_power_well_ops, + .id = DISP_PW_ID_NONE, + { + .hsw.regs = &icl_aux_power_well_regs, + .hsw.idx = ICL_PW_CTL_IDX_AUX_B, + }, + }, + { + .name = "AUX USBC1", + .domains = TGL_AUX_IO_USBC1_POWER_DOMAINS, + .ops = &icl_aux_power_well_ops, + .id = DISP_PW_ID_NONE, + { + .hsw.regs = &icl_aux_power_well_regs, + .hsw.idx = TGL_PW_CTL_IDX_AUX_TC1, + .hsw.is_tc_tbt = false, + }, + }, + { + .name = "AUX USBC2", + .domains = TGL_AUX_IO_USBC2_POWER_DOMAINS, + .ops = &icl_aux_power_well_ops, + .id = DISP_PW_ID_NONE, + { + .hsw.regs = &icl_aux_power_well_regs, + .hsw.idx = TGL_PW_CTL_IDX_AUX_TC2, + .hsw.is_tc_tbt = false, + }, + }, + { + .name = "power well 4", + .domains = TGL_PW_4_POWER_DOMAINS, + .ops = &hsw_power_well_ops, + .id = DISP_PW_ID_NONE, + { + .hsw.regs = &hsw_power_well_regs, + .hsw.idx = ICL_PW_CTL_IDX_PW_4, + .hsw.has_fuses = true, + .hsw.irq_pipe_mask = BIT(PIPE_C), + } + }, + { + .name = "power well 5", + .domains = TGL_PW_5_POWER_DOMAINS, + .ops = &hsw_power_well_ops, + .id = DISP_PW_ID_NONE, + { + .hsw.regs = &hsw_power_well_regs, + .hsw.idx = TGL_PW_CTL_IDX_PW_5, + .hsw.has_fuses = true, + .hsw.irq_pipe_mask = BIT(PIPE_D), + }, + }, +}; + static const struct i915_power_well_desc xelpd_power_wells[] = { { .name = "always-on", @@ -4827,6 +4831,7 @@ static const struct i915_power_well_desc xelpd_power_wells[] = { { .hsw.regs = &icl_aux_power_well_regs, .hsw.idx = ICL_PW_CTL_IDX_AUX_A, + .hsw.fixed_enable_delay = 600, }, }, { @@ -4837,6 +4842,7 @@ static const struct i915_power_well_desc xelpd_power_wells[] = { { .hsw.regs = &icl_aux_power_well_regs, .hsw.idx = ICL_PW_CTL_IDX_AUX_B, + .hsw.fixed_enable_delay = 600, }, }, { @@ -4847,6 +4853,7 @@ static const struct i915_power_well_desc xelpd_power_wells[] = { { .hsw.regs = &icl_aux_power_well_regs, .hsw.idx = ICL_PW_CTL_IDX_AUX_C, + .hsw.fixed_enable_delay = 600, }, }, { @@ -4857,6 +4864,7 @@ static const struct i915_power_well_desc xelpd_power_wells[] = { { .hsw.regs = &icl_aux_power_well_regs, .hsw.idx = XELPD_PW_CTL_IDX_AUX_D, + .hsw.fixed_enable_delay = 600, }, }, { @@ -4877,6 +4885,7 @@ static const struct i915_power_well_desc xelpd_power_wells[] = { { .hsw.regs = &icl_aux_power_well_regs, .hsw.idx = TGL_PW_CTL_IDX_AUX_TC1, + .hsw.fixed_enable_delay = 600, }, }, { @@ -5121,7 +5130,9 @@ int intel_power_domains_init(struct drm_i915_private *dev_priv) err = 0; } else if (DISPLAY_VER(dev_priv) >= 13) { err = set_power_wells(power_domains, xelpd_power_wells); - } else if (IS_ALDERLAKE_S(dev_priv) || IS_DG1(dev_priv)) { + } else if (IS_DG1(dev_priv)) { + err = set_power_wells(power_domains, dg1_power_wells); + } else if 
(IS_ALDERLAKE_S(dev_priv)) { err = set_power_wells_mask(power_domains, tgl_power_wells, BIT_ULL(TGL_DISP_PW_TC_COLD_OFF)); } else if (IS_ROCKETLAKE(dev_priv)) { @@ -5130,12 +5141,6 @@ int intel_power_domains_init(struct drm_i915_private *dev_priv) err = set_power_wells(power_domains, tgl_power_wells); } else if (DISPLAY_VER(dev_priv) == 11) { err = set_power_wells(power_domains, icl_power_wells); - } else if (IS_CNL_WITH_PORT_F(dev_priv)) { - err = set_power_wells(power_domains, cnl_power_wells); - } else if (IS_CANNONLAKE(dev_priv)) { - err = set_power_wells_mask(power_domains, cnl_power_wells, - BIT_ULL(CNL_DISP_PW_DDI_F_IO) | - BIT_ULL(CNL_DISP_PW_DDI_F_AUX)); } else if (IS_GEMINILAKE(dev_priv)) { err = set_power_wells(power_domains, glk_power_wells); } else if (IS_BROXTON(dev_priv)) { @@ -5690,75 +5695,6 @@ static void bxt_display_core_uninit(struct drm_i915_private *dev_priv) usleep_range(10, 30); /* 10 us delay per Bspec */ } -static void cnl_display_core_init(struct drm_i915_private *dev_priv, bool resume) -{ - struct i915_power_domains *power_domains = &dev_priv->power_domains; - struct i915_power_well *well; - - gen9_set_dc_state(dev_priv, DC_STATE_DISABLE); - - /* 1. Enable PCH Reset Handshake */ - intel_pch_reset_handshake(dev_priv, !HAS_PCH_NOP(dev_priv)); - - if (!HAS_DISPLAY(dev_priv)) - return; - - /* 2-3. */ - intel_combo_phy_init(dev_priv); - - /* - * 4. Enable Power Well 1 (PG1). - * The AUX IO power wells will be enabled on demand. - */ - mutex_lock(&power_domains->lock); - well = lookup_power_well(dev_priv, SKL_DISP_PW_1); - intel_power_well_enable(dev_priv, well); - mutex_unlock(&power_domains->lock); - - /* 5. Enable CD clock */ - intel_cdclk_init_hw(dev_priv); - - /* 6. Enable DBUF */ - gen9_dbuf_enable(dev_priv); - - if (resume && intel_dmc_has_payload(dev_priv)) - intel_dmc_load_program(dev_priv); -} - -static void cnl_display_core_uninit(struct drm_i915_private *dev_priv) -{ - struct i915_power_domains *power_domains = &dev_priv->power_domains; - struct i915_power_well *well; - - if (!HAS_DISPLAY(dev_priv)) - return; - - gen9_disable_dc_states(dev_priv); - - /* 1. Disable all display engine functions -> aready done */ - - /* 2. Disable DBUF */ - gen9_dbuf_disable(dev_priv); - - /* 3. Disable CD clock */ - intel_cdclk_uninit_hw(dev_priv); - - /* - * 4. Disable Power Well 1 (PG1). - * The AUX IO power wells are toggled on demand, so they are already - * disabled at this point. - */ - mutex_lock(&power_domains->lock); - well = lookup_power_well(dev_priv, SKL_DISP_PW_1); - intel_power_well_disable(dev_priv, well); - mutex_unlock(&power_domains->lock); - - usleep_range(10, 30); /* 10 us delay per Bspec */ - - /* 5. 
*/ - intel_combo_phy_uninit(dev_priv); -} - struct buddy_page_mask { u32 page_mask; u8 type; @@ -5797,9 +5733,14 @@ static void tgl_bw_buddy_init(struct drm_i915_private *dev_priv) unsigned long abox_mask = INTEL_INFO(dev_priv)->abox_mask; int config, i; + /* BW_BUDDY registers are not used on dgpu's beyond DG1 */ + if (IS_DGFX(dev_priv) && !IS_DG1(dev_priv)) + return; + if (IS_ALDERLAKE_S(dev_priv) || - IS_DG1_REVID(dev_priv, DG1_REVID_A0, DG1_REVID_A0) || - IS_TGL_DISPLAY_STEP(dev_priv, STEP_A0, STEP_B0)) + IS_DG1_DISPLAY_STEP(dev_priv, STEP_A0, STEP_B0) || + IS_RKL_DISPLAY_STEP(dev_priv, STEP_A0, STEP_B0) || + IS_TGL_DISPLAY_STEP(dev_priv, STEP_A0, STEP_C0)) /* Wa_1409767108:tgl,dg1,adl-s */ table = wa_1409767108_buddy_page_masks; else @@ -5821,10 +5762,11 @@ static void tgl_bw_buddy_init(struct drm_i915_private *dev_priv) intel_de_write(dev_priv, BW_BUDDY_PAGE_MASK(i), table[config].page_mask); - /* Wa_22010178259:tgl,rkl */ - intel_de_rmw(dev_priv, BW_BUDDY_CTL(i), - BW_BUDDY_TLB_REQ_TIMER_MASK, - BW_BUDDY_TLB_REQ_TIMER(0x8)); + /* Wa_22010178259:tgl,dg1,rkl,adl-s */ + if (DISPLAY_VER(dev_priv) == 12) + intel_de_rmw(dev_priv, BW_BUDDY_CTL(i), + BW_BUDDY_TLB_REQ_TIMER_MASK, + BW_BUDDY_TLB_REQ_TIMER(0x8)); } } } @@ -5878,11 +5820,15 @@ static void icl_display_core_init(struct drm_i915_private *dev_priv, if (DISPLAY_VER(dev_priv) >= 12) tgl_bw_buddy_init(dev_priv); + /* 8. Ensure PHYs have completed calibration and adaptation */ + if (IS_DG2(dev_priv)) + intel_snps_phy_wait_for_calibration(dev_priv); + if (resume && intel_dmc_has_payload(dev_priv)) intel_dmc_load_program(dev_priv); - /* Wa_14011508470 */ - if (DISPLAY_VER(dev_priv) == 12) { + /* Wa_14011508470:tgl,dg1,rkl,adl-s,adl-p */ + if (DISPLAY_VER(dev_priv) >= 12) { val = DCPR_CLEAR_MEMSTAT_DIS | DCPR_SEND_RESP_IMM | DCPR_MASK_LPMODE | DCPR_MASK_MAXLATENCY_MEMUP_CLR; intel_uncore_rmw(&dev_priv->uncore, GEN11_CHICKEN_DCPR_2, 0, val); @@ -6097,8 +6043,6 @@ void intel_power_domains_init_hw(struct drm_i915_private *i915, bool resume) if (DISPLAY_VER(i915) >= 11) { icl_display_core_init(i915, resume); - } else if (IS_CANNONLAKE(i915)) { - cnl_display_core_init(i915, resume); } else if (IS_GEMINILAKE(i915) || IS_BROXTON(i915)) { bxt_display_core_init(i915, resume); } else if (DISPLAY_VER(i915) == 9) { @@ -6258,8 +6202,6 @@ void intel_power_domains_suspend(struct drm_i915_private *i915, if (DISPLAY_VER(i915) >= 11) icl_display_core_uninit(i915); - else if (IS_CANNONLAKE(i915)) - cnl_display_core_uninit(i915); else if (IS_GEMINILAKE(i915) || IS_BROXTON(i915)) bxt_display_core_uninit(i915); else if (DISPLAY_VER(i915) == 9) diff --git a/drivers/gpu/drm/i915/display/intel_display_power.h b/drivers/gpu/drm/i915/display/intel_display_power.h index 4f0917df4375..978531841fa3 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.h +++ b/drivers/gpu/drm/i915/display/intel_display_power.h @@ -76,7 +76,8 @@ enum intel_display_power_domain { POWER_DOMAIN_PORT_CRT, POWER_DOMAIN_PORT_OTHER, POWER_DOMAIN_VGA, - POWER_DOMAIN_AUDIO, + POWER_DOMAIN_AUDIO_MMIO, + POWER_DOMAIN_AUDIO_PLAYBACK, POWER_DOMAIN_AUX_A, POWER_DOMAIN_AUX_B, POWER_DOMAIN_AUX_C, @@ -142,8 +143,6 @@ enum i915_power_well_id { SKL_DISP_PW_MISC_IO, SKL_DISP_PW_1, SKL_DISP_PW_2, - CNL_DISP_PW_DDI_F_IO, - CNL_DISP_PW_DDI_F_AUX, ICL_DISP_PW_3, SKL_DISP_DC_OFF, TGL_DISP_PW_TC_COLD_OFF, @@ -223,6 +222,12 @@ struct i915_power_well_desc { u8 idx; /* Mask of pipes whose IRQ logic is backed by the pw */ u8 irq_pipe_mask; + /* + * Instead of waiting for the status bit to ack enables, + * 
just wait a specific amount of time and then consider + * the well enabled. + */ + u16 fixed_enable_delay; /* The pw is backing the VGA functionality */ bool has_vga:1; bool has_fuses:1; @@ -386,6 +391,10 @@ intel_display_power_put_all_in_set(struct drm_i915_private *i915, intel_display_power_put_mask_in_set(i915, power_domain_set, power_domain_set->mask); } +/* + * FIXME: We should probably switch this to a 0-based scheme to be consistent + * with how we now name/number DBUF_CTL instances. + */ enum dbuf_slice { DBUF_S1, DBUF_S2, diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index 04613864cbe8..6beeeeba1bed 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -48,6 +48,7 @@ struct drm_printer; struct __intel_global_objs_state; +struct intel_ddi_buf_trans; /* * Display related stuff @@ -195,6 +196,10 @@ struct intel_encoder { void (*update_complete)(struct intel_atomic_state *, struct intel_encoder *, struct intel_crtc *); + void (*pre_disable)(struct intel_atomic_state *, + struct intel_encoder *, + const struct intel_crtc_state *, + const struct drm_connector_state *); void (*disable)(struct intel_atomic_state *, struct intel_encoder *, const struct intel_crtc_state *, @@ -263,6 +268,9 @@ struct intel_encoder { * Returns whether the port clock is enabled or not. */ bool (*is_clock_enabled)(struct intel_encoder *encoder); + const struct intel_ddi_buf_trans *(*get_buf_trans)(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + int *n_entries); enum hpd_pin hpd_pin; enum intel_display_power_domain power_domain; /* for communication with audio component; protected by av_mutex */ @@ -310,7 +318,7 @@ struct intel_panel { /* DPCD backlight */ union { struct { - u8 pwmgen_bit_count; + struct drm_edp_backlight_info info; } vesa; struct { bool sdr_uses_aux; @@ -880,6 +888,18 @@ enum intel_output_format { INTEL_OUTPUT_FORMAT_YCBCR444, }; +struct intel_mpllb_state { + u32 clock; /* in KHz */ + u32 ref_control; + u32 mpllb_cp; + u32 mpllb_div; + u32 mpllb_div2; + u32 mpllb_fracn1; + u32 mpllb_fracn2; + u32 mpllb_sscen; + u32 mpllb_sscstep; +}; + struct intel_crtc_state { /* * uapi (drm) state. This is the software state shown to userspace. @@ -1014,7 +1034,10 @@ struct intel_crtc_state { struct intel_shared_dpll *shared_dpll; /* Actual register state of the dpll, for shared dpll cross-checking. */ - struct intel_dpll_hw_state dpll_hw_state; + union { + struct intel_dpll_hw_state dpll_hw_state; + struct intel_mpllb_state mpllb_state; + }; /* * ICL reserved DPLLs for the CRTC/port. The active PLL is selected by @@ -1040,7 +1063,9 @@ struct intel_crtc_state { bool has_psr; bool has_psr2; bool enable_psr2_sel_fetch; + bool req_psr2_sdp_prior_scanline; u32 dc3co_exitline; + u16 su_y_granularity; /* * Frequence the dpll for the port should run at. 
Differs from the @@ -1493,12 +1518,14 @@ struct intel_psr { bool colorimetry_support; bool psr2_enabled; bool psr2_sel_fetch_enabled; + bool req_psr2_sdp_prior_scanline; u8 sink_sync_latency; ktime_t last_entry_attempt; ktime_t last_exit; bool sink_not_reliable; bool irq_aux_error; - u16 su_x_granularity; + u16 su_w_granularity; + u16 su_y_granularity; u32 dc3co_exitline; u32 dc3co_exit_delay; struct delayed_work dc3co_work; @@ -1604,6 +1631,7 @@ struct intel_dp { /* Display stream compression testing */ bool force_dsc_en; + int force_dsc_bpp; bool hobl_failed; bool hobl_active; diff --git a/drivers/gpu/drm/i915/display/intel_dmc.c b/drivers/gpu/drm/i915/display/intel_dmc.c index 97308da28059..3c3c6cb5c0df 100644 --- a/drivers/gpu/drm/i915/display/intel_dmc.c +++ b/drivers/gpu/drm/i915/display/intel_dmc.c @@ -45,6 +45,10 @@ #define GEN12_DMC_MAX_FW_SIZE ICL_DMC_MAX_FW_SIZE +#define ADLP_DMC_PATH DMC_PATH(adlp, 2, 10) +#define ADLP_DMC_VERSION_REQUIRED DMC_VERSION(2, 10) +MODULE_FIRMWARE(ADLP_DMC_PATH); + #define ADLS_DMC_PATH DMC_PATH(adls, 2, 01) #define ADLS_DMC_VERSION_REQUIRED DMC_VERSION(2, 1) MODULE_FIRMWARE(ADLS_DMC_PATH); @@ -53,12 +57,12 @@ MODULE_FIRMWARE(ADLS_DMC_PATH); #define DG1_DMC_VERSION_REQUIRED DMC_VERSION(2, 2) MODULE_FIRMWARE(DG1_DMC_PATH); -#define RKL_DMC_PATH DMC_PATH(rkl, 2, 02) -#define RKL_DMC_VERSION_REQUIRED DMC_VERSION(2, 2) +#define RKL_DMC_PATH DMC_PATH(rkl, 2, 03) +#define RKL_DMC_VERSION_REQUIRED DMC_VERSION(2, 3) MODULE_FIRMWARE(RKL_DMC_PATH); -#define TGL_DMC_PATH DMC_PATH(tgl, 2, 08) -#define TGL_DMC_VERSION_REQUIRED DMC_VERSION(2, 8) +#define TGL_DMC_PATH DMC_PATH(tgl, 2, 12) +#define TGL_DMC_VERSION_REQUIRED DMC_VERSION(2, 12) MODULE_FIRMWARE(TGL_DMC_PATH); #define ICL_DMC_PATH DMC_PATH(icl, 1, 09) @@ -66,11 +70,6 @@ MODULE_FIRMWARE(TGL_DMC_PATH); #define ICL_DMC_MAX_FW_SIZE 0x6000 MODULE_FIRMWARE(ICL_DMC_PATH); -#define CNL_DMC_PATH DMC_PATH(cnl, 1, 07) -#define CNL_DMC_VERSION_REQUIRED DMC_VERSION(1, 7) -#define CNL_DMC_MAX_FW_SIZE GLK_DMC_MAX_FW_SIZE -MODULE_FIRMWARE(CNL_DMC_PATH); - #define GLK_DMC_PATH DMC_PATH(glk, 1, 04) #define GLK_DMC_VERSION_REQUIRED DMC_VERSION(1, 4) #define GLK_DMC_MAX_FW_SIZE 0x4000 @@ -96,6 +95,7 @@ MODULE_FIRMWARE(BXT_DMC_PATH); #define PACKAGE_V2_MAX_FW_INFO_ENTRIES 32 #define DMC_V1_MAX_MMIO_COUNT 8 #define DMC_V3_MAX_MMIO_COUNT 20 +#define DMC_V1_MMIO_START_RANGE 0x80000 struct intel_css_header { /* 0x09 for DMC */ @@ -239,53 +239,18 @@ struct stepping_info { bool intel_dmc_has_payload(struct drm_i915_private *i915) { - return i915->dmc.dmc_payload; + return i915->dmc.dmc_info[DMC_FW_MAIN].payload; } -static const struct stepping_info skl_stepping_info[] = { - {'A', '0'}, {'B', '0'}, {'C', '0'}, - {'D', '0'}, {'E', '0'}, {'F', '0'}, - {'G', '0'}, {'H', '0'}, {'I', '0'}, - {'J', '0'}, {'K', '0'} -}; - -static const struct stepping_info bxt_stepping_info[] = { - {'A', '0'}, {'A', '1'}, {'A', '2'}, - {'B', '0'}, {'B', '1'}, {'B', '2'} -}; - -static const struct stepping_info icl_stepping_info[] = { - {'A', '0'}, {'A', '1'}, {'A', '2'}, - {'B', '0'}, {'B', '2'}, - {'C', '0'} -}; - -static const struct stepping_info no_stepping_info = { '*', '*' }; - static const struct stepping_info * -intel_get_stepping_info(struct drm_i915_private *dev_priv) +intel_get_stepping_info(struct drm_i915_private *i915, + struct stepping_info *si) { - const struct stepping_info *si; - unsigned int size; - - if (IS_ICELAKE(dev_priv)) { - size = ARRAY_SIZE(icl_stepping_info); - si = icl_stepping_info; - } else if (IS_SKYLAKE(dev_priv)) { - size 
= ARRAY_SIZE(skl_stepping_info); - si = skl_stepping_info; - } else if (IS_BROXTON(dev_priv)) { - size = ARRAY_SIZE(bxt_stepping_info); - si = bxt_stepping_info; - } else { - size = 0; - si = NULL; - } - - if (INTEL_REVID(dev_priv) < size) - return si + INTEL_REVID(dev_priv); + const char *step_name = intel_step_name(RUNTIME_INFO(i915)->step.display_step); - return &no_stepping_info; + si->stepping = step_name[0]; + si->substepping = step_name[1]; + return si; } static void gen9_set_dc_state_debugmask(struct drm_i915_private *dev_priv) @@ -316,8 +281,8 @@ static void gen9_set_dc_state_debugmask(struct drm_i915_private *dev_priv) */ void intel_dmc_load_program(struct drm_i915_private *dev_priv) { - u32 *payload = dev_priv->dmc.dmc_payload; - u32 i, fw_size; + struct intel_dmc *dmc = &dev_priv->dmc; + u32 id, i; if (!HAS_DMC(dev_priv)) { drm_err(&dev_priv->drm, @@ -325,26 +290,31 @@ void intel_dmc_load_program(struct drm_i915_private *dev_priv) return; } - if (!intel_dmc_has_payload(dev_priv)) { + if (!dev_priv->dmc.dmc_info[DMC_FW_MAIN].payload) { drm_err(&dev_priv->drm, "Tried to program CSR with empty payload\n"); return; } - fw_size = dev_priv->dmc.dmc_fw_size; assert_rpm_wakelock_held(&dev_priv->runtime_pm); preempt_disable(); - for (i = 0; i < fw_size; i++) - intel_uncore_write_fw(&dev_priv->uncore, DMC_PROGRAM(i), - payload[i]); + for (id = 0; id < DMC_FW_MAX; id++) { + for (i = 0; i < dmc->dmc_info[id].dmc_fw_size; i++) { + intel_uncore_write_fw(&dev_priv->uncore, + DMC_PROGRAM(dmc->dmc_info[id].start_mmioaddr, i), + dmc->dmc_info[id].payload[i]); + } + } preempt_enable(); - for (i = 0; i < dev_priv->dmc.mmio_count; i++) { - intel_de_write(dev_priv, dev_priv->dmc.mmioaddr[i], - dev_priv->dmc.mmiodata[i]); + for (id = 0; id < DMC_FW_MAX; id++) { + for (i = 0; i < dmc->dmc_info[id].mmio_count; i++) { + intel_de_write(dev_priv, dmc->dmc_info[id].mmioaddr[i], + dmc->dmc_info[id].mmiodata[i]); + } } dev_priv->dmc.dc_state = 0; @@ -352,62 +322,72 @@ void intel_dmc_load_program(struct drm_i915_private *dev_priv) gen9_set_dc_state_debugmask(dev_priv); } +static bool fw_info_matches_stepping(const struct intel_fw_info *fw_info, + const struct stepping_info *si) +{ + if ((fw_info->substepping == '*' && si->stepping == fw_info->stepping) || + (si->stepping == fw_info->stepping && si->substepping == fw_info->substepping) || + /* + * If we don't find a more specific one from above two checks, we + * then check for the generic one to be sure to work even with + * "broken firmware" + */ + (si->stepping == '*' && si->substepping == fw_info->substepping) || + (fw_info->stepping == '*' && fw_info->substepping == '*')) + return true; + + return false; +} + /* * Search fw_info table for dmc_offset to find firmware binary: num_entries is * already sanitized. */ -static u32 find_dmc_fw_offset(const struct intel_fw_info *fw_info, +static void dmc_set_fw_offset(struct intel_dmc *dmc, + const struct intel_fw_info *fw_info, unsigned int num_entries, const struct stepping_info *si, u8 package_ver) { - u32 dmc_offset = DMC_DEFAULT_FW_OFFSET; - unsigned int i; + unsigned int i, id; + + struct drm_i915_private *i915 = container_of(dmc, typeof(*i915), dmc); for (i = 0; i < num_entries; i++) { - if (package_ver > 1 && fw_info[i].dmc_id != 0) - continue; + id = package_ver <= 1 ? 
DMC_FW_MAIN : fw_info[i].dmc_id; - if (fw_info[i].substepping == '*' && - si->stepping == fw_info[i].stepping) { - dmc_offset = fw_info[i].offset; - break; + if (id >= DMC_FW_MAX) { + drm_dbg(&i915->drm, "Unsupported firmware id: %u\n", id); + continue; } - if (si->stepping == fw_info[i].stepping && - si->substepping == fw_info[i].substepping) { - dmc_offset = fw_info[i].offset; - break; - } + /* More specific versions come first, so we don't even have to + * check for the stepping since we already found a previous FW + * for this id. + */ + if (dmc->dmc_info[id].present) + continue; - if (fw_info[i].stepping == '*' && - fw_info[i].substepping == '*') { - /* - * In theory we should stop the search as generic - * entries should always come after the more specific - * ones, but let's continue to make sure to work even - * with "broken" firmwares. If we don't find a more - * specific one, then we use this entry - */ - dmc_offset = fw_info[i].offset; + if (fw_info_matches_stepping(&fw_info[i], si)) { + dmc->dmc_info[id].present = true; + dmc->dmc_info[id].dmc_offset = fw_info[i].offset; } } - - return dmc_offset; } static u32 parse_dmc_fw_header(struct intel_dmc *dmc, const struct intel_dmc_header_base *dmc_header, - size_t rem_size) + size_t rem_size, u8 dmc_id) { struct drm_i915_private *i915 = container_of(dmc, typeof(*i915), dmc); + struct dmc_fw_info *dmc_info = &dmc->dmc_info[dmc_id]; unsigned int header_len_bytes, dmc_header_size, payload_size, i; const u32 *mmioaddr, *mmiodata; - u32 mmio_count, mmio_count_max; + u32 mmio_count, mmio_count_max, start_mmioaddr; u8 *payload; - BUILD_BUG_ON(ARRAY_SIZE(dmc->mmioaddr) < DMC_V3_MAX_MMIO_COUNT || - ARRAY_SIZE(dmc->mmioaddr) < DMC_V1_MAX_MMIO_COUNT); + BUILD_BUG_ON(ARRAY_SIZE(dmc_info->mmioaddr) < DMC_V3_MAX_MMIO_COUNT || + ARRAY_SIZE(dmc_info->mmioaddr) < DMC_V1_MAX_MMIO_COUNT); /* * Check if we can access common fields, we will checkc again below @@ -430,6 +410,7 @@ static u32 parse_dmc_fw_header(struct intel_dmc *dmc, mmio_count_max = DMC_V3_MAX_MMIO_COUNT; /* header_len is in dwords */ header_len_bytes = dmc_header->header_len * 4; + start_mmioaddr = v3->start_mmioaddr; dmc_header_size = sizeof(*v3); } else if (dmc_header->header_ver == 1) { const struct intel_dmc_header_v1 *v1 = @@ -443,6 +424,7 @@ static u32 parse_dmc_fw_header(struct intel_dmc *dmc, mmio_count = v1->mmio_count; mmio_count_max = DMC_V1_MAX_MMIO_COUNT; header_len_bytes = dmc_header->header_len; + start_mmioaddr = DMC_V1_MMIO_START_RANGE; dmc_header_size = sizeof(*v1); } else { drm_err(&i915->drm, "Unknown DMC fw header version: %u\n", @@ -463,16 +445,11 @@ static u32 parse_dmc_fw_header(struct intel_dmc *dmc, } for (i = 0; i < mmio_count; i++) { - if (mmioaddr[i] < DMC_MMIO_START_RANGE || - mmioaddr[i] > DMC_MMIO_END_RANGE) { - drm_err(&i915->drm, "DMC firmware has wrong mmio address 0x%x\n", - mmioaddr[i]); - return 0; - } - dmc->mmioaddr[i] = _MMIO(mmioaddr[i]); - dmc->mmiodata[i] = mmiodata[i]; + dmc_info->mmioaddr[i] = _MMIO(mmioaddr[i]); + dmc_info->mmiodata[i] = mmiodata[i]; } - dmc->mmio_count = mmio_count; + dmc_info->mmio_count = mmio_count; + dmc_info->start_mmioaddr = start_mmioaddr; rem_size -= header_len_bytes; @@ -485,14 +462,14 @@ static u32 parse_dmc_fw_header(struct intel_dmc *dmc, drm_err(&i915->drm, "DMC FW too big (%u bytes)\n", payload_size); return 0; } - dmc->dmc_fw_size = dmc_header->fw_size; + dmc_info->dmc_fw_size = dmc_header->fw_size; - dmc->dmc_payload = kmalloc(payload_size, GFP_KERNEL); - if (!dmc->dmc_payload) + dmc_info->payload = 
kmalloc(payload_size, GFP_KERNEL); + if (!dmc_info->payload) return 0; payload = (u8 *)(dmc_header) + header_len_bytes; - memcpy(dmc->dmc_payload, payload, payload_size); + memcpy(dmc_info->payload, payload, payload_size); return header_len_bytes + payload_size; @@ -509,7 +486,7 @@ parse_dmc_fw_package(struct intel_dmc *dmc, { struct drm_i915_private *i915 = container_of(dmc, typeof(*i915), dmc); u32 package_size = sizeof(struct intel_package_header); - u32 num_entries, max_entries, dmc_offset; + u32 num_entries, max_entries; const struct intel_fw_info *fw_info; if (rem_size < package_size) @@ -545,16 +522,11 @@ parse_dmc_fw_package(struct intel_dmc *dmc, fw_info = (const struct intel_fw_info *) ((u8 *)package_header + sizeof(*package_header)); - dmc_offset = find_dmc_fw_offset(fw_info, num_entries, si, - package_header->header_ver); - if (dmc_offset == DMC_DEFAULT_FW_OFFSET) { - drm_err(&i915->drm, "DMC firmware not supported for %c stepping\n", - si->stepping); - return 0; - } + dmc_set_fw_offset(dmc, fw_info, num_entries, si, + package_header->header_ver); /* dmc_offset is in dwords */ - return package_size + dmc_offset * 4; + return package_size; error_truncated: drm_err(&i915->drm, "Truncated DMC firmware, refusing.\n"); @@ -604,9 +576,11 @@ static void parse_dmc_fw(struct drm_i915_private *dev_priv, struct intel_package_header *package_header; struct intel_dmc_header_base *dmc_header; struct intel_dmc *dmc = &dev_priv->dmc; - const struct stepping_info *si = intel_get_stepping_info(dev_priv); + struct stepping_info display_info = { '*', '*'}; + const struct stepping_info *si = intel_get_stepping_info(dev_priv, &display_info); u32 readcount = 0; - u32 r; + u32 r, offset; + int id; if (!fw) return; @@ -627,9 +601,19 @@ static void parse_dmc_fw(struct drm_i915_private *dev_priv, readcount += r; - /* Extract dmc_header information */ - dmc_header = (struct intel_dmc_header_base *)&fw->data[readcount]; - parse_dmc_fw_header(dmc, dmc_header, fw->size - readcount); + for (id = 0; id < DMC_FW_MAX; id++) { + if (!dev_priv->dmc.dmc_info[id].present) + continue; + + offset = readcount + dmc->dmc_info[id].dmc_offset * 4; + if (fw->size - offset < 0) { + drm_err(&dev_priv->drm, "Reading beyond the fw_size\n"); + continue; + } + + dmc_header = (struct intel_dmc_header_base *)&fw->data[offset]; + parse_dmc_fw_header(dmc, dmc_header, fw->size - offset, id); + } } static void intel_dmc_runtime_pm_get(struct drm_i915_private *dev_priv) @@ -705,7 +689,11 @@ void intel_dmc_ucode_init(struct drm_i915_private *dev_priv) */ intel_dmc_runtime_pm_get(dev_priv); - if (IS_ALDERLAKE_S(dev_priv)) { + if (IS_ALDERLAKE_P(dev_priv)) { + dmc->fw_path = ADLP_DMC_PATH; + dmc->required_version = ADLP_DMC_VERSION_REQUIRED; + dmc->max_fw_size = GEN12_DMC_MAX_FW_SIZE; + } else if (IS_ALDERLAKE_S(dev_priv)) { dmc->fw_path = ADLS_DMC_PATH; dmc->required_version = ADLS_DMC_VERSION_REQUIRED; dmc->max_fw_size = GEN12_DMC_MAX_FW_SIZE; @@ -725,10 +713,6 @@ void intel_dmc_ucode_init(struct drm_i915_private *dev_priv) dmc->fw_path = ICL_DMC_PATH; dmc->required_version = ICL_DMC_VERSION_REQUIRED; dmc->max_fw_size = ICL_DMC_MAX_FW_SIZE; - } else if (IS_CANNONLAKE(dev_priv)) { - dmc->fw_path = CNL_DMC_PATH; - dmc->required_version = CNL_DMC_VERSION_REQUIRED; - dmc->max_fw_size = CNL_DMC_MAX_FW_SIZE; } else if (IS_GEMINILAKE(dev_priv)) { dmc->fw_path = GLK_DMC_PATH; dmc->required_version = GLK_DMC_VERSION_REQUIRED; @@ -827,5 +811,5 @@ void intel_dmc_ucode_fini(struct drm_i915_private *dev_priv) intel_dmc_ucode_suspend(dev_priv); 
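Illustrative sketch (not part of the patch): with the payload now tracked per firmware id, consumers are expected to gate on dmc_info[id].present (set by dmc_set_fw_offset() once a matching fw_info entry is found) and on dmc_info[id].payload (allocated later by parse_dmc_fw_header()), as parse_dmc_fw() does above. A hypothetical helper making that pattern explicit:

static bool dmc_fw_id_loaded(const struct intel_dmc *dmc, int id)
{
	/* valid ids are DMC_FW_MAIN..DMC_FW_PIPEB */
	if (id < DMC_FW_MAIN || id >= DMC_FW_MAX)
		return false;

	/* present: a matching package entry was found; payload: its header was parsed */
	return dmc->dmc_info[id].present && dmc->dmc_info[id].payload;
}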
drm_WARN_ON(&dev_priv->drm, dev_priv->dmc.wakeref); - kfree(dev_priv->dmc.dmc_payload); + kfree(dev_priv->dmc.dmc_info[DMC_FW_MAIN].payload); } diff --git a/drivers/gpu/drm/i915/display/intel_dmc.h b/drivers/gpu/drm/i915/display/intel_dmc.h index 4c22f567b61b..c3c00ff03869 100644 --- a/drivers/gpu/drm/i915/display/intel_dmc.h +++ b/drivers/gpu/drm/i915/display/intel_dmc.h @@ -16,17 +16,30 @@ struct drm_i915_private; #define DMC_VERSION_MAJOR(version) ((version) >> 16) #define DMC_VERSION_MINOR(version) ((version) & 0xffff) +enum { + DMC_FW_MAIN = 0, + DMC_FW_PIPEA, + DMC_FW_PIPEB, + DMC_FW_MAX +}; + struct intel_dmc { struct work_struct work; const char *fw_path; u32 required_version; u32 max_fw_size; /* bytes */ - u32 *dmc_payload; - u32 dmc_fw_size; /* dwords */ u32 version; - u32 mmio_count; - i915_reg_t mmioaddr[20]; - u32 mmiodata[20]; + struct dmc_fw_info { + u32 mmio_count; + i915_reg_t mmioaddr[20]; + u32 mmiodata[20]; + u32 dmc_offset; + u32 start_mmioaddr; + u32 dmc_fw_size; /*dwords */ + u32 *payload; + bool present; + } dmc_info[DMC_FW_MAX]; + u32 dc_state; u32 target_dc_state; u32 allowed_dc_mask; diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 862c1df69cc2..04175f359fd6 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -222,29 +222,6 @@ bool intel_dp_can_bigjoiner(struct intel_dp *intel_dp) encoder->port != PORT_A); } -static int cnl_max_source_rate(struct intel_dp *intel_dp) -{ - struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); - struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev); - enum port port = dig_port->base.port; - - u32 voltage = intel_de_read(dev_priv, CNL_PORT_COMP_DW3) & VOLTAGE_INFO_MASK; - - /* Low voltage SKUs are limited to max of 5.4G */ - if (voltage == VOLTAGE_INFO_0_85V) - return 540000; - - /* For this SKU 8.1G is supported in all ports */ - if (IS_CNL_WITH_PORT_F(dev_priv)) - return 810000; - - /* For other SKUs, max rate on ports A and D is 5.4G */ - if (port == PORT_A || port == PORT_D) - return 540000; - - return 810000; -} - static int icl_max_source_rate(struct intel_dp *intel_dp) { struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); @@ -270,7 +247,7 @@ static void intel_dp_set_source_rates(struct intel_dp *intel_dp) { /* The values must be in increasing order */ - static const int cnl_rates[] = { + static const int icl_rates[] = { 162000, 216000, 270000, 324000, 432000, 540000, 648000, 810000 }; static const int bxt_rates[] = { @@ -295,12 +272,10 @@ intel_dp_set_source_rates(struct intel_dp *intel_dp) drm_WARN_ON(&dev_priv->drm, intel_dp->source_rates || intel_dp->num_source_rates); - if (DISPLAY_VER(dev_priv) >= 11 || IS_CANNONLAKE(dev_priv)) { - source_rates = cnl_rates; - size = ARRAY_SIZE(cnl_rates); - if (DISPLAY_VER(dev_priv) == 10) - max_rate = cnl_max_source_rate(intel_dp); - else if (IS_JSL_EHL(dev_priv)) + if (DISPLAY_VER(dev_priv) >= 11) { + source_rates = icl_rates; + size = ARRAY_SIZE(icl_rates); + if (IS_JSL_EHL(dev_priv)) max_rate = ehl_max_source_rate(intel_dp); else max_rate = icl_max_source_rate(intel_dp); @@ -1274,6 +1249,23 @@ static int intel_dp_dsc_compute_config(struct intel_dp *intel_dp, pipe_config->pipe_bpp); pipe_config->dsc.slice_count = dsc_dp_slice_count; } + + /* As of today we support DSC for only RGB */ + if (intel_dp->force_dsc_bpp) { + if (intel_dp->force_dsc_bpp >= 8 && + intel_dp->force_dsc_bpp < pipe_bpp) { + drm_dbg_kms(&dev_priv->drm, + "DSC BPP forced to %d", + 
intel_dp->force_dsc_bpp); + pipe_config->dsc.compressed_bpp = + intel_dp->force_dsc_bpp; + } else { + drm_dbg_kms(&dev_priv->drm, + "Invalid DSC BPP %d", + intel_dp->force_dsc_bpp); + } + } + /* * VDSC engine operates at 1 Pixel per clock, so if peak pixel rate * is greater than the maximum Cdclock and if slice count is even @@ -3031,9 +3023,6 @@ void intel_read_dp_sdp(struct intel_encoder *encoder, struct intel_crtc_state *crtc_state, unsigned int type) { - if (encoder->type != INTEL_OUTPUT_DDI) - return; - switch (type) { case DP_SDP_VSC: intel_read_dp_vsc_sdp(encoder, crtc_state, @@ -3342,6 +3331,9 @@ static void intel_dp_process_phy_request(struct intel_dp *intel_dp, intel_dp_autotest_phy_ddi_enable(intel_dp, crtc_state); + drm_dp_dpcd_write(&intel_dp->aux, DP_TRAINING_LANE0_SET, + intel_dp->train_set, crtc_state->lane_count); + drm_dp_set_phy_test_pattern(&intel_dp->aux, data, link_status[DP_DPCD_REV]); } @@ -4736,7 +4728,7 @@ static void intel_dp_set_drrs_state(struct drm_i915_private *dev_priv, int refresh_rate) { struct intel_dp *intel_dp = dev_priv->drrs.dp; - struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->uapi.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); enum drrs_refresh_rate_type index = DRRS_HIGH_RR; if (refresh_rate <= 0) { @@ -4750,7 +4742,7 @@ static void intel_dp_set_drrs_state(struct drm_i915_private *dev_priv, return; } - if (!intel_crtc) { + if (!crtc) { drm_dbg_kms(&dev_priv->drm, "DRRS: intel_crtc not initialized\n"); return; @@ -5233,7 +5225,8 @@ static bool intel_edp_init_connector(struct intel_dp *intel_dp, } intel_panel_init(&intel_connector->panel, fixed_mode, downclock_mode); - intel_connector->panel.backlight.power = intel_pps_backlight_power; + if (!(dev_priv->quirks & QUIRK_NO_PPS_BACKLIGHT_POWER_HOOK)) + intel_connector->panel.backlight.power = intel_pps_backlight_power; intel_panel_setup_backlight(connector, pipe); if (fixed_mode) { diff --git a/drivers/gpu/drm/i915/display/intel_dp_aux.c b/drivers/gpu/drm/i915/display/intel_dp_aux.c index 7c048d2ecf43..f483f479dd0b 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_aux.c +++ b/drivers/gpu/drm/i915/display/intel_dp_aux.c @@ -158,7 +158,6 @@ static u32 skl_get_aux_send_ctl(struct intel_dp *intel_dp, /* * Max timeout values: * SKL-GLK: 1.6ms - * CNL: 3.2ms * ICL+: 4ms */ ret = DP_AUX_CH_CTL_SEND_BUSY | diff --git a/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c index 8e9ac9ba1d38..6ac568617ef3 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c +++ b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c @@ -107,7 +107,7 @@ intel_dp_aux_supports_hdr_backlight(struct intel_connector *connector) u8 tcon_cap[4]; ret = drm_dp_dpcd_read(aux, INTEL_EDP_HDR_TCON_CAP0, tcon_cap, sizeof(tcon_cap)); - if (ret < 0) + if (ret != sizeof(tcon_cap)) return false; if (!(tcon_cap[1] & INTEL_EDP_HDR_TCON_BRIGHTNESS_NITS_CAP)) @@ -137,7 +137,7 @@ intel_dp_aux_hdr_get_backlight(struct intel_connector *connector, enum pipe pipe u8 tmp; u8 buf[2] = { 0 }; - if (drm_dp_dpcd_readb(&intel_dp->aux, INTEL_EDP_HDR_GETSET_CTRL_PARAMS, &tmp) < 0) { + if (drm_dp_dpcd_readb(&intel_dp->aux, INTEL_EDP_HDR_GETSET_CTRL_PARAMS, &tmp) != 1) { drm_err(&i915->drm, "Failed to read current backlight mode from DPCD\n"); return 0; } @@ -153,7 +153,8 @@ intel_dp_aux_hdr_get_backlight(struct intel_connector *connector, enum pipe pipe return panel->backlight.max; } - if (drm_dp_dpcd_read(&intel_dp->aux, INTEL_EDP_BRIGHTNESS_NITS_LSB, buf, 
sizeof(buf)) < 0) { + if (drm_dp_dpcd_read(&intel_dp->aux, INTEL_EDP_BRIGHTNESS_NITS_LSB, buf, + sizeof(buf)) != sizeof(buf)) { drm_err(&i915->drm, "Failed to read brightness from DPCD\n"); return 0; } @@ -172,7 +173,8 @@ intel_dp_aux_hdr_set_aux_backlight(const struct drm_connector_state *conn_state, buf[0] = level & 0xFF; buf[1] = (level & 0xFF00) >> 8; - if (drm_dp_dpcd_write(&intel_dp->aux, INTEL_EDP_BRIGHTNESS_NITS_LSB, buf, 4) < 0) + if (drm_dp_dpcd_write(&intel_dp->aux, INTEL_EDP_BRIGHTNESS_NITS_LSB, buf, + sizeof(buf)) != sizeof(buf)) drm_err(dev, "Failed to write brightness level to DPCD\n"); } @@ -203,7 +205,7 @@ intel_dp_aux_hdr_enable_backlight(const struct intel_crtc_state *crtc_state, u8 old_ctrl, ctrl; ret = drm_dp_dpcd_readb(&intel_dp->aux, INTEL_EDP_HDR_GETSET_CTRL_PARAMS, &old_ctrl); - if (ret < 0) { + if (ret != 1) { drm_err(&i915->drm, "Failed to read current backlight control mode: %d\n", ret); return; } @@ -221,7 +223,7 @@ intel_dp_aux_hdr_enable_backlight(const struct intel_crtc_state *crtc_state, } if (ctrl != old_ctrl) - if (drm_dp_dpcd_writeb(&intel_dp->aux, INTEL_EDP_HDR_GETSET_CTRL_PARAMS, ctrl) < 0) + if (drm_dp_dpcd_writeb(&intel_dp->aux, INTEL_EDP_HDR_GETSET_CTRL_PARAMS, ctrl) != 1) drm_err(&i915->drm, "Failed to configure DPCD brightness controls\n"); } @@ -268,153 +270,19 @@ intel_dp_aux_hdr_setup_backlight(struct intel_connector *connector, enum pipe pi } /* VESA backlight callbacks */ -static void set_vesa_backlight_enable(struct intel_dp *intel_dp, bool enable) -{ - struct drm_i915_private *i915 = dp_to_i915(intel_dp); - u8 reg_val = 0; - - /* Early return when display use other mechanism to enable backlight. */ - if (!(intel_dp->edp_dpcd[1] & DP_EDP_BACKLIGHT_AUX_ENABLE_CAP)) - return; - - if (drm_dp_dpcd_readb(&intel_dp->aux, DP_EDP_DISPLAY_CONTROL_REGISTER, - ®_val) < 0) { - drm_dbg_kms(&i915->drm, "Failed to read DPCD register 0x%x\n", - DP_EDP_DISPLAY_CONTROL_REGISTER); - return; - } - if (enable) - reg_val |= DP_EDP_BACKLIGHT_ENABLE; - else - reg_val &= ~(DP_EDP_BACKLIGHT_ENABLE); - - if (drm_dp_dpcd_writeb(&intel_dp->aux, DP_EDP_DISPLAY_CONTROL_REGISTER, - reg_val) != 1) { - drm_dbg_kms(&i915->drm, "Failed to %s aux backlight\n", - enabledisable(enable)); - } -} - -static bool intel_dp_aux_vesa_backlight_dpcd_mode(struct intel_connector *connector) -{ - struct intel_dp *intel_dp = intel_attached_dp(connector); - struct drm_i915_private *i915 = dp_to_i915(intel_dp); - u8 mode_reg; - - if (drm_dp_dpcd_readb(&intel_dp->aux, - DP_EDP_BACKLIGHT_MODE_SET_REGISTER, - &mode_reg) != 1) { - drm_dbg_kms(&i915->drm, - "Failed to read the DPCD register 0x%x\n", - DP_EDP_BACKLIGHT_MODE_SET_REGISTER); - return false; - } - - return (mode_reg & DP_EDP_BACKLIGHT_CONTROL_MODE_MASK) == - DP_EDP_BACKLIGHT_CONTROL_MODE_DPCD; -} - -/* - * Read the current backlight value from DPCD register(s) based - * on if 8-bit(MSB) or 16-bit(MSB and LSB) values are supported - */ static u32 intel_dp_aux_vesa_get_backlight(struct intel_connector *connector, enum pipe unused) { - struct intel_dp *intel_dp = intel_attached_dp(connector); - struct drm_i915_private *i915 = dp_to_i915(intel_dp); - u8 read_val[2] = { 0x0 }; - u16 level = 0; - - /* - * If we're not in DPCD control mode yet, the programmed brightness - * value is meaningless and we should assume max brightness - */ - if (!intel_dp_aux_vesa_backlight_dpcd_mode(connector)) - return connector->panel.backlight.max; - - if (drm_dp_dpcd_read(&intel_dp->aux, DP_EDP_BACKLIGHT_BRIGHTNESS_MSB, - &read_val, sizeof(read_val)) < 0) { - 
drm_dbg_kms(&i915->drm, "Failed to read DPCD register 0x%x\n", - DP_EDP_BACKLIGHT_BRIGHTNESS_MSB); - return 0; - } - level = read_val[0]; - if (intel_dp->edp_dpcd[2] & DP_EDP_BACKLIGHT_BRIGHTNESS_BYTE_COUNT) - level = (read_val[0] << 8 | read_val[1]); - - return level; + return connector->panel.backlight.level; } -/* - * Sends the current backlight level over the aux channel, checking if its using - * 8-bit or 16 bit value (MSB and LSB) - */ static void -intel_dp_aux_vesa_set_backlight(const struct drm_connector_state *conn_state, - u32 level) +intel_dp_aux_vesa_set_backlight(const struct drm_connector_state *conn_state, u32 level) { struct intel_connector *connector = to_intel_connector(conn_state->connector); - struct intel_dp *intel_dp = intel_attached_dp(connector); - struct drm_i915_private *i915 = dp_to_i915(intel_dp); - u8 vals[2] = { 0x0 }; - - vals[0] = level; - - /* Write the MSB and/or LSB */ - if (intel_dp->edp_dpcd[2] & DP_EDP_BACKLIGHT_BRIGHTNESS_BYTE_COUNT) { - vals[0] = (level & 0xFF00) >> 8; - vals[1] = (level & 0xFF); - } - if (drm_dp_dpcd_write(&intel_dp->aux, DP_EDP_BACKLIGHT_BRIGHTNESS_MSB, - vals, sizeof(vals)) < 0) { - drm_dbg_kms(&i915->drm, - "Failed to write aux backlight level\n"); - return; - } -} - -/* - * Set PWM Frequency divider to match desired frequency in vbt. - * The PWM Frequency is calculated as 27Mhz / (F x P). - * - Where F = PWM Frequency Pre-Divider value programmed by field 7:0 of the - * EDP_BACKLIGHT_FREQ_SET register (DPCD Address 00728h) - * - Where P = 2^Pn, where Pn is the value programmed by field 4:0 of the - * EDP_PWMGEN_BIT_COUNT register (DPCD Address 00724h) - */ -static bool intel_dp_aux_vesa_set_pwm_freq(struct intel_connector *connector) -{ - struct drm_i915_private *dev_priv = to_i915(connector->base.dev); - struct intel_dp *intel_dp = intel_attached_dp(connector); - const u8 pn = connector->panel.backlight.edp.vesa.pwmgen_bit_count; - int freq, fxp, f, fxp_actual, fxp_min, fxp_max; - - freq = dev_priv->vbt.backlight.pwm_freq_hz; - if (!freq) { - drm_dbg_kms(&dev_priv->drm, - "Use panel default backlight frequency\n"); - return false; - } - - fxp = DIV_ROUND_CLOSEST(KHz(DP_EDP_BACKLIGHT_FREQ_BASE_KHZ), freq); - f = clamp(DIV_ROUND_CLOSEST(fxp, 1 << pn), 1, 255); - fxp_actual = f << pn; - - /* Ensure frequency is within 25% of desired value */ - fxp_min = DIV_ROUND_CLOSEST(fxp * 3, 4); - fxp_max = DIV_ROUND_CLOSEST(fxp * 5, 4); - - if (fxp_min > fxp_actual || fxp_actual > fxp_max) { - drm_dbg_kms(&dev_priv->drm, "Actual frequency out of range\n"); - return false; - } + struct intel_panel *panel = &connector->panel; + struct intel_dp *intel_dp = enc_to_intel_dp(connector->encoder); - if (drm_dp_dpcd_writeb(&intel_dp->aux, - DP_EDP_BACKLIGHT_FREQ_SET, (u8) f) < 0) { - drm_dbg_kms(&dev_priv->drm, - "Failed to write aux backlight freq\n"); - return false; - } - return true; + drm_edp_backlight_set_level(&intel_dp->aux, &panel->backlight.edp.vesa.info, level); } static void @@ -422,159 +290,46 @@ intel_dp_aux_vesa_enable_backlight(const struct intel_crtc_state *crtc_state, const struct drm_connector_state *conn_state, u32 level) { struct intel_connector *connector = to_intel_connector(conn_state->connector); - struct intel_dp *intel_dp = intel_attached_dp(connector); - struct drm_i915_private *i915 = dp_to_i915(intel_dp); struct intel_panel *panel = &connector->panel; - u8 dpcd_buf, new_dpcd_buf, edp_backlight_mode; - u8 pwmgen_bit_count = panel->backlight.edp.vesa.pwmgen_bit_count; - - if (drm_dp_dpcd_readb(&intel_dp->aux, - 
DP_EDP_BACKLIGHT_MODE_SET_REGISTER, &dpcd_buf) != 1) { - drm_dbg_kms(&i915->drm, "Failed to read DPCD register 0x%x\n", - DP_EDP_BACKLIGHT_MODE_SET_REGISTER); - return; - } - - new_dpcd_buf = dpcd_buf; - edp_backlight_mode = dpcd_buf & DP_EDP_BACKLIGHT_CONTROL_MODE_MASK; - - switch (edp_backlight_mode) { - case DP_EDP_BACKLIGHT_CONTROL_MODE_PWM: - case DP_EDP_BACKLIGHT_CONTROL_MODE_PRESET: - case DP_EDP_BACKLIGHT_CONTROL_MODE_PRODUCT: - new_dpcd_buf &= ~DP_EDP_BACKLIGHT_CONTROL_MODE_MASK; - new_dpcd_buf |= DP_EDP_BACKLIGHT_CONTROL_MODE_DPCD; - - if (drm_dp_dpcd_writeb(&intel_dp->aux, - DP_EDP_PWMGEN_BIT_COUNT, - pwmgen_bit_count) < 0) - drm_dbg_kms(&i915->drm, - "Failed to write aux pwmgen bit count\n"); - - break; - - /* Do nothing when it is already DPCD mode */ - case DP_EDP_BACKLIGHT_CONTROL_MODE_DPCD: - default: - break; - } - - if (intel_dp->edp_dpcd[2] & DP_EDP_BACKLIGHT_FREQ_AUX_SET_CAP) - if (intel_dp_aux_vesa_set_pwm_freq(connector)) - new_dpcd_buf |= DP_EDP_BACKLIGHT_FREQ_AUX_SET_ENABLE; - - if (new_dpcd_buf != dpcd_buf) { - if (drm_dp_dpcd_writeb(&intel_dp->aux, - DP_EDP_BACKLIGHT_MODE_SET_REGISTER, new_dpcd_buf) < 0) { - drm_dbg_kms(&i915->drm, - "Failed to write aux backlight mode\n"); - } - } + struct intel_dp *intel_dp = enc_to_intel_dp(connector->encoder); - intel_dp_aux_vesa_set_backlight(conn_state, level); - set_vesa_backlight_enable(intel_dp, true); + drm_edp_backlight_enable(&intel_dp->aux, &panel->backlight.edp.vesa.info, level); } static void intel_dp_aux_vesa_disable_backlight(const struct drm_connector_state *old_conn_state, u32 level) { - set_vesa_backlight_enable(enc_to_intel_dp(to_intel_encoder(old_conn_state->best_encoder)), - false); -} - -static u32 intel_dp_aux_vesa_calc_max_backlight(struct intel_connector *connector) -{ - struct drm_i915_private *i915 = to_i915(connector->base.dev); - struct intel_dp *intel_dp = intel_attached_dp(connector); + struct intel_connector *connector = to_intel_connector(old_conn_state->connector); struct intel_panel *panel = &connector->panel; - u32 max_backlight = 0; - int freq, fxp, fxp_min, fxp_max, fxp_actual, f = 1; - u8 pn, pn_min, pn_max; - - if (drm_dp_dpcd_readb(&intel_dp->aux, DP_EDP_PWMGEN_BIT_COUNT, &pn) == 1) { - pn &= DP_EDP_PWMGEN_BIT_COUNT_MASK; - max_backlight = (1 << pn) - 1; - } - - /* Find desired value of (F x P) - * Note that, if F x P is out of supported range, the maximum value or - * minimum value will applied automatically. So no need to check that. - */ - freq = i915->vbt.backlight.pwm_freq_hz; - drm_dbg_kms(&i915->drm, "VBT defined backlight frequency %u Hz\n", - freq); - if (!freq) { - drm_dbg_kms(&i915->drm, - "Use panel default backlight frequency\n"); - return max_backlight; - } - - fxp = DIV_ROUND_CLOSEST(KHz(DP_EDP_BACKLIGHT_FREQ_BASE_KHZ), freq); - - /* Use highest possible value of Pn for more granularity of brightness - * adjustment while satifying the conditions below. - * - Pn is in the range of Pn_min and Pn_max - * - F is in the range of 1 and 255 - * - FxP is within 25% of desired value. - * Note: 25% is arbitrary value and may need some tweak. 
- */ - if (drm_dp_dpcd_readb(&intel_dp->aux, - DP_EDP_PWMGEN_BIT_COUNT_CAP_MIN, &pn_min) != 1) { - drm_dbg_kms(&i915->drm, - "Failed to read pwmgen bit count cap min\n"); - return max_backlight; - } - if (drm_dp_dpcd_readb(&intel_dp->aux, - DP_EDP_PWMGEN_BIT_COUNT_CAP_MAX, &pn_max) != 1) { - drm_dbg_kms(&i915->drm, - "Failed to read pwmgen bit count cap max\n"); - return max_backlight; - } - pn_min &= DP_EDP_PWMGEN_BIT_COUNT_MASK; - pn_max &= DP_EDP_PWMGEN_BIT_COUNT_MASK; - - fxp_min = DIV_ROUND_CLOSEST(fxp * 3, 4); - fxp_max = DIV_ROUND_CLOSEST(fxp * 5, 4); - if (fxp_min < (1 << pn_min) || (255 << pn_max) < fxp_max) { - drm_dbg_kms(&i915->drm, - "VBT defined backlight frequency out of range\n"); - return max_backlight; - } - - for (pn = pn_max; pn >= pn_min; pn--) { - f = clamp(DIV_ROUND_CLOSEST(fxp, 1 << pn), 1, 255); - fxp_actual = f << pn; - if (fxp_min <= fxp_actual && fxp_actual <= fxp_max) - break; - } - - drm_dbg_kms(&i915->drm, "Using eDP pwmgen bit count of %d\n", pn); - if (drm_dp_dpcd_writeb(&intel_dp->aux, - DP_EDP_PWMGEN_BIT_COUNT, pn) < 0) { - drm_dbg_kms(&i915->drm, - "Failed to write aux pwmgen bit count\n"); - return max_backlight; - } - panel->backlight.edp.vesa.pwmgen_bit_count = pn; - - max_backlight = (1 << pn) - 1; + struct intel_dp *intel_dp = enc_to_intel_dp(connector->encoder); - return max_backlight; + drm_edp_backlight_disable(&intel_dp->aux, &panel->backlight.edp.vesa.info); } -static int intel_dp_aux_vesa_setup_backlight(struct intel_connector *connector, - enum pipe pipe) +static int intel_dp_aux_vesa_setup_backlight(struct intel_connector *connector, enum pipe pipe) { + struct intel_dp *intel_dp = intel_attached_dp(connector); struct intel_panel *panel = &connector->panel; + struct drm_i915_private *i915 = dp_to_i915(intel_dp); + u16 current_level; + u8 current_mode; + int ret; - panel->backlight.max = intel_dp_aux_vesa_calc_max_backlight(connector); - if (!panel->backlight.max) - return -ENODEV; + ret = drm_edp_backlight_init(&intel_dp->aux, &panel->backlight.edp.vesa.info, + i915->vbt.backlight.pwm_freq_hz, intel_dp->edp_dpcd, + ¤t_level, ¤t_mode); + if (ret < 0) + return ret; + panel->backlight.max = panel->backlight.edp.vesa.info.max; panel->backlight.min = 0; - panel->backlight.level = intel_dp_aux_vesa_get_backlight(connector, pipe); - panel->backlight.enabled = intel_dp_aux_vesa_backlight_dpcd_mode(connector) && - panel->backlight.level != 0; + if (current_mode == DP_EDP_BACKLIGHT_CONTROL_MODE_DPCD) { + panel->backlight.level = current_level; + panel->backlight.enabled = panel->backlight.level != 0; + } else { + panel->backlight.level = panel->backlight.max; + panel->backlight.enabled = false; + } return 0; } @@ -585,16 +340,12 @@ intel_dp_aux_supports_vesa_backlight(struct intel_connector *connector) struct intel_dp *intel_dp = intel_attached_dp(connector); struct drm_i915_private *i915 = dp_to_i915(intel_dp); - /* Check the eDP Display control capabilities registers to determine if - * the panel can support backlight control over the aux channel. - * - * TODO: We currently only support AUX only backlight configurations, not backlights which + /* TODO: We currently only support AUX only backlight configurations, not backlights which * require a mix of PWM and AUX controls to work. In the mean time, these machines typically * work just fine using normal PWM controls anyway. 
*/ - if (intel_dp->edp_dpcd[1] & DP_EDP_TCON_BACKLIGHT_ADJUSTMENT_CAP && - (intel_dp->edp_dpcd[1] & DP_EDP_BACKLIGHT_AUX_ENABLE_CAP) && - (intel_dp->edp_dpcd[2] & DP_EDP_BACKLIGHT_BRIGHTNESS_AUX_SET_CAP)) { + if ((intel_dp->edp_dpcd[1] & DP_EDP_BACKLIGHT_AUX_ENABLE_CAP) && + drm_edp_backlight_supported(intel_dp->edp_dpcd)) { drm_dbg_kms(&i915->drm, "AUX Backlight Control Supported!\n"); return true; } diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index b170e272bdee..8d13d7b26a25 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -308,9 +308,9 @@ intel_dp_mst_atomic_check(struct drm_connector *connector, * connector */ if (new_crtc) { - struct intel_crtc *intel_crtc = to_intel_crtc(new_crtc); + struct intel_crtc *crtc = to_intel_crtc(new_crtc); struct intel_crtc_state *crtc_state = - intel_atomic_get_new_crtc_state(state, intel_crtc); + intel_atomic_get_new_crtc_state(state, crtc); if (!crtc_state || !drm_atomic_crtc_needs_modeset(&crtc_state->uapi) || @@ -348,6 +348,16 @@ static void wait_for_act_sent(struct intel_encoder *encoder, drm_dp_check_act_status(&intel_dp->mst_mgr); } +static void intel_mst_pre_disable_dp(struct intel_atomic_state *state, + struct intel_encoder *encoder, + const struct intel_crtc_state *old_crtc_state, + const struct drm_connector_state *old_conn_state) +{ + if (old_crtc_state->has_audio) + intel_audio_codec_disable(encoder, old_crtc_state, + old_conn_state); +} + static void intel_mst_disable_dp(struct intel_atomic_state *state, struct intel_encoder *encoder, const struct intel_crtc_state *old_crtc_state, @@ -372,9 +382,6 @@ static void intel_mst_disable_dp(struct intel_atomic_state *state, if (ret) { drm_dbg_kms(&i915->drm, "failed to update payload %d\n", ret); } - if (old_crtc_state->has_audio) - intel_audio_codec_disable(encoder, - old_crtc_state, old_conn_state); } static void intel_mst_post_disable_dp(struct intel_atomic_state *state, @@ -542,7 +549,7 @@ static void intel_mst_enable_dp(struct intel_atomic_state *state, struct intel_digital_port *dig_port = intel_mst->primary; struct intel_dp *intel_dp = &dig_port->dp; struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - u32 val; + enum transcoder trans = pipe_config->cpu_transcoder; drm_WARN_ON(&dev_priv->drm, pipe_config->has_pch_encoder); @@ -550,12 +557,8 @@ static void intel_mst_enable_dp(struct intel_atomic_state *state, intel_ddi_enable_transcoder_func(encoder, pipe_config); - val = intel_de_read(dev_priv, - TRANS_DDI_FUNC_CTL(pipe_config->cpu_transcoder)); - val |= TRANS_DDI_DP_VC_PAYLOAD_ALLOC; - intel_de_write(dev_priv, - TRANS_DDI_FUNC_CTL(pipe_config->cpu_transcoder), - val); + intel_de_rmw(dev_priv, TRANS_DDI_FUNC_CTL(trans), 0, + TRANS_DDI_DP_VC_PAYLOAD_ALLOC); drm_dbg_kms(&dev_priv->drm, "active links %d\n", intel_dp->active_mst_links); @@ -564,6 +567,10 @@ static void intel_mst_enable_dp(struct intel_atomic_state *state, drm_dp_update_payload_part2(&intel_dp->mst_mgr); + if (DISPLAY_VER(dev_priv) >= 12 && pipe_config->fec_enable) + intel_de_rmw(dev_priv, CHICKEN_TRANS(trans), 0, + FECSTALL_DIS_DPTSTREAM_DPTTG); + intel_enable_pipe(pipe_config); intel_crtc_vblank_on(pipe_config); @@ -835,13 +842,10 @@ static struct drm_connector *intel_dp_add_mst_connector(struct drm_dp_mst_topolo intel_attach_force_audio_property(connector); intel_attach_broadcast_rgb_property(connector); - if (DISPLAY_VER(dev_priv) <= 12) { - ret = intel_dp_hdcp_init(dig_port, intel_connector); - 
if (ret) - drm_dbg_kms(&dev_priv->drm, "[%s:%d] HDCP MST init failed, skipping.\n", - connector->name, connector->base.id); - } - + ret = intel_dp_hdcp_init(dig_port, intel_connector); + if (ret) + drm_dbg_kms(&dev_priv->drm, "[%s:%d] HDCP MST init failed, skipping.\n", + connector->name, connector->base.id); /* * Reuse the prop from the SST connector because we're * not allowed to create new props after device registration. @@ -906,6 +910,7 @@ intel_dp_create_fake_mst_encoder(struct intel_digital_port *dig_port, enum pipe intel_encoder->compute_config = intel_dp_mst_compute_config; intel_encoder->compute_config_late = intel_dp_mst_compute_config_late; + intel_encoder->pre_disable = intel_mst_pre_disable_dp; intel_encoder->disable = intel_mst_disable_dp; intel_encoder->post_disable = intel_mst_post_disable_dp; intel_encoder->update_pipe = intel_ddi_update_pipe; diff --git a/drivers/gpu/drm/i915/display/intel_dpll.c b/drivers/gpu/drm/i915/display/intel_dpll.c index 89635da9f6f6..14515e62c05e 100644 --- a/drivers/gpu/drm/i915/display/intel_dpll.c +++ b/drivers/gpu/drm/i915/display/intel_dpll.c @@ -11,6 +11,7 @@ #include "intel_lvds.h" #include "intel_panel.h" #include "intel_sideband.h" +#include "display/intel_snps_phy.h" struct intel_limit { struct { @@ -923,12 +924,13 @@ static int hsw_crtc_compute_clock(struct intel_crtc *crtc, struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); struct intel_atomic_state *state = to_intel_atomic_state(crtc_state->uapi.state); + struct intel_encoder *encoder = + intel_get_crtc_new_encoder(state, crtc_state); - if (!intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DSI) || - DISPLAY_VER(dev_priv) >= 11) { - struct intel_encoder *encoder = - intel_get_crtc_new_encoder(state, crtc_state); - + if (IS_DG2(dev_priv)) { + return intel_mpllb_calc_state(crtc_state, encoder); + } else if (!intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DSI) || + DISPLAY_VER(dev_priv) >= 11) { if (!intel_reserve_shared_dplls(state, crtc, encoder)) { drm_dbg_kms(&dev_priv->drm, "failed to find PLL for pipe %c\n", diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c index 71ac57670043..5c91d125a337 100644 --- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c @@ -168,7 +168,7 @@ intel_combo_pll_enable_reg(struct drm_i915_private *i915, else if (IS_JSL_EHL(i915) && (pll->info->id == DPLL_ID_EHL_DPLL4)) return MG_PLL_ENABLE(0); - return CNL_DPLL_ENABLE(pll->info->id); + return ICL_DPLL_ENABLE(pll->info->id); } static i915_reg_t @@ -2346,160 +2346,7 @@ static const struct intel_dpll_mgr bxt_pll_mgr = { .dump_hw_state = bxt_dump_hw_state, }; -static void cnl_ddi_pll_enable(struct drm_i915_private *dev_priv, - struct intel_shared_dpll *pll) -{ - const enum intel_dpll_id id = pll->info->id; - u32 val; - - /* 1. Enable DPLL power in DPLL_ENABLE. */ - val = intel_de_read(dev_priv, CNL_DPLL_ENABLE(id)); - val |= PLL_POWER_ENABLE; - intel_de_write(dev_priv, CNL_DPLL_ENABLE(id), val); - - /* 2. Wait for DPLL power state enabled in DPLL_ENABLE. */ - if (intel_de_wait_for_set(dev_priv, CNL_DPLL_ENABLE(id), - PLL_POWER_STATE, 5)) - drm_err(&dev_priv->drm, "PLL %d Power not enabled\n", id); - - /* - * 3. Configure DPLL_CFGCR0 to set SSC enable/disable, - * select DP mode, and set DP link rate. - */ - val = pll->state.hw_state.cfgcr0; - intel_de_write(dev_priv, CNL_DPLL_CFGCR0(id), val); - - /* 4. 
Reab back to ensure writes completed */ - intel_de_posting_read(dev_priv, CNL_DPLL_CFGCR0(id)); - - /* 3. Configure DPLL_CFGCR0 */ - /* Avoid touch CFGCR1 if HDMI mode is not enabled */ - if (pll->state.hw_state.cfgcr0 & DPLL_CFGCR0_HDMI_MODE) { - val = pll->state.hw_state.cfgcr1; - intel_de_write(dev_priv, CNL_DPLL_CFGCR1(id), val); - /* 4. Reab back to ensure writes completed */ - intel_de_posting_read(dev_priv, CNL_DPLL_CFGCR1(id)); - } - - /* - * 5. If the frequency will result in a change to the voltage - * requirement, follow the Display Voltage Frequency Switching - * Sequence Before Frequency Change - * - * Note: DVFS is actually handled via the cdclk code paths, - * hence we do nothing here. - */ - - /* 6. Enable DPLL in DPLL_ENABLE. */ - val = intel_de_read(dev_priv, CNL_DPLL_ENABLE(id)); - val |= PLL_ENABLE; - intel_de_write(dev_priv, CNL_DPLL_ENABLE(id), val); - - /* 7. Wait for PLL lock status in DPLL_ENABLE. */ - if (intel_de_wait_for_set(dev_priv, CNL_DPLL_ENABLE(id), PLL_LOCK, 5)) - drm_err(&dev_priv->drm, "PLL %d not locked\n", id); - - /* - * 8. If the frequency will result in a change to the voltage - * requirement, follow the Display Voltage Frequency Switching - * Sequence After Frequency Change - * - * Note: DVFS is actually handled via the cdclk code paths, - * hence we do nothing here. - */ - - /* - * 9. turn on the clock for the DDI and map the DPLL to the DDI - * Done at intel_ddi_clk_select - */ -} - -static void cnl_ddi_pll_disable(struct drm_i915_private *dev_priv, - struct intel_shared_dpll *pll) -{ - const enum intel_dpll_id id = pll->info->id; - u32 val; - - /* - * 1. Configure DPCLKA_CFGCR0 to turn off the clock for the DDI. - * Done at intel_ddi_post_disable - */ - - /* - * 2. If the frequency will result in a change to the voltage - * requirement, follow the Display Voltage Frequency Switching - * Sequence Before Frequency Change - * - * Note: DVFS is actually handled via the cdclk code paths, - * hence we do nothing here. - */ - - /* 3. Disable DPLL through DPLL_ENABLE. */ - val = intel_de_read(dev_priv, CNL_DPLL_ENABLE(id)); - val &= ~PLL_ENABLE; - intel_de_write(dev_priv, CNL_DPLL_ENABLE(id), val); - - /* 4. Wait for PLL not locked status in DPLL_ENABLE. */ - if (intel_de_wait_for_clear(dev_priv, CNL_DPLL_ENABLE(id), PLL_LOCK, 5)) - drm_err(&dev_priv->drm, "PLL %d locked\n", id); - - /* - * 5. If the frequency will result in a change to the voltage - * requirement, follow the Display Voltage Frequency Switching - * Sequence After Frequency Change - * - * Note: DVFS is actually handled via the cdclk code paths, - * hence we do nothing here. - */ - - /* 6. Disable DPLL power in DPLL_ENABLE. */ - val = intel_de_read(dev_priv, CNL_DPLL_ENABLE(id)); - val &= ~PLL_POWER_ENABLE; - intel_de_write(dev_priv, CNL_DPLL_ENABLE(id), val); - - /* 7. Wait for DPLL power state disabled in DPLL_ENABLE. 
*/ - if (intel_de_wait_for_clear(dev_priv, CNL_DPLL_ENABLE(id), - PLL_POWER_STATE, 5)) - drm_err(&dev_priv->drm, "PLL %d Power not disabled\n", id); -} - -static bool cnl_ddi_pll_get_hw_state(struct drm_i915_private *dev_priv, - struct intel_shared_dpll *pll, - struct intel_dpll_hw_state *hw_state) -{ - const enum intel_dpll_id id = pll->info->id; - intel_wakeref_t wakeref; - u32 val; - bool ret; - - wakeref = intel_display_power_get_if_enabled(dev_priv, - POWER_DOMAIN_DISPLAY_CORE); - if (!wakeref) - return false; - - ret = false; - - val = intel_de_read(dev_priv, CNL_DPLL_ENABLE(id)); - if (!(val & PLL_ENABLE)) - goto out; - - val = intel_de_read(dev_priv, CNL_DPLL_CFGCR0(id)); - hw_state->cfgcr0 = val; - - /* avoid reading back stale values if HDMI mode is not enabled */ - if (val & DPLL_CFGCR0_HDMI_MODE) { - hw_state->cfgcr1 = intel_de_read(dev_priv, - CNL_DPLL_CFGCR1(id)); - } - ret = true; - -out: - intel_display_power_put(dev_priv, POWER_DOMAIN_DISPLAY_CORE, wakeref); - - return ret; -} - -static void cnl_wrpll_get_multipliers(int bestdiv, int *pdiv, +static void icl_wrpll_get_multipliers(int bestdiv, int *pdiv, int *qdiv, int *kdiv) { /* even dividers */ @@ -2538,7 +2385,7 @@ static void cnl_wrpll_get_multipliers(int bestdiv, int *pdiv, } } -static void cnl_wrpll_params_populate(struct skl_wrpll_params *params, +static void icl_wrpll_params_populate(struct skl_wrpll_params *params, u32 dco_freq, u32 ref_freq, int pdiv, int qdiv, int kdiv) { @@ -2586,349 +2433,19 @@ static void cnl_wrpll_params_populate(struct skl_wrpll_params *params, params->dco_fraction = dco & 0x7fff; } -static bool -__cnl_ddi_calculate_wrpll(struct intel_crtc_state *crtc_state, - struct skl_wrpll_params *wrpll_params, - int ref_clock) -{ - u32 afe_clock = crtc_state->port_clock * 5; - u32 dco_min = 7998000; - u32 dco_max = 10000000; - u32 dco_mid = (dco_min + dco_max) / 2; - static const int dividers[] = { 2, 4, 6, 8, 10, 12, 14, 16, - 18, 20, 24, 28, 30, 32, 36, 40, - 42, 44, 48, 50, 52, 54, 56, 60, - 64, 66, 68, 70, 72, 76, 78, 80, - 84, 88, 90, 92, 96, 98, 100, 102, - 3, 5, 7, 9, 15, 21 }; - u32 dco, best_dco = 0, dco_centrality = 0; - u32 best_dco_centrality = U32_MAX; /* Spec meaning of 999999 MHz */ - int d, best_div = 0, pdiv = 0, qdiv = 0, kdiv = 0; - - for (d = 0; d < ARRAY_SIZE(dividers); d++) { - dco = afe_clock * dividers[d]; - - if ((dco <= dco_max) && (dco >= dco_min)) { - dco_centrality = abs(dco - dco_mid); - - if (dco_centrality < best_dco_centrality) { - best_dco_centrality = dco_centrality; - best_div = dividers[d]; - best_dco = dco; - } - } - } - - if (best_div == 0) - return false; - - cnl_wrpll_get_multipliers(best_div, &pdiv, &qdiv, &kdiv); - cnl_wrpll_params_populate(wrpll_params, best_dco, ref_clock, - pdiv, qdiv, kdiv); - - return true; -} - -static bool -cnl_ddi_calculate_wrpll(struct intel_crtc_state *crtc_state, - struct skl_wrpll_params *wrpll_params) -{ - struct drm_i915_private *i915 = to_i915(crtc_state->uapi.crtc->dev); - - return __cnl_ddi_calculate_wrpll(crtc_state, wrpll_params, - i915->dpll.ref_clks.nssc); -} - -static bool cnl_ddi_hdmi_pll_dividers(struct intel_crtc_state *crtc_state) -{ - u32 cfgcr0, cfgcr1; - struct skl_wrpll_params wrpll_params = { 0, }; - - cfgcr0 = DPLL_CFGCR0_HDMI_MODE; - - if (!cnl_ddi_calculate_wrpll(crtc_state, &wrpll_params)) - return false; - - cfgcr0 |= DPLL_CFGCR0_DCO_FRACTION(wrpll_params.dco_fraction) | - wrpll_params.dco_integer; - - cfgcr1 = DPLL_CFGCR1_QDIV_RATIO(wrpll_params.qdiv_ratio) | - DPLL_CFGCR1_QDIV_MODE(wrpll_params.qdiv_mode) | 
- DPLL_CFGCR1_KDIV(wrpll_params.kdiv) | - DPLL_CFGCR1_PDIV(wrpll_params.pdiv) | - DPLL_CFGCR1_CENTRAL_FREQ; - - memset(&crtc_state->dpll_hw_state, 0, - sizeof(crtc_state->dpll_hw_state)); - - crtc_state->dpll_hw_state.cfgcr0 = cfgcr0; - crtc_state->dpll_hw_state.cfgcr1 = cfgcr1; - return true; -} - /* - * Display WA #22010492432: ehl, tgl + * Display WA #22010492432: ehl, tgl, adl-p * Program half of the nominal DCO divider fraction value. */ static bool ehl_combo_pll_div_frac_wa_needed(struct drm_i915_private *i915) { return ((IS_PLATFORM(i915, INTEL_ELKHARTLAKE) && - IS_JSL_EHL_REVID(i915, EHL_REVID_B0, REVID_FOREVER)) || - IS_TIGERLAKE(i915)) && + IS_JSL_EHL_DISPLAY_STEP(i915, STEP_B0, STEP_FOREVER)) || + IS_TIGERLAKE(i915) || IS_ALDERLAKE_P(i915)) && i915->dpll.ref_clks.nssc == 38400; } -static int __cnl_ddi_wrpll_get_freq(struct drm_i915_private *dev_priv, - const struct intel_shared_dpll *pll, - const struct intel_dpll_hw_state *pll_state, - int ref_clock) -{ - u32 dco_fraction; - u32 p0, p1, p2, dco_freq; - - p0 = pll_state->cfgcr1 & DPLL_CFGCR1_PDIV_MASK; - p2 = pll_state->cfgcr1 & DPLL_CFGCR1_KDIV_MASK; - - if (pll_state->cfgcr1 & DPLL_CFGCR1_QDIV_MODE(1)) - p1 = (pll_state->cfgcr1 & DPLL_CFGCR1_QDIV_RATIO_MASK) >> - DPLL_CFGCR1_QDIV_RATIO_SHIFT; - else - p1 = 1; - - - switch (p0) { - case DPLL_CFGCR1_PDIV_2: - p0 = 2; - break; - case DPLL_CFGCR1_PDIV_3: - p0 = 3; - break; - case DPLL_CFGCR1_PDIV_5: - p0 = 5; - break; - case DPLL_CFGCR1_PDIV_7: - p0 = 7; - break; - } - - switch (p2) { - case DPLL_CFGCR1_KDIV_1: - p2 = 1; - break; - case DPLL_CFGCR1_KDIV_2: - p2 = 2; - break; - case DPLL_CFGCR1_KDIV_3: - p2 = 3; - break; - } - - dco_freq = (pll_state->cfgcr0 & DPLL_CFGCR0_DCO_INTEGER_MASK) * - ref_clock; - - dco_fraction = (pll_state->cfgcr0 & DPLL_CFGCR0_DCO_FRACTION_MASK) >> - DPLL_CFGCR0_DCO_FRACTION_SHIFT; - - if (ehl_combo_pll_div_frac_wa_needed(dev_priv)) - dco_fraction *= 2; - - dco_freq += (dco_fraction * ref_clock) / 0x8000; - - if (drm_WARN_ON(&dev_priv->drm, p0 == 0 || p1 == 0 || p2 == 0)) - return 0; - - return dco_freq / (p0 * p1 * p2 * 5); -} - -static int cnl_ddi_wrpll_get_freq(struct drm_i915_private *i915, - const struct intel_shared_dpll *pll, - const struct intel_dpll_hw_state *pll_state) -{ - return __cnl_ddi_wrpll_get_freq(i915, pll, pll_state, - i915->dpll.ref_clks.nssc); -} - -static bool -cnl_ddi_dp_set_dpll_hw_state(struct intel_crtc_state *crtc_state) -{ - u32 cfgcr0; - - cfgcr0 = DPLL_CFGCR0_SSC_ENABLE; - - switch (crtc_state->port_clock / 2) { - case 81000: - cfgcr0 |= DPLL_CFGCR0_LINK_RATE_810; - break; - case 135000: - cfgcr0 |= DPLL_CFGCR0_LINK_RATE_1350; - break; - case 270000: - cfgcr0 |= DPLL_CFGCR0_LINK_RATE_2700; - break; - /* eDP 1.4 rates */ - case 162000: - cfgcr0 |= DPLL_CFGCR0_LINK_RATE_1620; - break; - case 108000: - cfgcr0 |= DPLL_CFGCR0_LINK_RATE_1080; - break; - case 216000: - cfgcr0 |= DPLL_CFGCR0_LINK_RATE_2160; - break; - case 324000: - /* Some SKUs may require elevated I/O voltage to support this */ - cfgcr0 |= DPLL_CFGCR0_LINK_RATE_3240; - break; - case 405000: - /* Some SKUs may require elevated I/O voltage to support this */ - cfgcr0 |= DPLL_CFGCR0_LINK_RATE_4050; - break; - } - - memset(&crtc_state->dpll_hw_state, 0, - sizeof(crtc_state->dpll_hw_state)); - - crtc_state->dpll_hw_state.cfgcr0 = cfgcr0; - - return true; -} - -static int cnl_ddi_lcpll_get_freq(struct drm_i915_private *i915, - const struct intel_shared_dpll *pll, - const struct intel_dpll_hw_state *pll_state) -{ - int link_clock = 0; - - switch (pll_state->cfgcr0 & 
DPLL_CFGCR0_LINK_RATE_MASK) { - case DPLL_CFGCR0_LINK_RATE_810: - link_clock = 81000; - break; - case DPLL_CFGCR0_LINK_RATE_1080: - link_clock = 108000; - break; - case DPLL_CFGCR0_LINK_RATE_1350: - link_clock = 135000; - break; - case DPLL_CFGCR0_LINK_RATE_1620: - link_clock = 162000; - break; - case DPLL_CFGCR0_LINK_RATE_2160: - link_clock = 216000; - break; - case DPLL_CFGCR0_LINK_RATE_2700: - link_clock = 270000; - break; - case DPLL_CFGCR0_LINK_RATE_3240: - link_clock = 324000; - break; - case DPLL_CFGCR0_LINK_RATE_4050: - link_clock = 405000; - break; - default: - drm_WARN(&i915->drm, 1, "Unsupported link rate\n"); - break; - } - - return link_clock * 2; -} - -static bool cnl_get_dpll(struct intel_atomic_state *state, - struct intel_crtc *crtc, - struct intel_encoder *encoder) -{ - struct intel_crtc_state *crtc_state = - intel_atomic_get_new_crtc_state(state, crtc); - struct drm_i915_private *i915 = to_i915(crtc_state->uapi.crtc->dev); - struct intel_shared_dpll *pll; - bool bret; - - if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) { - bret = cnl_ddi_hdmi_pll_dividers(crtc_state); - if (!bret) { - drm_dbg_kms(&i915->drm, - "Could not get HDMI pll dividers.\n"); - return false; - } - } else if (intel_crtc_has_dp_encoder(crtc_state)) { - bret = cnl_ddi_dp_set_dpll_hw_state(crtc_state); - if (!bret) { - drm_dbg_kms(&i915->drm, - "Could not set DP dpll HW state.\n"); - return false; - } - } else { - drm_dbg_kms(&i915->drm, - "Skip DPLL setup for output_types 0x%x\n", - crtc_state->output_types); - return false; - } - - pll = intel_find_shared_dpll(state, crtc, - &crtc_state->dpll_hw_state, - BIT(DPLL_ID_SKL_DPLL2) | - BIT(DPLL_ID_SKL_DPLL1) | - BIT(DPLL_ID_SKL_DPLL0)); - if (!pll) { - drm_dbg_kms(&i915->drm, "No PLL selected\n"); - return false; - } - - intel_reference_shared_dpll(state, crtc, - pll, &crtc_state->dpll_hw_state); - - crtc_state->shared_dpll = pll; - - return true; -} - -static int cnl_ddi_pll_get_freq(struct drm_i915_private *i915, - const struct intel_shared_dpll *pll, - const struct intel_dpll_hw_state *pll_state) -{ - if (pll_state->cfgcr0 & DPLL_CFGCR0_HDMI_MODE) - return cnl_ddi_wrpll_get_freq(i915, pll, pll_state); - else - return cnl_ddi_lcpll_get_freq(i915, pll, pll_state); -} - -static void cnl_update_dpll_ref_clks(struct drm_i915_private *i915) -{ - /* No SSC reference */ - i915->dpll.ref_clks.nssc = i915->cdclk.hw.ref; -} - -static void cnl_dump_hw_state(struct drm_i915_private *dev_priv, - const struct intel_dpll_hw_state *hw_state) -{ - drm_dbg_kms(&dev_priv->drm, "dpll_hw_state: " - "cfgcr0: 0x%x, cfgcr1: 0x%x\n", - hw_state->cfgcr0, - hw_state->cfgcr1); -} - -static const struct intel_shared_dpll_funcs cnl_ddi_pll_funcs = { - .enable = cnl_ddi_pll_enable, - .disable = cnl_ddi_pll_disable, - .get_hw_state = cnl_ddi_pll_get_hw_state, - .get_freq = cnl_ddi_pll_get_freq, -}; - -static const struct dpll_info cnl_plls[] = { - { "DPLL 0", &cnl_ddi_pll_funcs, DPLL_ID_SKL_DPLL0, 0 }, - { "DPLL 1", &cnl_ddi_pll_funcs, DPLL_ID_SKL_DPLL1, 0 }, - { "DPLL 2", &cnl_ddi_pll_funcs, DPLL_ID_SKL_DPLL2, 0 }, - { }, -}; - -static const struct intel_dpll_mgr cnl_pll_mgr = { - .dpll_info = cnl_plls, - .get_dplls = cnl_get_dpll, - .put_dplls = intel_put_dpll, - .update_ref_clks = cnl_update_dpll_ref_clks, - .dump_hw_state = cnl_dump_hw_state, -}; - struct icl_combo_pll_params { int clock; struct skl_wrpll_params wrpll; @@ -3105,17 +2622,104 @@ icl_calc_wrpll(struct intel_crtc_state *crtc_state, struct skl_wrpll_params *wrpll_params) { struct drm_i915_private *i915 = 
to_i915(crtc_state->uapi.crtc->dev); + int ref_clock = icl_wrpll_ref_clock(i915); + u32 afe_clock = crtc_state->port_clock * 5; + u32 dco_min = 7998000; + u32 dco_max = 10000000; + u32 dco_mid = (dco_min + dco_max) / 2; + static const int dividers[] = { 2, 4, 6, 8, 10, 12, 14, 16, + 18, 20, 24, 28, 30, 32, 36, 40, + 42, 44, 48, 50, 52, 54, 56, 60, + 64, 66, 68, 70, 72, 76, 78, 80, + 84, 88, 90, 92, 96, 98, 100, 102, + 3, 5, 7, 9, 15, 21 }; + u32 dco, best_dco = 0, dco_centrality = 0; + u32 best_dco_centrality = U32_MAX; /* Spec meaning of 999999 MHz */ + int d, best_div = 0, pdiv = 0, qdiv = 0, kdiv = 0; - return __cnl_ddi_calculate_wrpll(crtc_state, wrpll_params, - icl_wrpll_ref_clock(i915)); + for (d = 0; d < ARRAY_SIZE(dividers); d++) { + dco = afe_clock * dividers[d]; + + if (dco <= dco_max && dco >= dco_min) { + dco_centrality = abs(dco - dco_mid); + + if (dco_centrality < best_dco_centrality) { + best_dco_centrality = dco_centrality; + best_div = dividers[d]; + best_dco = dco; + } + } + } + + if (best_div == 0) + return false; + + icl_wrpll_get_multipliers(best_div, &pdiv, &qdiv, &kdiv); + icl_wrpll_params_populate(wrpll_params, best_dco, ref_clock, + pdiv, qdiv, kdiv); + + return true; } static int icl_ddi_combo_pll_get_freq(struct drm_i915_private *i915, const struct intel_shared_dpll *pll, const struct intel_dpll_hw_state *pll_state) { - return __cnl_ddi_wrpll_get_freq(i915, pll, pll_state, - icl_wrpll_ref_clock(i915)); + int ref_clock = icl_wrpll_ref_clock(i915); + u32 dco_fraction; + u32 p0, p1, p2, dco_freq; + + p0 = pll_state->cfgcr1 & DPLL_CFGCR1_PDIV_MASK; + p2 = pll_state->cfgcr1 & DPLL_CFGCR1_KDIV_MASK; + + if (pll_state->cfgcr1 & DPLL_CFGCR1_QDIV_MODE(1)) + p1 = (pll_state->cfgcr1 & DPLL_CFGCR1_QDIV_RATIO_MASK) >> + DPLL_CFGCR1_QDIV_RATIO_SHIFT; + else + p1 = 1; + + switch (p0) { + case DPLL_CFGCR1_PDIV_2: + p0 = 2; + break; + case DPLL_CFGCR1_PDIV_3: + p0 = 3; + break; + case DPLL_CFGCR1_PDIV_5: + p0 = 5; + break; + case DPLL_CFGCR1_PDIV_7: + p0 = 7; + break; + } + + switch (p2) { + case DPLL_CFGCR1_KDIV_1: + p2 = 1; + break; + case DPLL_CFGCR1_KDIV_2: + p2 = 2; + break; + case DPLL_CFGCR1_KDIV_3: + p2 = 3; + break; + } + + dco_freq = (pll_state->cfgcr0 & DPLL_CFGCR0_DCO_INTEGER_MASK) * + ref_clock; + + dco_fraction = (pll_state->cfgcr0 & DPLL_CFGCR0_DCO_FRACTION_MASK) >> + DPLL_CFGCR0_DCO_FRACTION_SHIFT; + + if (ehl_combo_pll_div_frac_wa_needed(i915)) + dco_fraction *= 2; + + dco_freq += (dco_fraction * ref_clock) / 0x8000; + + if (drm_WARN_ON(&i915->drm, p0 == 0 || p1 == 0 || p2 == 0)) + return 0; + + return dco_freq / (p0 * p1 * p2 * 5); } static void icl_calc_dpll_state(struct drm_i915_private *i915, @@ -4131,6 +3735,31 @@ static void icl_pll_enable(struct drm_i915_private *dev_priv, drm_err(&dev_priv->drm, "PLL %d not locked\n", pll->info->id); } +static void adlp_cmtg_clock_gating_wa(struct drm_i915_private *i915, struct intel_shared_dpll *pll) +{ + u32 val; + + if (!IS_ADLP_DISPLAY_STEP(i915, STEP_A0, STEP_B0) || + pll->info->id != DPLL_ID_ICL_DPLL0) + return; + /* + * Wa_16011069516:adl-p[a0] + * + * All CMTG regs are unreliable until CMTG clock gating is disabled, + * so we can only assume the default TRANS_CMTG_CHICKEN reg value and + * sanity check this assumption with a double read, which presumably + * returns the correct value even with clock gating on. + * + * Instead of the usual place for workarounds we apply this one here, + * since TRANS_CMTG_CHICKEN is only accessible while DPLL0 is enabled. 
+ */ + val = intel_de_read(i915, TRANS_CMTG_CHICKEN); + val = intel_de_read(i915, TRANS_CMTG_CHICKEN); + intel_de_write(i915, TRANS_CMTG_CHICKEN, DISABLE_DPT_CLK_GATING); + if (drm_WARN_ON(&i915->drm, val & ~DISABLE_DPT_CLK_GATING)) + drm_dbg_kms(&i915->drm, "Unexpected flags in TRANS_CMTG_CHICKEN: %08x\n", val); +} + static void combo_pll_enable(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll) { @@ -4160,6 +3789,8 @@ static void combo_pll_enable(struct drm_i915_private *dev_priv, icl_pll_enable(dev_priv, pll, enable_reg); + adlp_cmtg_clock_gating_wa(dev_priv, pll); + /* DVFS post sequence would be here. See the comment above. */ } @@ -4462,7 +4093,10 @@ void intel_shared_dpll_init(struct drm_device *dev) const struct dpll_info *dpll_info; int i; - if (IS_ALDERLAKE_P(dev_priv)) + if (IS_DG2(dev_priv)) + /* No shared DPLLs on DG2; port PLLs are part of the PHY */ + dpll_mgr = NULL; + else if (IS_ALDERLAKE_P(dev_priv)) dpll_mgr = &adlp_pll_mgr; else if (IS_ALDERLAKE_S(dev_priv)) dpll_mgr = &adls_pll_mgr; @@ -4476,8 +4110,6 @@ void intel_shared_dpll_init(struct drm_device *dev) dpll_mgr = &ehl_pll_mgr; else if (DISPLAY_VER(dev_priv) >= 11) dpll_mgr = &icl_pll_mgr; - else if (IS_CANNONLAKE(dev_priv)) - dpll_mgr = &cnl_pll_mgr; else if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) dpll_mgr = &bxt_pll_mgr; else if (DISPLAY_VER(dev_priv) == 9) @@ -4668,7 +4300,12 @@ void intel_dpll_readout_hw_state(struct drm_i915_private *i915) static void sanitize_dpll_state(struct drm_i915_private *i915, struct intel_shared_dpll *pll) { - if (!pll->on || pll->active_mask) + if (!pll->on) + return; + + adlp_cmtg_clock_gating_wa(i915, pll); + + if (pll->active_mask) return; drm_dbg_kms(&i915->drm, diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.h b/drivers/gpu/drm/i915/display/intel_dpll_mgr.h index 7fd031a70cfd..30e0aa5ca109 100644 --- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.h +++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.h @@ -204,9 +204,8 @@ struct intel_dpll_hw_state { /* HDMI only, 0 when used for DP */ u32 cfgcr1, cfgcr2; - /* cnl */ + /* icl */ u32 cfgcr0; - /* CNL also uses cfgcr1 */ /* bxt */ u32 ebb0, ebb4, pll0, pll1, pll2, pll3, pll6, pll8, pll9, pll10, pcsdw12; diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c index 1847a161cb37..ddfc17e21668 100644 --- a/drivers/gpu/drm/i915/display/intel_fbc.c +++ b/drivers/gpu/drm/i915/display/intel_fbc.c @@ -104,7 +104,7 @@ static void i8xx_fbc_activate(struct drm_i915_private *dev_priv) int i; u32 fbc_ctl; - /* Note: fbc.threshold == 1 for i8xx */ + /* Note: fbc.limit == 1 for i8xx */ cfb_pitch = params->cfb_size / FBC_LL_SIZE; if (params->fb.stride < cfb_pitch) cfb_pitch = params->fb.stride; @@ -148,16 +148,35 @@ static bool i8xx_fbc_is_active(struct drm_i915_private *dev_priv) return intel_de_read(dev_priv, FBC_CONTROL) & FBC_CTL_EN; } +static u32 g4x_dpfc_ctl_limit(struct drm_i915_private *i915) +{ + const struct intel_fbc_reg_params *params = &i915->fbc.params; + int limit = i915->fbc.limit; + + if (params->fb.format->cpp[0] == 2) + limit <<= 1; + + switch (limit) { + default: + MISSING_CASE(limit); + fallthrough; + case 1: + return DPFC_CTL_LIMIT_1X; + case 2: + return DPFC_CTL_LIMIT_2X; + case 4: + return DPFC_CTL_LIMIT_4X; + } +} + static void g4x_fbc_activate(struct drm_i915_private *dev_priv) { struct intel_fbc_reg_params *params = &dev_priv->fbc.params; u32 dpfc_ctl; dpfc_ctl = DPFC_CTL_PLANE(params->crtc.i9xx_plane) | DPFC_SR_EN; - if (params->fb.format->cpp[0] == 
2) - dpfc_ctl |= DPFC_CTL_LIMIT_2X; - else - dpfc_ctl |= DPFC_CTL_LIMIT_1X; + + dpfc_ctl |= g4x_dpfc_ctl_limit(dev_priv); if (params->fence_id >= 0) { dpfc_ctl |= DPFC_CTL_FENCE_EN | params->fence_id; @@ -235,24 +254,10 @@ static void ilk_fbc_activate(struct drm_i915_private *dev_priv) { struct intel_fbc_reg_params *params = &dev_priv->fbc.params; u32 dpfc_ctl; - int threshold = dev_priv->fbc.threshold; dpfc_ctl = DPFC_CTL_PLANE(params->crtc.i9xx_plane); - if (params->fb.format->cpp[0] == 2) - threshold++; - switch (threshold) { - case 4: - case 3: - dpfc_ctl |= DPFC_CTL_LIMIT_4X; - break; - case 2: - dpfc_ctl |= DPFC_CTL_LIMIT_2X; - break; - case 1: - dpfc_ctl |= DPFC_CTL_LIMIT_1X; - break; - } + dpfc_ctl |= g4x_dpfc_ctl_limit(dev_priv); if (params->fence_id >= 0) { dpfc_ctl |= DPFC_CTL_FENCE_EN; @@ -300,7 +305,6 @@ static void gen7_fbc_activate(struct drm_i915_private *dev_priv) { struct intel_fbc_reg_params *params = &dev_priv->fbc.params; u32 dpfc_ctl; - int threshold = dev_priv->fbc.threshold; /* Display WA #0529: skl, kbl, bxt. */ if (DISPLAY_VER(dev_priv) == 9) { @@ -318,21 +322,7 @@ static void gen7_fbc_activate(struct drm_i915_private *dev_priv) if (IS_IVYBRIDGE(dev_priv)) dpfc_ctl |= IVB_DPFC_CTL_PLANE(params->crtc.i9xx_plane); - if (params->fb.format->cpp[0] == 2) - threshold++; - - switch (threshold) { - case 4: - case 3: - dpfc_ctl |= DPFC_CTL_LIMIT_4X; - break; - case 2: - dpfc_ctl |= DPFC_CTL_LIMIT_2X; - break; - case 1: - dpfc_ctl |= DPFC_CTL_LIMIT_1X; - break; - } + dpfc_ctl |= g4x_dpfc_ctl_limit(dev_priv); if (params->fence_id >= 0) { dpfc_ctl |= IVB_DPFC_CTL_FENCE_EN; @@ -433,13 +423,8 @@ static u64 intel_fbc_cfb_base_max(struct drm_i915_private *i915) return BIT_ULL(32); } -static int find_compression_threshold(struct drm_i915_private *dev_priv, - struct drm_mm_node *node, - unsigned int size, - unsigned int fb_cpp) +static u64 intel_fbc_stolen_end(struct drm_i915_private *dev_priv) { - int compression_threshold = 1; - int ret; u64 end; /* The FBC hardware for BDW/SKL doesn't have access to the stolen @@ -452,51 +437,69 @@ static int find_compression_threshold(struct drm_i915_private *dev_priv, else end = U64_MAX; - end = min(end, intel_fbc_cfb_base_max(dev_priv)); + return min(end, intel_fbc_cfb_base_max(dev_priv)); +} - /* HACK: This code depends on what we will do in *_enable_fbc. If that - * code changes, this code needs to change as well. - * - * The enable_fbc code will attempt to use one of our 2 compression - * thresholds, therefore, in that case, we only have 1 resort. +static int intel_fbc_max_limit(struct drm_i915_private *dev_priv, int fb_cpp) +{ + /* + * FIXME: FBC1 can have arbitrary cfb stride, + * so we could support different compression ratios. */ + if (DISPLAY_VER(dev_priv) < 5 && !IS_G4X(dev_priv)) + return 1; + + /* WaFbcOnly1to1Ratio:ctg */ + if (IS_G4X(dev_priv)) + return 1; + + /* FBC2 can only do 1:1, 1:2, 1:4 */ + return fb_cpp == 2 ? 2 : 4; +} + +static int find_compression_limit(struct drm_i915_private *dev_priv, + unsigned int size, + unsigned int fb_cpp) +{ + struct intel_fbc *fbc = &dev_priv->fbc; + u64 end = intel_fbc_stolen_end(dev_priv); + int ret, limit = 1; /* Try to over-allocate to reduce reallocations and fragmentation. 
*/ - ret = i915_gem_stolen_insert_node_in_range(dev_priv, node, size <<= 1, - 4096, 0, end); + ret = i915_gem_stolen_insert_node_in_range(dev_priv, &fbc->compressed_fb, + size <<= 1, 4096, 0, end); if (ret == 0) - return compression_threshold; + return limit; -again: - /* HW's ability to limit the CFB is 1:4 */ - if (compression_threshold > 4 || - (fb_cpp == 2 && compression_threshold == 2)) - return 0; - - ret = i915_gem_stolen_insert_node_in_range(dev_priv, node, size >>= 1, - 4096, 0, end); - if (ret && DISPLAY_VER(dev_priv) <= 4) { - return 0; - } else if (ret) { - compression_threshold <<= 1; - goto again; - } else { - return compression_threshold; + for (; limit <= intel_fbc_max_limit(dev_priv, fb_cpp); limit <<= 1) { + ret = i915_gem_stolen_insert_node_in_range(dev_priv, &fbc->compressed_fb, + size >>= 1, 4096, 0, end); + if (ret == 0) + return limit; } + + return 0; } static int intel_fbc_alloc_cfb(struct drm_i915_private *dev_priv, unsigned int size, unsigned int fb_cpp) { struct intel_fbc *fbc = &dev_priv->fbc; - struct drm_mm_node *compressed_llb; int ret; drm_WARN_ON(&dev_priv->drm, drm_mm_node_allocated(&fbc->compressed_fb)); + drm_WARN_ON(&dev_priv->drm, + drm_mm_node_allocated(&fbc->compressed_llb)); - ret = find_compression_threshold(dev_priv, &fbc->compressed_fb, - size, fb_cpp); + if (DISPLAY_VER(dev_priv) < 5 && !IS_G4X(dev_priv)) { + ret = i915_gem_stolen_insert_node(dev_priv, &fbc->compressed_llb, + 4096, 4096); + if (ret) + goto err; + } + + ret = find_compression_limit(dev_priv, size, fb_cpp); if (!ret) goto err_llb; else if (ret > 1) { @@ -504,51 +507,46 @@ static int intel_fbc_alloc_cfb(struct drm_i915_private *dev_priv, "Reducing the compressed framebuffer size. This may lead to less power savings than a non-reduced-size. Try to increase stolen memory size if available in BIOS.\n"); } - fbc->threshold = ret; + fbc->limit = ret; - if (DISPLAY_VER(dev_priv) >= 5) + drm_dbg_kms(&dev_priv->drm, + "reserved %llu bytes of contiguous stolen space for FBC, limit: %d\n", + fbc->compressed_fb.size, fbc->limit); + + return 0; + +err_llb: + if (drm_mm_node_allocated(&fbc->compressed_llb)) + i915_gem_stolen_remove_node(dev_priv, &fbc->compressed_llb); +err: + if (drm_mm_initialized(&dev_priv->mm.stolen)) + drm_info_once(&dev_priv->drm, "not enough stolen space for compressed buffer (need %d more bytes), disabling. 
Hint: you may be able to increase stolen memory size in the BIOS to avoid this.\n", size); + return -ENOSPC; +} + +static void intel_fbc_program_cfb(struct drm_i915_private *dev_priv) +{ + struct intel_fbc *fbc = &dev_priv->fbc; + + if (DISPLAY_VER(dev_priv) >= 5) { intel_de_write(dev_priv, ILK_DPFC_CB_BASE, fbc->compressed_fb.start); - else if (IS_GM45(dev_priv)) { + } else if (IS_GM45(dev_priv)) { intel_de_write(dev_priv, DPFC_CB_BASE, fbc->compressed_fb.start); } else { - compressed_llb = kzalloc(sizeof(*compressed_llb), GFP_KERNEL); - if (!compressed_llb) - goto err_fb; - - ret = i915_gem_stolen_insert_node(dev_priv, compressed_llb, - 4096, 4096); - if (ret) - goto err_fb; - - fbc->compressed_llb = compressed_llb; - GEM_BUG_ON(range_overflows_end_t(u64, dev_priv->dsm.start, fbc->compressed_fb.start, U32_MAX)); GEM_BUG_ON(range_overflows_end_t(u64, dev_priv->dsm.start, - fbc->compressed_llb->start, + fbc->compressed_llb.start, U32_MAX)); + intel_de_write(dev_priv, FBC_CFB_BASE, dev_priv->dsm.start + fbc->compressed_fb.start); intel_de_write(dev_priv, FBC_LL_BASE, - dev_priv->dsm.start + compressed_llb->start); + dev_priv->dsm.start + fbc->compressed_llb.start); } - - drm_dbg_kms(&dev_priv->drm, - "reserved %llu bytes of contiguous stolen space for FBC, threshold: %d\n", - fbc->compressed_fb.size, fbc->threshold); - - return 0; - -err_fb: - kfree(compressed_llb); - i915_gem_stolen_remove_node(dev_priv, &fbc->compressed_fb); -err_llb: - if (drm_mm_initialized(&dev_priv->mm.stolen)) - drm_info_once(&dev_priv->drm, "not enough stolen space for compressed buffer (need %d more bytes), disabling. Hint: you may be able to increase stolen memory size in the BIOS to avoid this.\n", size); - return -ENOSPC; } static void __intel_fbc_cleanup_cfb(struct drm_i915_private *dev_priv) @@ -558,15 +556,10 @@ static void __intel_fbc_cleanup_cfb(struct drm_i915_private *dev_priv) if (WARN_ON(intel_fbc_hw_is_active(dev_priv))) return; - if (!drm_mm_node_allocated(&fbc->compressed_fb)) - return; - - if (fbc->compressed_llb) { - i915_gem_stolen_remove_node(dev_priv, fbc->compressed_llb); - kfree(fbc->compressed_llb); - } - - i915_gem_stolen_remove_node(dev_priv, &fbc->compressed_fb); + if (drm_mm_node_allocated(&fbc->compressed_llb)) + i915_gem_stolen_remove_node(dev_priv, &fbc->compressed_llb); + if (drm_mm_node_allocated(&fbc->compressed_fb)) + i915_gem_stolen_remove_node(dev_priv, &fbc->compressed_fb); } void intel_fbc_cleanup_cfb(struct drm_i915_private *dev_priv) @@ -753,7 +746,7 @@ static bool intel_fbc_cfb_size_changed(struct drm_i915_private *dev_priv) struct intel_fbc *fbc = &dev_priv->fbc; return intel_fbc_calculate_cfb_size(dev_priv, &fbc->state_cache) > - fbc->compressed_fb.size * fbc->threshold; + fbc->compressed_fb.size * fbc->limit; } static u16 intel_fbc_gen9_wa_cfb_stride(struct drm_i915_private *dev_priv) @@ -763,7 +756,7 @@ static u16 intel_fbc_gen9_wa_cfb_stride(struct drm_i915_private *dev_priv) if ((DISPLAY_VER(dev_priv) == 9) && cache->fb.modifier != I915_FORMAT_MOD_X_TILED) - return DIV_ROUND_UP(cache->plane.src_w, 32 * fbc->threshold) * 8; + return DIV_ROUND_UP(cache->plane.src_w, 32 * fbc->limit) * 8; else return 0; } @@ -919,11 +912,11 @@ static bool intel_fbc_can_activate(struct intel_crtc *crtc) } /* - * Tigerlake is not supporting FBC with PSR2. + * Display 12+ is not supporting FBC with PSR2. 
* Recommendation is to keep this combination disabled * Bspec: 50422 HSD: 14010260002 */ - if (fbc->state_cache.psr2_active && IS_TIGERLAKE(dev_priv)) { + if (fbc->state_cache.psr2_active && DISPLAY_VER(dev_priv) >= 12) { fbc->no_fbc_reason = "not supported with PSR2"; return false; } @@ -1302,6 +1295,8 @@ void intel_fbc_enable(struct intel_atomic_state *state, fbc->no_fbc_reason = "FBC enabled but not active yet\n"; fbc->crtc = crtc; + + intel_fbc_program_cfb(dev_priv); out: mutex_unlock(&fbc->lock); } diff --git a/drivers/gpu/drm/i915/display/intel_fbdev.c b/drivers/gpu/drm/i915/display/intel_fbdev.c index 4af40229f5ec..df05d285f0bd 100644 --- a/drivers/gpu/drm/i915/display/intel_fbdev.c +++ b/drivers/gpu/drm/i915/display/intel_fbdev.c @@ -335,32 +335,43 @@ static void intel_fbdev_destroy(struct intel_fbdev *ifbdev) * fbcon), so we just find the biggest and use that. */ static bool intel_fbdev_init_bios(struct drm_device *dev, - struct intel_fbdev *ifbdev) + struct intel_fbdev *ifbdev) { struct drm_i915_private *i915 = to_i915(dev); struct intel_framebuffer *fb = NULL; - struct drm_crtc *crtc; - struct intel_crtc *intel_crtc; + struct intel_crtc *crtc; unsigned int max_size = 0; /* Find the largest fb */ - for_each_crtc(dev, crtc) { + for_each_intel_crtc(dev, crtc) { + struct intel_crtc_state *crtc_state = + to_intel_crtc_state(crtc->base.state); + struct intel_plane *plane = + to_intel_plane(crtc->base.primary); + struct intel_plane_state *plane_state = + to_intel_plane_state(plane->base.state); struct drm_i915_gem_object *obj = - intel_fb_obj(crtc->primary->state->fb); - intel_crtc = to_intel_crtc(crtc); + intel_fb_obj(plane_state->uapi.fb); - if (!crtc->state->active || !obj) { + if (!crtc_state->uapi.active) { drm_dbg_kms(&i915->drm, - "pipe %c not active or no fb, skipping\n", - pipe_name(intel_crtc->pipe)); + "[CRTC:%d:%s] not active, skipping\n", + crtc->base.base.id, crtc->base.name); + continue; + } + + if (!obj) { + drm_dbg_kms(&i915->drm, + "[PLANE:%d:%s] no fb, skipping\n", + plane->base.base.id, plane->base.name); continue; } if (obj->base.size > max_size) { drm_dbg_kms(&i915->drm, - "found possible fb from plane %c\n", - pipe_name(intel_crtc->pipe)); - fb = to_intel_framebuffer(crtc->primary->state->fb); + "found possible fb from [PLANE:%d:%s]\n", + plane->base.base.id, plane->base.name); + fb = to_intel_framebuffer(plane_state->uapi.fb); max_size = obj->base.size; } } @@ -372,60 +383,62 @@ static bool intel_fbdev_init_bios(struct drm_device *dev, } /* Now make sure all the pipes will fit into it */ - for_each_crtc(dev, crtc) { + for_each_intel_crtc(dev, crtc) { + struct intel_crtc_state *crtc_state = + to_intel_crtc_state(crtc->base.state); + struct intel_plane *plane = + to_intel_plane(crtc->base.primary); unsigned int cur_size; - intel_crtc = to_intel_crtc(crtc); - - if (!crtc->state->active) { + if (!crtc_state->uapi.active) { drm_dbg_kms(&i915->drm, - "pipe %c not active, skipping\n", - pipe_name(intel_crtc->pipe)); + "[CRTC:%d:%s] not active, skipping\n", + crtc->base.base.id, crtc->base.name); continue; } - drm_dbg_kms(&i915->drm, "checking plane %c for BIOS fb\n", - pipe_name(intel_crtc->pipe)); + drm_dbg_kms(&i915->drm, "checking [PLANE:%d:%s] for BIOS fb\n", + plane->base.base.id, plane->base.name); /* * See if the plane fb we found above will fit on this * pipe. Note we need to use the selected fb's pitch and bpp * rather than the current pipe's, since they differ. 
*/ - cur_size = crtc->state->adjusted_mode.crtc_hdisplay; + cur_size = crtc_state->uapi.adjusted_mode.crtc_hdisplay; cur_size = cur_size * fb->base.format->cpp[0]; if (fb->base.pitches[0] < cur_size) { drm_dbg_kms(&i915->drm, - "fb not wide enough for plane %c (%d vs %d)\n", - pipe_name(intel_crtc->pipe), + "fb not wide enough for [PLANE:%d:%s] (%d vs %d)\n", + plane->base.base.id, plane->base.name, cur_size, fb->base.pitches[0]); fb = NULL; break; } - cur_size = crtc->state->adjusted_mode.crtc_vdisplay; + cur_size = crtc_state->uapi.adjusted_mode.crtc_vdisplay; cur_size = intel_fb_align_height(&fb->base, 0, cur_size); cur_size *= fb->base.pitches[0]; drm_dbg_kms(&i915->drm, - "pipe %c area: %dx%d, bpp: %d, size: %d\n", - pipe_name(intel_crtc->pipe), - crtc->state->adjusted_mode.crtc_hdisplay, - crtc->state->adjusted_mode.crtc_vdisplay, + "[CRTC:%d:%s] area: %dx%d, bpp: %d, size: %d\n", + crtc->base.base.id, crtc->base.name, + crtc_state->uapi.adjusted_mode.crtc_hdisplay, + crtc_state->uapi.adjusted_mode.crtc_vdisplay, fb->base.format->cpp[0] * 8, cur_size); if (cur_size > max_size) { drm_dbg_kms(&i915->drm, - "fb not big enough for plane %c (%d vs %d)\n", - pipe_name(intel_crtc->pipe), + "fb not big enough for [PLANE:%d:%s] (%d vs %d)\n", + plane->base.base.id, plane->base.name, cur_size, max_size); fb = NULL; break; } drm_dbg_kms(&i915->drm, - "fb big enough for plane %c (%d >= %d)\n", - pipe_name(intel_crtc->pipe), + "fb big enough [PLANE:%d:%s] (%d >= %d)\n", + plane->base.base.id, plane->base.name, max_size, cur_size); } @@ -441,15 +454,20 @@ static bool intel_fbdev_init_bios(struct drm_device *dev, drm_framebuffer_get(&ifbdev->fb->base); /* Final pass to check if any active pipes don't have fbs */ - for_each_crtc(dev, crtc) { - intel_crtc = to_intel_crtc(crtc); - - if (!crtc->state->active) + for_each_intel_crtc(dev, crtc) { + struct intel_crtc_state *crtc_state = + to_intel_crtc_state(crtc->base.state); + struct intel_plane *plane = + to_intel_plane(crtc->base.primary); + struct intel_plane_state *plane_state = + to_intel_plane_state(plane->base.state); + + if (!crtc_state->uapi.active) continue; - drm_WARN(dev, !crtc->primary->state->fb, - "re-used BIOS config but lost an fb on crtc %d\n", - crtc->base.id); + drm_WARN(dev, !plane_state->uapi.fb, + "re-used BIOS config but lost an fb on [PLANE:%d:%s]\n", + plane->base.base.id, plane->base.name); } diff --git a/drivers/gpu/drm/i915/display/intel_fdi.c b/drivers/gpu/drm/i915/display/intel_fdi.c index cef1061fd6cb..e10b9cd8e86e 100644 --- a/drivers/gpu/drm/i915/display/intel_fdi.c +++ b/drivers/gpu/drm/i915/display/intel_fdi.c @@ -4,7 +4,6 @@ */ #include "intel_atomic.h" #include "intel_ddi.h" -#include "intel_ddi_buf_trans.h" #include "intel_de.h" #include "intel_display_types.h" #include "intel_fdi.h" @@ -96,10 +95,10 @@ static int ilk_check_fdi_lanes(struct drm_device *dev, enum pipe pipe, } } -int ilk_fdi_compute_config(struct intel_crtc *intel_crtc, - struct intel_crtc_state *pipe_config) +int ilk_fdi_compute_config(struct intel_crtc *crtc, + struct intel_crtc_state *pipe_config) { - struct drm_device *dev = intel_crtc->base.dev; + struct drm_device *dev = crtc->base.dev; struct drm_i915_private *i915 = to_i915(dev); const struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode; int lane, link_bw, fdi_dotclock, ret; @@ -125,7 +124,7 @@ retry: intel_link_compute_m_n(pipe_config->pipe_bpp, lane, fdi_dotclock, link_bw, &pipe_config->fdi_m_n, false, false); - ret = ilk_check_fdi_lanes(dev, intel_crtc->pipe, 
pipe_config); + ret = ilk_check_fdi_lanes(dev, crtc->pipe, pipe_config); if (ret == -EDEADLK) return ret; @@ -569,9 +568,9 @@ void hsw_fdi_link_train(struct intel_encoder *encoder, u32 temp, i, rx_ctl_val; int n_entries; - intel_ddi_get_buf_trans_fdi(dev_priv, &n_entries); + encoder->get_buf_trans(encoder, crtc_state, &n_entries); - intel_prepare_dp_ddi_buffers(encoder, crtc_state); + hsw_prepare_dp_ddi_buffers(encoder, crtc_state); /* Set the FDI_RX_MISC pwrdn lanes and the 2 workarounds listed at the * mode set "sequence for CRT port" document: @@ -691,9 +690,9 @@ void hsw_fdi_link_train(struct intel_encoder *encoder, void ilk_fdi_pll_enable(const struct intel_crtc_state *crtc_state) { - struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->uapi.crtc); - struct drm_i915_private *dev_priv = to_i915(intel_crtc->base.dev); - enum pipe pipe = intel_crtc->pipe; + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + enum pipe pipe = crtc->pipe; i915_reg_t reg; u32 temp; @@ -726,11 +725,11 @@ void ilk_fdi_pll_enable(const struct intel_crtc_state *crtc_state) } } -void ilk_fdi_pll_disable(struct intel_crtc *intel_crtc) +void ilk_fdi_pll_disable(struct intel_crtc *crtc) { - struct drm_device *dev = intel_crtc->base.dev; + struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - enum pipe pipe = intel_crtc->pipe; + enum pipe pipe = crtc->pipe; i915_reg_t reg; u32 temp; diff --git a/drivers/gpu/drm/i915/display/intel_gmbus.c b/drivers/gpu/drm/i915/display/intel_gmbus.c index fcf47f98ea36..ceb1bf8a8c3c 100644 --- a/drivers/gpu/drm/i915/display/intel_gmbus.c +++ b/drivers/gpu/drm/i915/display/intel_gmbus.c @@ -600,7 +600,7 @@ do_gmbus_xfer(struct i2c_adapter *adapter, struct i2c_msg *msgs, int num, int i = 0, inc, try = 0; int ret = 0; - /* Display WA #0868: skl,bxt,kbl,cfl,glk,cnl */ + /* Display WA #0868: skl,bxt,kbl,cfl,glk */ if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) bxt_gmbus_clock_gating(dev_priv, false); else if (HAS_PCH_SPT(dev_priv) || HAS_PCH_CNP(dev_priv)) @@ -713,7 +713,7 @@ timeout: ret = -EAGAIN; out: - /* Display WA #0868: skl,bxt,kbl,cfl,glk,cnl */ + /* Display WA #0868: skl,bxt,kbl,cfl,glk */ if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) bxt_gmbus_clock_gating(dev_priv, true); else if (HAS_PCH_SPT(dev_priv) || HAS_PCH_CNP(dev_priv)) diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.c b/drivers/gpu/drm/i915/display/intel_hdmi.c index 7e51c98c475e..b04685bb6439 100644 --- a/drivers/gpu/drm/i915/display/intel_hdmi.c +++ b/drivers/gpu/drm/i915/display/intel_hdmi.c @@ -51,6 +51,7 @@ #include "intel_hdmi.h" #include "intel_lspcon.h" #include "intel_panel.h" +#include "intel_snps_phy.h" static struct drm_device *intel_hdmi_to_dev(struct intel_hdmi *intel_hdmi) { @@ -270,8 +271,8 @@ static void ibx_write_infoframe(struct intel_encoder *encoder, { const u32 *data = frame; struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->uapi.crtc); - i915_reg_t reg = TVIDEO_DIP_CTL(intel_crtc->pipe); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + i915_reg_t reg = TVIDEO_DIP_CTL(crtc->pipe); u32 val = intel_de_read(dev_priv, reg); int i; @@ -286,13 +287,13 @@ static void ibx_write_infoframe(struct intel_encoder *encoder, intel_de_write(dev_priv, reg, val); for (i = 0; i < len; i += 4) { - intel_de_write(dev_priv, TVIDEO_DIP_DATA(intel_crtc->pipe), + intel_de_write(dev_priv, 
TVIDEO_DIP_DATA(crtc->pipe), *data); data++; } /* Write every possible data byte to force correct ECC calculation. */ for (; i < VIDEO_DIP_DATA_SIZE; i += 4) - intel_de_write(dev_priv, TVIDEO_DIP_DATA(intel_crtc->pipe), 0); + intel_de_write(dev_priv, TVIDEO_DIP_DATA(crtc->pipe), 0); val |= g4x_infoframe_enable(type); val &= ~VIDEO_DIP_FREQ_MASK; @@ -349,8 +350,8 @@ static void cpt_write_infoframe(struct intel_encoder *encoder, { const u32 *data = frame; struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->uapi.crtc); - i915_reg_t reg = TVIDEO_DIP_CTL(intel_crtc->pipe); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + i915_reg_t reg = TVIDEO_DIP_CTL(crtc->pipe); u32 val = intel_de_read(dev_priv, reg); int i; @@ -368,13 +369,13 @@ static void cpt_write_infoframe(struct intel_encoder *encoder, intel_de_write(dev_priv, reg, val); for (i = 0; i < len; i += 4) { - intel_de_write(dev_priv, TVIDEO_DIP_DATA(intel_crtc->pipe), + intel_de_write(dev_priv, TVIDEO_DIP_DATA(crtc->pipe), *data); data++; } /* Write every possible data byte to force correct ECC calculation. */ for (; i < VIDEO_DIP_DATA_SIZE; i += 4) - intel_de_write(dev_priv, TVIDEO_DIP_DATA(intel_crtc->pipe), 0); + intel_de_write(dev_priv, TVIDEO_DIP_DATA(crtc->pipe), 0); val |= g4x_infoframe_enable(type); val &= ~VIDEO_DIP_FREQ_MASK; @@ -427,8 +428,8 @@ static void vlv_write_infoframe(struct intel_encoder *encoder, { const u32 *data = frame; struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->uapi.crtc); - i915_reg_t reg = VLV_TVIDEO_DIP_CTL(intel_crtc->pipe); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + i915_reg_t reg = VLV_TVIDEO_DIP_CTL(crtc->pipe); u32 val = intel_de_read(dev_priv, reg); int i; @@ -444,13 +445,13 @@ static void vlv_write_infoframe(struct intel_encoder *encoder, for (i = 0; i < len; i += 4) { intel_de_write(dev_priv, - VLV_TVIDEO_DIP_DATA(intel_crtc->pipe), *data); + VLV_TVIDEO_DIP_DATA(crtc->pipe), *data); data++; } /* Write every possible data byte to force correct ECC calculation. 
*/ for (; i < VIDEO_DIP_DATA_SIZE; i += 4) intel_de_write(dev_priv, - VLV_TVIDEO_DIP_DATA(intel_crtc->pipe), 0); + VLV_TVIDEO_DIP_DATA(crtc->pipe), 0); val |= g4x_infoframe_enable(type); val &= ~VIDEO_DIP_FREQ_MASK; @@ -1040,10 +1041,10 @@ static void ibx_set_infoframes(struct intel_encoder *encoder, const struct drm_connector_state *conn_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->uapi.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct intel_digital_port *dig_port = enc_to_dig_port(encoder); struct intel_hdmi *intel_hdmi = &dig_port->hdmi; - i915_reg_t reg = TVIDEO_DIP_CTL(intel_crtc->pipe); + i915_reg_t reg = TVIDEO_DIP_CTL(crtc->pipe); u32 val = intel_de_read(dev_priv, reg); u32 port = VIDEO_DIP_PORT(encoder->port); @@ -1099,9 +1100,9 @@ static void cpt_set_infoframes(struct intel_encoder *encoder, const struct drm_connector_state *conn_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->uapi.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder); - i915_reg_t reg = TVIDEO_DIP_CTL(intel_crtc->pipe); + i915_reg_t reg = TVIDEO_DIP_CTL(crtc->pipe); u32 val = intel_de_read(dev_priv, reg); assert_hdmi_port_disabled(intel_hdmi); @@ -1148,9 +1149,9 @@ static void vlv_set_infoframes(struct intel_encoder *encoder, const struct drm_connector_state *conn_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->uapi.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder); - i915_reg_t reg = VLV_TVIDEO_DIP_CTL(intel_crtc->pipe); + i915_reg_t reg = VLV_TVIDEO_DIP_CTL(crtc->pipe); u32 val = intel_de_read(dev_priv, reg); u32 port = VIDEO_DIP_PORT(encoder->port); @@ -1465,14 +1466,12 @@ static int kbl_repositioning_enc_en_signal(struct intel_connector *connector, { struct drm_i915_private *dev_priv = to_i915(connector->base.dev); struct intel_digital_port *dig_port = intel_attached_dig_port(connector); - struct drm_crtc *crtc = connector->base.state->crtc; - struct intel_crtc *intel_crtc = container_of(crtc, - struct intel_crtc, base); + struct intel_crtc *crtc = to_intel_crtc(connector->base.state->crtc); u32 scanline; int ret; for (;;) { - scanline = intel_de_read(dev_priv, PIPEDSL(intel_crtc->pipe)); + scanline = intel_de_read(dev_priv, PIPEDSL(crtc->pipe)); if (scanline > 100 && scanline < 200) break; usleep_range(25, 50); @@ -1852,6 +1851,16 @@ hdmi_port_clock_valid(struct intel_hdmi *hdmi, if (IS_CHERRYVIEW(dev_priv) && clock > 216000 && clock < 240000) return MODE_CLOCK_RANGE; + /* + * SNPS PHYs' MPLLB table-based programming can only handle a fixed + * set of link rates. + * + * FIXME: We will hopefully get an algorithmic way of programming + * the MPLLB for HDMI in the future. 
+ */ + if (IS_DG2(dev_priv)) + return intel_snps_phy_check_hdmi_link_rate(clock); + return MODE_OK; } diff --git a/drivers/gpu/drm/i915/display/intel_lvds.c b/drivers/gpu/drm/i915/display/intel_lvds.c index 7f40e9f60bc2..e0381b0fce91 100644 --- a/drivers/gpu/drm/i915/display/intel_lvds.c +++ b/drivers/gpu/drm/i915/display/intel_lvds.c @@ -411,12 +411,12 @@ static int intel_lvds_compute_config(struct intel_encoder *intel_encoder, struct intel_connector *intel_connector = lvds_encoder->attached_connector; struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode; - struct intel_crtc *intel_crtc = to_intel_crtc(pipe_config->uapi.crtc); + struct intel_crtc *crtc = to_intel_crtc(pipe_config->uapi.crtc); unsigned int lvds_bpp; int ret; /* Should never happen!! */ - if (DISPLAY_VER(dev_priv) < 4 && intel_crtc->pipe == 0) { + if (DISPLAY_VER(dev_priv) < 4 && crtc->pipe == 0) { drm_err(&dev_priv->drm, "Can't support LVDS on pipe A\n"); return -EINVAL; } diff --git a/drivers/gpu/drm/i915/display/intel_opregion.c b/drivers/gpu/drm/i915/display/intel_opregion.c index dfd724e506b5..3855fba70980 100644 --- a/drivers/gpu/drm/i915/display/intel_opregion.c +++ b/drivers/gpu/drm/i915/display/intel_opregion.c @@ -1078,6 +1078,9 @@ void intel_opregion_resume(struct drm_i915_private *i915) opregion->asle->ardy = ASLE_ARDY_READY; } + /* Some platforms abuse the _DSM to enable MUX */ + intel_dsm_get_bios_data_funcs_supported(i915); + intel_opregion_notify_adapter(i915, PCI_D0); } diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index 77865cf6641f..1b0daf649e82 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -32,6 +32,7 @@ #include "intel_dp_aux.h" #include "intel_hdmi.h" #include "intel_psr.h" +#include "intel_snps_phy.h" #include "intel_sprite.h" #include "skl_universal_plane.h" @@ -265,32 +266,44 @@ static u8 intel_dp_get_sink_sync_latency(struct intel_dp *intel_dp) return val; } -static u16 intel_dp_get_su_x_granulartiy(struct intel_dp *intel_dp) +static void intel_dp_get_su_granularity(struct intel_dp *intel_dp) { struct drm_i915_private *i915 = dp_to_i915(intel_dp); - u16 val; ssize_t r; + u16 w; + u8 y; + + /* If sink don't have specific granularity requirements set legacy ones */ + if (!(intel_dp->psr_dpcd[1] & DP_PSR2_SU_GRANULARITY_REQUIRED)) { + /* As PSR2 HW sends full lines, we do not care about x granularity */ + w = 4; + y = 4; + goto exit; + } - /* - * Returning the default X granularity if granularity not required or - * if DPCD read fails - */ - if (!(intel_dp->psr_dpcd[1] & DP_PSR2_SU_GRANULARITY_REQUIRED)) - return 4; - - r = drm_dp_dpcd_read(&intel_dp->aux, DP_PSR2_SU_X_GRANULARITY, &val, 2); + r = drm_dp_dpcd_read(&intel_dp->aux, DP_PSR2_SU_X_GRANULARITY, &w, 2); if (r != 2) drm_dbg_kms(&i915->drm, "Unable to read DP_PSR2_SU_X_GRANULARITY\n"); - /* * Spec says that if the value read is 0 the default granularity should * be used instead. 
*/ - if (r != 2 || val == 0) - val = 4; + if (r != 2 || w == 0) + w = 4; - return val; + r = drm_dp_dpcd_read(&intel_dp->aux, DP_PSR2_SU_Y_GRANULARITY, &y, 1); + if (r != 1) { + drm_dbg_kms(&i915->drm, + "Unable to read DP_PSR2_SU_Y_GRANULARITY\n"); + y = 4; + } + if (y == 0) + y = 1; + +exit: + intel_dp->psr.su_w_granularity = w; + intel_dp->psr.su_y_granularity = y; } void intel_psr_init_dpcd(struct intel_dp *intel_dp) @@ -346,8 +359,7 @@ void intel_psr_init_dpcd(struct intel_dp *intel_dp) if (intel_dp->psr.sink_psr2_support) { intel_dp->psr.colorimetry_support = intel_dp_get_colorimetry_status(intel_dp); - intel_dp->psr.su_x_granularity = - intel_dp_get_su_x_granulartiy(intel_dp); + intel_dp_get_su_granularity(intel_dp); } } } @@ -407,6 +419,9 @@ static void intel_psr_enable_sink(struct intel_dp *intel_dp) dpcd_val |= DP_PSR_CRC_VERIFICATION; } + if (intel_dp->psr.req_psr2_sdp_prior_scanline) + dpcd_val |= DP_PSR_SU_REGION_SCANLINE_CAPTURE; + drm_dp_dpcd_writeb(&intel_dp->aux, DP_PSR_EN_CFG, dpcd_val); drm_dp_dpcd_writeb(&intel_dp->aux, DP_SET_POWER, DP_SET_POWER_D0); @@ -520,18 +535,47 @@ static u32 intel_psr2_get_tp_time(struct intel_dp *intel_dp) static void hsw_activate_psr2(struct intel_dp *intel_dp) { struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); - u32 val; + u32 val = EDP_PSR2_ENABLE; - val = psr_compute_idle_frames(intel_dp) << EDP_PSR2_IDLE_FRAME_SHIFT; + val |= psr_compute_idle_frames(intel_dp) << EDP_PSR2_IDLE_FRAME_SHIFT; + + if (!IS_ALDERLAKE_P(dev_priv)) + val |= EDP_SU_TRACK_ENABLE; - val |= EDP_PSR2_ENABLE | EDP_SU_TRACK_ENABLE; if (DISPLAY_VER(dev_priv) >= 10 && DISPLAY_VER(dev_priv) <= 12) val |= EDP_Y_COORDINATE_ENABLE; val |= EDP_PSR2_FRAME_BEFORE_SU(intel_dp->psr.sink_sync_latency + 1); val |= intel_psr2_get_tp_time(intel_dp); - if (DISPLAY_VER(dev_priv) >= 12) { + /* Wa_22012278275:adl-p */ + if (IS_ADLP_DISPLAY_STEP(dev_priv, STEP_A0, STEP_E0)) { + static const u8 map[] = { + 2, /* 5 lines */ + 1, /* 6 lines */ + 0, /* 7 lines */ + 3, /* 8 lines */ + 6, /* 9 lines */ + 5, /* 10 lines */ + 4, /* 11 lines */ + 7, /* 12 lines */ + }; + /* + * Still using the default IO_BUFFER_WAKE and FAST_WAKE, see + * comments bellow for more information + */ + u32 tmp, lines = 7; + + val |= TGL_EDP_PSR2_BLOCK_COUNT_NUM_2; + + tmp = map[lines - TGL_EDP_PSR2_IO_BUFFER_WAKE_MIN_LINES]; + tmp = tmp << TGL_EDP_PSR2_IO_BUFFER_WAKE_SHIFT; + val |= tmp; + + tmp = map[lines - TGL_EDP_PSR2_FAST_WAKE_MIN_LINES]; + tmp = tmp << TGL_EDP_PSR2_FAST_WAKE_MIN_SHIFT; + val |= tmp; + } else if (DISPLAY_VER(dev_priv) >= 12) { /* * TODO: 7 lines of IO_BUFFER_WAKE and FAST_WAKE are default * values from BSpec. 
In order to setting an optimal power @@ -547,10 +591,12 @@ static void hsw_activate_psr2(struct intel_dp *intel_dp) val |= EDP_PSR2_FAST_WAKE(7); } + if (intel_dp->psr.req_psr2_sdp_prior_scanline) + val |= EDP_PSR2_SU_SDP_SCANLINE; + if (intel_dp->psr.psr2_sel_fetch_enabled) { - /* WA 1408330847 */ - if (IS_TGL_DISPLAY_STEP(dev_priv, STEP_A0, STEP_A0) || - IS_RKL_REVID(dev_priv, RKL_REVID_A0, RKL_REVID_A0)) + /* Wa_1408330847 */ + if (IS_TGL_DISPLAY_STEP(dev_priv, STEP_A0, STEP_B0)) intel_de_rmw(dev_priv, CHICKEN_PAR1_1, DIS_RAM_BYPASS_PSR2_MAN_TRACK, DIS_RAM_BYPASS_PSR2_MAN_TRACK); @@ -689,6 +735,10 @@ tgl_dc3co_exitline_compute_config(struct intel_dp *intel_dp, if (!dc3co_is_pipe_port_compatible(intel_dp, crtc_state)) return; + /* Wa_16011303918:adl-p */ + if (IS_ADLP_DISPLAY_STEP(dev_priv, STEP_A0, STEP_B0)) + return; + /* * DC3CO Exit time 200us B.Spec 49196 * PSR2 transcoder Early Exit scanlines = ROUNDUP(200 / line time) + 1 @@ -733,7 +783,7 @@ static bool intel_psr2_sel_fetch_config_valid(struct intel_dp *intel_dp, } /* Wa_14010254185 Wa_14010103792 */ - if (IS_TGL_DISPLAY_STEP(dev_priv, STEP_A0, STEP_B1)) { + if (IS_TGL_DISPLAY_STEP(dev_priv, STEP_A0, STEP_C0)) { drm_dbg_kms(&dev_priv->drm, "PSR2 sel fetch not enabled, missing the implementation of WAs\n"); return false; @@ -742,6 +792,67 @@ static bool intel_psr2_sel_fetch_config_valid(struct intel_dp *intel_dp, return crtc_state->enable_psr2_sel_fetch = true; } +static bool psr2_granularity_check(struct intel_dp *intel_dp, + struct intel_crtc_state *crtc_state) +{ + struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); + const int crtc_hdisplay = crtc_state->hw.adjusted_mode.crtc_hdisplay; + const int crtc_vdisplay = crtc_state->hw.adjusted_mode.crtc_vdisplay; + u16 y_granularity = 0; + + /* PSR2 HW only send full lines so we only need to validate the width */ + if (crtc_hdisplay % intel_dp->psr.su_w_granularity) + return false; + + if (crtc_vdisplay % intel_dp->psr.su_y_granularity) + return false; + + /* HW tracking is only aligned to 4 lines */ + if (!crtc_state->enable_psr2_sel_fetch) + return intel_dp->psr.su_y_granularity == 4; + + /* + * adl_p has 1 line granularity. For other platforms with SW tracking we + * can adjust the y coordinates to match sink requirement if multiple of + * 4. 
+ */ + if (IS_ALDERLAKE_P(dev_priv)) + y_granularity = intel_dp->psr.su_y_granularity; + else if (intel_dp->psr.su_y_granularity <= 2) + y_granularity = 4; + else if ((intel_dp->psr.su_y_granularity % 4) == 0) + y_granularity = intel_dp->psr.su_y_granularity; + + if (y_granularity == 0 || crtc_vdisplay % y_granularity) + return false; + + crtc_state->su_y_granularity = y_granularity; + return true; +} + +static bool _compute_psr2_sdp_prior_scanline_indication(struct intel_dp *intel_dp, + struct intel_crtc_state *crtc_state) +{ + const struct drm_display_mode *adjusted_mode = &crtc_state->uapi.adjusted_mode; + struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); + u32 hblank_total, hblank_ns, req_ns; + + hblank_total = adjusted_mode->crtc_hblank_end - adjusted_mode->crtc_hblank_start; + hblank_ns = div_u64(1000000ULL * hblank_total, adjusted_mode->crtc_clock); + + /* From spec: (72 / number of lanes) * 1000 / symbol clock frequency MHz */ + req_ns = (72 / crtc_state->lane_count) * 1000 / (crtc_state->port_clock / 1000); + + if ((hblank_ns - req_ns) > 100) + return true; + + if (DISPLAY_VER(dev_priv) < 13 || intel_dp->edp_dpcd[0] < DP_EDP_14b) + return false; + + crtc_state->req_psr2_sdp_prior_scanline = true; + return true; +} + static bool intel_psr2_config_valid(struct intel_dp *intel_dp, struct intel_crtc_state *crtc_state) { @@ -760,7 +871,8 @@ static bool intel_psr2_config_valid(struct intel_dp *intel_dp, } /* Wa_16011181250 */ - if (IS_ROCKETLAKE(dev_priv) || IS_ALDERLAKE_S(dev_priv)) { + if (IS_ROCKETLAKE(dev_priv) || IS_ALDERLAKE_S(dev_priv) || + IS_DG2(dev_priv)) { drm_dbg_kms(&dev_priv->drm, "PSR2 is defeatured for this platform\n"); return false; } @@ -824,19 +936,6 @@ static bool intel_psr2_config_valid(struct intel_dp *intel_dp, return false; } - /* - * HW sends SU blocks of size four scan lines, which means the starting - * X coordinate and Y granularity requirements will always be met. We - * only need to validate the SU block width is a multiple of - * x granularity. 
- */ - if (crtc_hdisplay % intel_dp->psr.su_x_granularity) { - drm_dbg_kms(&dev_priv->drm, - "PSR2 not enabled, hdisplay(%d) not multiple of %d\n", - crtc_hdisplay, intel_dp->psr.su_x_granularity); - return false; - } - if (HAS_PSR2_SEL_FETCH(dev_priv)) { if (!intel_psr2_sel_fetch_config_valid(intel_dp, crtc_state) && !HAS_PSR_HW_TRACKING(dev_priv)) { @@ -848,11 +947,16 @@ static bool intel_psr2_config_valid(struct intel_dp *intel_dp, /* Wa_2209313811 */ if (!crtc_state->enable_psr2_sel_fetch && - IS_TGL_DISPLAY_STEP(dev_priv, STEP_A0, STEP_B1)) { + IS_TGL_DISPLAY_STEP(dev_priv, STEP_A0, STEP_C0)) { drm_dbg_kms(&dev_priv->drm, "PSR2 HW tracking is not supported this Display stepping\n"); return false; } + if (!psr2_granularity_check(intel_dp, crtc_state)) { + drm_dbg_kms(&dev_priv->drm, "PSR2 not enabled, SU granularity not compatible\n"); + return false; + } + if (!crtc_state->enable_psr2_sel_fetch && (crtc_hdisplay > psr_max_h || crtc_vdisplay > psr_max_v)) { drm_dbg_kms(&dev_priv->drm, @@ -862,6 +966,20 @@ static bool intel_psr2_config_valid(struct intel_dp *intel_dp, return false; } + if (!_compute_psr2_sdp_prior_scanline_indication(intel_dp, crtc_state)) { + drm_dbg_kms(&dev_priv->drm, + "PSR2 not enabled, PSR2 SDP indication do not fit in hblank\n"); + return false; + } + + /* Wa_16011303918:adl-p */ + if (crtc_state->vrr.enable && + IS_ADLP_DISPLAY_STEP(dev_priv, STEP_A0, STEP_B0)) { + drm_dbg_kms(&dev_priv->drm, + "PSR2 not enabled, not compatible with HW stepping + VRR\n"); + return false; + } + tgl_dc3co_exitline_compute_config(intel_dp, crtc_state); return true; } @@ -1048,6 +1166,14 @@ static void intel_psr_enable_source(struct intel_dp *intel_dp) intel_de_rmw(dev_priv, CHICKEN_PAR1_1, IGNORE_PSR2_HW_TRACKING, intel_dp->psr.psr2_sel_fetch_enabled ? 
IGNORE_PSR2_HW_TRACKING : 0); + + /* Wa_16011168373:adl-p */ + if (IS_ADLP_DISPLAY_STEP(dev_priv, STEP_A0, STEP_B0) && + intel_dp->psr.psr2_enabled) + intel_de_rmw(dev_priv, + TRANS_SET_CONTEXT_LATENCY(intel_dp->psr.transcoder), + TRANS_SET_CONTEXT_LATENCY_MASK, + TRANS_SET_CONTEXT_LATENCY_VALUE(1)); } static bool psr_interrupt_error_check(struct intel_dp *intel_dp) @@ -1087,6 +1213,7 @@ static void intel_psr_enable_locked(struct intel_dp *intel_dp, { struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); + enum phy phy = intel_port_to_phy(dev_priv, dig_port->base.port); struct intel_encoder *encoder = &dig_port->base; u32 val; @@ -1101,6 +1228,8 @@ static void intel_psr_enable_locked(struct intel_dp *intel_dp, intel_dp->psr.dc3co_exit_delay = val; intel_dp->psr.dc3co_exitline = crtc_state->dc3co_exitline; intel_dp->psr.psr2_sel_fetch_enabled = crtc_state->enable_psr2_sel_fetch; + intel_dp->psr.req_psr2_sdp_prior_scanline = + crtc_state->req_psr2_sdp_prior_scanline; if (!psr_interrupt_error_check(intel_dp)) return; @@ -1110,6 +1239,7 @@ static void intel_psr_enable_locked(struct intel_dp *intel_dp, intel_dp_compute_psr_vsc_sdp(intel_dp, crtc_state, conn_state, &intel_dp->psr.vsc); intel_write_dp_vsc_sdp(encoder, crtc_state, &intel_dp->psr.vsc); + intel_snps_phy_update_psr_power_state(dev_priv, phy, true); intel_psr_enable_sink(intel_dp); intel_psr_enable_source(intel_dp); intel_dp->psr.enabled = true; @@ -1206,6 +1336,8 @@ static void intel_psr_wait_exit_locked(struct intel_dp *intel_dp) static void intel_psr_disable_locked(struct intel_dp *intel_dp) { struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); + enum phy phy = intel_port_to_phy(dev_priv, + dp_to_dig_port(intel_dp)->base.port); lockdep_assert_held(&intel_dp->psr.lock); @@ -1218,13 +1350,21 @@ static void intel_psr_disable_locked(struct intel_dp *intel_dp) intel_psr_exit(intel_dp); intel_psr_wait_exit_locked(intel_dp); - /* WA 1408330847 */ + /* Wa_1408330847 */ if (intel_dp->psr.psr2_sel_fetch_enabled && - (IS_TGL_DISPLAY_STEP(dev_priv, STEP_A0, STEP_A0) || - IS_RKL_REVID(dev_priv, RKL_REVID_A0, RKL_REVID_A0))) + IS_TGL_DISPLAY_STEP(dev_priv, STEP_A0, STEP_B0)) intel_de_rmw(dev_priv, CHICKEN_PAR1_1, DIS_RAM_BYPASS_PSR2_MAN_TRACK, 0); + /* Wa_16011168373:adl-p */ + if (IS_ADLP_DISPLAY_STEP(dev_priv, STEP_A0, STEP_B0) && + intel_dp->psr.psr2_enabled) + intel_de_rmw(dev_priv, + TRANS_SET_CONTEXT_LATENCY(intel_dp->psr.transcoder), + TRANS_SET_CONTEXT_LATENCY_MASK, 0); + + intel_snps_phy_update_psr_power_state(dev_priv, phy, false); + /* Disable PSR on Sink */ drm_dp_dpcd_writeb(&intel_dp->aux, DP_PSR_EN_CFG, 0); @@ -1397,21 +1537,32 @@ void intel_psr2_program_trans_man_trk_ctl(const struct intel_crtc_state *crtc_st static void psr2_man_trk_ctl_calc(struct intel_crtc_state *crtc_state, struct drm_rect *clip, bool full_update) { + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); u32 val = PSR2_MAN_TRK_CTL_ENABLE; if (full_update) { - val |= PSR2_MAN_TRK_CTL_SF_SINGLE_FULL_FRAME; + if (IS_ALDERLAKE_P(dev_priv)) + val |= ADLP_PSR2_MAN_TRK_CTL_SF_SINGLE_FULL_FRAME; + else + val |= PSR2_MAN_TRK_CTL_SF_SINGLE_FULL_FRAME; + goto exit; } if (clip->y1 == -1) goto exit; - drm_WARN_ON(crtc_state->uapi.crtc->dev, clip->y1 % 4 || clip->y2 % 4); + if (IS_ALDERLAKE_P(dev_priv)) { + val |= ADLP_PSR2_MAN_TRK_CTL_SU_REGION_START_ADDR(clip->y1); + val |= ADLP_PSR2_MAN_TRK_CTL_SU_REGION_END_ADDR(clip->y2); 
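	/*
	 * Illustrative aside (editorial, not part of the patch): ADL-P
	 * programs the selective-update region start/end as raw scanline
	 * numbers, while the pre-ADL-P path below addresses the region in
	 * 4-scanline blocks, which is why it divides by 4, adds 1 for the
	 * 1-based block address, and warns if the clip is not 4-line
	 * aligned.
	 */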
+ } else { + drm_WARN_ON(crtc_state->uapi.crtc->dev, clip->y1 % 4 || clip->y2 % 4); - val |= PSR2_MAN_TRK_CTL_SF_PARTIAL_FRAME_UPDATE; - val |= PSR2_MAN_TRK_CTL_SU_REGION_START_ADDR(clip->y1 / 4 + 1); - val |= PSR2_MAN_TRK_CTL_SU_REGION_END_ADDR(clip->y2 / 4 + 1); + val |= PSR2_MAN_TRK_CTL_SF_PARTIAL_FRAME_UPDATE; + val |= PSR2_MAN_TRK_CTL_SU_REGION_START_ADDR(clip->y1 / 4 + 1); + val |= PSR2_MAN_TRK_CTL_SU_REGION_END_ADDR(clip->y2 / 4 + 1); + } exit: crtc_state->psr2_man_track_ctl = val; } @@ -1432,6 +1583,20 @@ static void clip_area_update(struct drm_rect *overlap_damage_area, overlap_damage_area->y2 = damage_area->y2; } +static void intel_psr2_sel_fetch_pipe_alignment(const struct intel_crtc_state *crtc_state, + struct drm_rect *pipe_clip) +{ + struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev); + const u16 y_alignment = crtc_state->su_y_granularity; + + pipe_clip->y1 -= pipe_clip->y1 % y_alignment; + if (pipe_clip->y2 % y_alignment) + pipe_clip->y2 = ((pipe_clip->y2 / y_alignment) + 1) * y_alignment; + + if (IS_ALDERLAKE_P(dev_priv) && crtc_state->dsc.compression_enable) + drm_warn(&dev_priv->drm, "Missing PSR2 sel fetch alignment with DSC\n"); +} + int intel_psr2_sel_fetch_update(struct intel_atomic_state *state, struct intel_crtc *crtc) { @@ -1540,10 +1705,7 @@ int intel_psr2_sel_fetch_update(struct intel_atomic_state *state, if (full_update) goto skip_sel_fetch_set_loop; - /* It must be aligned to 4 lines */ - pipe_clip.y1 -= pipe_clip.y1 % 4; - if (pipe_clip.y2 % 4) - pipe_clip.y2 = ((pipe_clip.y2 / 4) + 1) * 4; + intel_psr2_sel_fetch_pipe_alignment(crtc_state, &pipe_clip); /* * Now that we have the pipe damaged area check if it intersect with @@ -1564,6 +1726,7 @@ int intel_psr2_sel_fetch_update(struct intel_atomic_state *state, sel_fetch_area = &new_plane_state->psr2_sel_fetch_area; sel_fetch_area->y1 = inter.y1 - new_plane_state->uapi.dst.y1; sel_fetch_area->y2 = inter.y2 - new_plane_state->uapi.dst.y1; + crtc_state->update_planes |= BIT(plane->id); } skip_sel_fetch_set_loop: diff --git a/drivers/gpu/drm/i915/display/intel_quirks.c b/drivers/gpu/drm/i915/display/intel_quirks.c index 98dd787b00e3..8a52b7a16774 100644 --- a/drivers/gpu/drm/i915/display/intel_quirks.c +++ b/drivers/gpu/drm/i915/display/intel_quirks.c @@ -53,6 +53,12 @@ static void quirk_increase_ddi_disabled_time(struct drm_i915_private *i915) drm_info(&i915->drm, "Applying Increase DDI Disabled quirk\n"); } +static void quirk_no_pps_backlight_power_hook(struct drm_i915_private *i915) +{ + i915->quirks |= QUIRK_NO_PPS_BACKLIGHT_POWER_HOOK; + drm_info(&i915->drm, "Applying no pps backlight power quirk\n"); +} + struct intel_quirk { int device; int subsystem_vendor; @@ -72,6 +78,12 @@ static int intel_dmi_reverse_brightness(const struct dmi_system_id *id) return 1; } +static int intel_dmi_no_pps_backlight(const struct dmi_system_id *id) +{ + DRM_INFO("No pps backlight support on %s\n", id->ident); + return 1; +} + static const struct intel_dmi_quirk intel_dmi_quirks[] = { { .dmi_id_list = &(const struct dmi_system_id[]) { @@ -96,6 +108,28 @@ static const struct intel_dmi_quirk intel_dmi_quirks[] = { }, .hook = quirk_invert_brightness, }, + { + .dmi_id_list = &(const struct dmi_system_id[]) { + { + .callback = intel_dmi_no_pps_backlight, + .ident = "Google Lillipup sku524294", + .matches = {DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "Google"), + DMI_EXACT_MATCH(DMI_BOARD_NAME, "Lindar"), + DMI_EXACT_MATCH(DMI_PRODUCT_SKU, "sku524294"), + }, + }, + { + .callback = intel_dmi_no_pps_backlight, + .ident = 
"Google Lillipup sku524295", + .matches = {DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "Google"), + DMI_EXACT_MATCH(DMI_BOARD_NAME, "Lindar"), + DMI_EXACT_MATCH(DMI_PRODUCT_SKU, "sku524295"), + }, + }, + { } + }, + .hook = quirk_no_pps_backlight_power_hook, + }, }; static struct intel_quirk intel_quirks[] = { diff --git a/drivers/gpu/drm/i915/display/intel_sdvo.c b/drivers/gpu/drm/i915/display/intel_sdvo.c index e4f91d7a5c60..6cb27599ea03 100644 --- a/drivers/gpu/drm/i915/display/intel_sdvo.c +++ b/drivers/gpu/drm/i915/display/intel_sdvo.c @@ -1824,7 +1824,7 @@ static void intel_enable_sdvo(struct intel_atomic_state *state, struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_sdvo *intel_sdvo = to_sdvo(encoder); - struct intel_crtc *intel_crtc = to_intel_crtc(pipe_config->uapi.crtc); + struct intel_crtc *crtc = to_intel_crtc(pipe_config->uapi.crtc); u32 temp; bool input1, input2; int i; @@ -1835,7 +1835,7 @@ static void intel_enable_sdvo(struct intel_atomic_state *state, intel_sdvo_write_sdvox(intel_sdvo, temp); for (i = 0; i < 2; i++) - intel_wait_for_vblank(dev_priv, intel_crtc->pipe); + intel_wait_for_vblank(dev_priv, crtc->pipe); success = intel_sdvo_get_trained_inputs(intel_sdvo, &input1, &input2); /* diff --git a/drivers/gpu/drm/i915/display/intel_snps_phy.c b/drivers/gpu/drm/i915/display/intel_snps_phy.c new file mode 100644 index 000000000000..18b52b64af95 --- /dev/null +++ b/drivers/gpu/drm/i915/display/intel_snps_phy.c @@ -0,0 +1,862 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2019 Intel Corporation + */ + +#include <linux/util_macros.h> + +#include "intel_de.h" +#include "intel_display_types.h" +#include "intel_snps_phy.h" + +/** + * DOC: Synopsis PHY support + * + * Synopsis PHYs are primarily programmed by looking up magic register values + * in tables rather than calculating the necessary values at runtime. + * + * Of special note is that the SNPS PHYs include a dedicated port PLL, known as + * an "MPLLB." The MPLLB replaces the shared DPLL functionality used on other + * platforms and must be programming directly during the modeset sequence + * since it is not handled by the shared DPLL framework as on other platforms. + */ + +void intel_snps_phy_wait_for_calibration(struct drm_i915_private *dev_priv) +{ + enum phy phy; + + for_each_phy_masked(phy, ~0) { + if (!intel_phy_is_snps(dev_priv, phy)) + continue; + + if (intel_de_wait_for_clear(dev_priv, ICL_PHY_MISC(phy), + DG2_PHY_DP_TX_ACK_MASK, 25)) + DRM_ERROR("SNPS PHY %c failed to calibrate after 25ms.\n", + phy); + } +} + +void intel_snps_phy_update_psr_power_state(struct drm_i915_private *dev_priv, + enum phy phy, bool enable) +{ + u32 val; + + if (!intel_phy_is_snps(dev_priv, phy)) + return; + + val = REG_FIELD_PREP(SNPS_PHY_TX_REQ_LN_DIS_PWR_STATE_PSR, + enable ? 
2 : 3); + intel_uncore_rmw(&dev_priv->uncore, SNPS_PHY_TX_REQ(phy), + SNPS_PHY_TX_REQ_LN_DIS_PWR_STATE_PSR, val); +} + +static const u32 dg2_ddi_translations[] = { + /* VS 0, pre-emph 0 */ + REG_FIELD_PREP(SNPS_PHY_TX_EQ_MAIN, 26), + + /* VS 0, pre-emph 1 */ + REG_FIELD_PREP(SNPS_PHY_TX_EQ_MAIN, 33) | + REG_FIELD_PREP(SNPS_PHY_TX_EQ_POST, 6), + + /* VS 0, pre-emph 2 */ + REG_FIELD_PREP(SNPS_PHY_TX_EQ_MAIN, 38) | + REG_FIELD_PREP(SNPS_PHY_TX_EQ_POST, 12), + + /* VS 0, pre-emph 3 */ + REG_FIELD_PREP(SNPS_PHY_TX_EQ_MAIN, 43) | + REG_FIELD_PREP(SNPS_PHY_TX_EQ_POST, 19), + + /* VS 1, pre-emph 0 */ + REG_FIELD_PREP(SNPS_PHY_TX_EQ_MAIN, 39), + + /* VS 1, pre-emph 1 */ + REG_FIELD_PREP(SNPS_PHY_TX_EQ_MAIN, 44) | + REG_FIELD_PREP(SNPS_PHY_TX_EQ_POST, 8), + + /* VS 1, pre-emph 2 */ + REG_FIELD_PREP(SNPS_PHY_TX_EQ_MAIN, 47) | + REG_FIELD_PREP(SNPS_PHY_TX_EQ_POST, 15), + + /* VS 2, pre-emph 0 */ + REG_FIELD_PREP(SNPS_PHY_TX_EQ_MAIN, 52), + + /* VS 2, pre-emph 1 */ + REG_FIELD_PREP(SNPS_PHY_TX_EQ_MAIN, 51) | + REG_FIELD_PREP(SNPS_PHY_TX_EQ_POST, 10), + + /* VS 3, pre-emph 0 */ + REG_FIELD_PREP(SNPS_PHY_TX_EQ_MAIN, 62), +}; + +void intel_snps_phy_ddi_vswing_sequence(struct intel_encoder *encoder, + u32 level) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + enum phy phy = intel_port_to_phy(dev_priv, encoder->port); + int n_entries, ln; + + n_entries = ARRAY_SIZE(dg2_ddi_translations); + if (level >= n_entries) + level = n_entries - 1; + + for (ln = 0; ln < 4; ln++) + intel_de_write(dev_priv, SNPS_PHY_TX_EQ(ln, phy), + dg2_ddi_translations[level]); +} + +/* + * Basic DP link rates with 100 MHz reference clock. + */ + +static const struct intel_mpllb_state dg2_dp_rbr_100 = { + .clock = 162000, + .ref_control = + REG_FIELD_PREP(SNPS_PHY_REF_CONTROL_REF_RANGE, 3), + .mpllb_cp = + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT, 4) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP, 20) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT_GS, 65) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP_GS, 127), + .mpllb_div = + REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV5_CLK_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_TX_CLK_DIV, 2) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_PMIX_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 2) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FREQ_VCO, 2), + .mpllb_div2 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 2) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_MULTIPLIER, 226), + .mpllb_fracn1 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_DEN, 5), + .mpllb_fracn2 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_QUOT, 39321) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_REM, 3), +}; + +static const struct intel_mpllb_state dg2_dp_hbr1_100 = { + .clock = 270000, + .ref_control = + REG_FIELD_PREP(SNPS_PHY_REF_CONTROL_REF_RANGE, 3), + .mpllb_cp = + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT, 4) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP, 20) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT_GS, 65) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP_GS, 127), + .mpllb_div = + REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV5_CLK_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_TX_CLK_DIV, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 2) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FREQ_VCO, 3), + .mpllb_div2 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 2) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_MULTIPLIER, 184), + .mpllb_fracn1 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_DEN, 1), +}; + +static const struct intel_mpllb_state dg2_dp_hbr2_100 = { + .clock = 540000, + .ref_control 
= + REG_FIELD_PREP(SNPS_PHY_REF_CONTROL_REF_RANGE, 3), + .mpllb_cp = + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT, 4) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP, 20) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT_GS, 65) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP_GS, 127), + .mpllb_div = + REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV5_CLK_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 2) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FREQ_VCO, 3), + .mpllb_div2 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 2) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_MULTIPLIER, 184), + .mpllb_fracn1 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_DEN, 1), +}; + +static const struct intel_mpllb_state dg2_dp_hbr3_100 = { + .clock = 810000, + .ref_control = + REG_FIELD_PREP(SNPS_PHY_REF_CONTROL_REF_RANGE, 3), + .mpllb_cp = + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT, 4) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP, 19) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT_GS, 65) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP_GS, 127), + .mpllb_div = + REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV5_CLK_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 2), + .mpllb_div2 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 2) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_MULTIPLIER, 292), + .mpllb_fracn1 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_DEN, 1), +}; + +static const struct intel_mpllb_state *dg2_dp_100_tables[] = { + &dg2_dp_rbr_100, + &dg2_dp_hbr1_100, + &dg2_dp_hbr2_100, + &dg2_dp_hbr3_100, + NULL, +}; + +/* + * Basic DP link rates with 38.4 MHz reference clock. + */ + +static const struct intel_mpllb_state dg2_dp_rbr_38_4 = { + .clock = 162000, + .ref_control = + REG_FIELD_PREP(SNPS_PHY_REF_CONTROL_REF_RANGE, 1), + .mpllb_cp = + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT, 5) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP, 25) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT_GS, 65) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP_GS, 127), + .mpllb_div = + REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV5_CLK_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_TX_CLK_DIV, 2) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_PMIX_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 2) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FREQ_VCO, 2), + .mpllb_div2 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_MULTIPLIER, 304), + .mpllb_fracn1 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_DEN, 1), + .mpllb_fracn2 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_QUOT, 49152), +}; + +static const struct intel_mpllb_state dg2_dp_hbr1_38_4 = { + .clock = 270000, + .ref_control = + REG_FIELD_PREP(SNPS_PHY_REF_CONTROL_REF_RANGE, 1), + .mpllb_cp = + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT, 5) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP, 25) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT_GS, 65) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP_GS, 127), + .mpllb_div = + REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV5_CLK_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_TX_CLK_DIV, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_PMIX_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 2) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FREQ_VCO, 3), + .mpllb_div2 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_MULTIPLIER, 248), + .mpllb_fracn1 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_DEN, 1), + .mpllb_fracn2 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_QUOT, 40960), +}; + +static const struct intel_mpllb_state 
dg2_dp_hbr2_38_4 = { + .clock = 540000, + .ref_control = + REG_FIELD_PREP(SNPS_PHY_REF_CONTROL_REF_RANGE, 1), + .mpllb_cp = + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT, 5) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP, 25) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT_GS, 65) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP_GS, 127), + .mpllb_div = + REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV5_CLK_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_PMIX_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 2) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FREQ_VCO, 3), + .mpllb_div2 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_MULTIPLIER, 248), + .mpllb_fracn1 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_DEN, 1), + .mpllb_fracn2 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_QUOT, 40960), +}; + +static const struct intel_mpllb_state dg2_dp_hbr3_38_4 = { + .clock = 810000, + .ref_control = + REG_FIELD_PREP(SNPS_PHY_REF_CONTROL_REF_RANGE, 1), + .mpllb_cp = + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT, 6) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP, 26) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT_GS, 65) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP_GS, 127), + .mpllb_div = + REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV5_CLK_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_PMIX_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 2), + .mpllb_div2 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_MULTIPLIER, 388), + .mpllb_fracn1 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_DEN, 1), + .mpllb_fracn2 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_QUOT, 61440), +}; + +static const struct intel_mpllb_state *dg2_dp_38_4_tables[] = { + &dg2_dp_rbr_38_4, + &dg2_dp_hbr1_38_4, + &dg2_dp_hbr2_38_4, + &dg2_dp_hbr3_38_4, + NULL, +}; + +/* + * eDP link rates with 100 MHz reference clock. 
+ */ + +static const struct intel_mpllb_state dg2_edp_r216 = { + .clock = 216000, + .ref_control = + REG_FIELD_PREP(SNPS_PHY_REF_CONTROL_REF_RANGE, 3), + .mpllb_cp = + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT, 4) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP, 19) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT_GS, 65) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP_GS, 127), + .mpllb_div = + REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV5_CLK_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_TX_CLK_DIV, 2) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_PMIX_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 2), + .mpllb_div2 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 2) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_MULTIPLIER, 312), + .mpllb_fracn1 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_DEN, 5), + .mpllb_fracn2 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_QUOT, 52428) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_REM, 4), + .mpllb_sscen = + REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_PEAK, 50961), + .mpllb_sscstep = + REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_STEPSIZE, 65752), +}; + +static const struct intel_mpllb_state dg2_edp_r243 = { + .clock = 243000, + .ref_control = + REG_FIELD_PREP(SNPS_PHY_REF_CONTROL_REF_RANGE, 3), + .mpllb_cp = + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT, 4) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP, 20) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT_GS, 65) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP_GS, 127), + .mpllb_div = + REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV5_CLK_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_TX_CLK_DIV, 2) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_PMIX_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 2), + .mpllb_div2 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 2) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_MULTIPLIER, 356), + .mpllb_fracn1 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_DEN, 5), + .mpllb_fracn2 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_QUOT, 26214) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_REM, 2), + .mpllb_sscen = + REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_PEAK, 57331), + .mpllb_sscstep = + REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_STEPSIZE, 73971), +}; + +static const struct intel_mpllb_state dg2_edp_r324 = { + .clock = 324000, + .ref_control = + REG_FIELD_PREP(SNPS_PHY_REF_CONTROL_REF_RANGE, 3), + .mpllb_cp = + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT, 4) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP, 20) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT_GS, 65) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP_GS, 127), + .mpllb_div = + REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV5_CLK_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_TX_CLK_DIV, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_PMIX_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 2) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FREQ_VCO, 2), + .mpllb_div2 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 2) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_MULTIPLIER, 226), + .mpllb_fracn1 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_DEN, 5), + .mpllb_fracn2 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_QUOT, 39321) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_REM, 3), + .mpllb_sscen = + REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_PEAK, 38221), + .mpllb_sscstep = + REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_STEPSIZE, 49314), +}; + +static const struct intel_mpllb_state dg2_edp_r432 = { + 
.clock = 432000, + .ref_control = + REG_FIELD_PREP(SNPS_PHY_REF_CONTROL_REF_RANGE, 3), + .mpllb_cp = + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT, 4) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP, 19) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT_GS, 65) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP_GS, 127), + .mpllb_div = + REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV5_CLK_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_TX_CLK_DIV, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_PMIX_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 2), + .mpllb_div2 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 2) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_MULTIPLIER, 312), + .mpllb_fracn1 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_DEN, 5), + .mpllb_fracn2 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_QUOT, 52428) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_REM, 4), + .mpllb_sscen = + REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_PEAK, 50961), + .mpllb_sscstep = + REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_STEPSIZE, 65752), +}; + +static const struct intel_mpllb_state *dg2_edp_tables[] = { + &dg2_dp_rbr_100, + &dg2_edp_r216, + &dg2_edp_r243, + &dg2_dp_hbr1_100, + &dg2_edp_r324, + &dg2_edp_r432, + &dg2_dp_hbr2_100, + &dg2_dp_hbr3_100, + NULL, +}; + +/* + * HDMI link rates with 100 MHz reference clock. + */ + +static const struct intel_mpllb_state dg2_hdmi_25_175 = { + .clock = 25175, + .ref_control = + REG_FIELD_PREP(SNPS_PHY_REF_CONTROL_REF_RANGE, 3), + .mpllb_cp = + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT, 5) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP, 15) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT_GS, 64) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP_GS, 124), + .mpllb_div = + REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV5_CLK_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_TX_CLK_DIV, 5) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_PMIX_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 2), + .mpllb_div2 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_MULTIPLIER, 128) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_HDMI_DIV, 1), + .mpllb_fracn1 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_DEN, 143), + .mpllb_fracn2 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_QUOT, 36663) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_REM, 71), + .mpllb_sscen = + REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_UP_SPREAD, 1), +}; + +static const struct intel_mpllb_state dg2_hdmi_27_0 = { + .clock = 27000, + .ref_control = + REG_FIELD_PREP(SNPS_PHY_REF_CONTROL_REF_RANGE, 3), + .mpllb_cp = + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT, 5) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP, 15) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT_GS, 64) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP_GS, 124), + .mpllb_div = + REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV5_CLK_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_TX_CLK_DIV, 5) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_PMIX_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 2), + .mpllb_div2 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_MULTIPLIER, 140) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_HDMI_DIV, 1), + .mpllb_fracn1 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_DEN, 5), + .mpllb_fracn2 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_QUOT, 26214) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_REM, 2), + .mpllb_sscen = + REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_UP_SPREAD, 1), +}; + +static const struct 
intel_mpllb_state dg2_hdmi_74_25 = { + .clock = 74250, + .ref_control = + REG_FIELD_PREP(SNPS_PHY_REF_CONTROL_REF_RANGE, 3), + .mpllb_cp = + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT, 4) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP, 15) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT_GS, 64) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP_GS, 124), + .mpllb_div = + REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV5_CLK_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_TX_CLK_DIV, 3) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_PMIX_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 2) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FREQ_VCO, 3), + .mpllb_div2 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_MULTIPLIER, 86) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_HDMI_DIV, 1), + .mpllb_fracn1 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_DEN, 5), + .mpllb_fracn2 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_QUOT, 26214) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_REM, 2), + .mpllb_sscen = + REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_UP_SPREAD, 1), +}; + +static const struct intel_mpllb_state dg2_hdmi_148_5 = { + .clock = 148500, + .ref_control = + REG_FIELD_PREP(SNPS_PHY_REF_CONTROL_REF_RANGE, 3), + .mpllb_cp = + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT, 4) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP, 15) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT_GS, 64) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP_GS, 124), + .mpllb_div = + REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV5_CLK_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_TX_CLK_DIV, 2) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_PMIX_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 2) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FREQ_VCO, 3), + .mpllb_div2 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_MULTIPLIER, 86) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_HDMI_DIV, 1), + .mpllb_fracn1 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_DEN, 5), + .mpllb_fracn2 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_QUOT, 26214) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_REM, 2), + .mpllb_sscen = + REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_UP_SPREAD, 1), +}; + +static const struct intel_mpllb_state dg2_hdmi_594 = { + .clock = 594000, + .ref_control = + REG_FIELD_PREP(SNPS_PHY_REF_CONTROL_REF_RANGE, 3), + .mpllb_cp = + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT, 4) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP, 15) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT_GS, 64) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP_GS, 124), + .mpllb_div = + REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV5_CLK_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_PMIX_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 2) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FREQ_VCO, 3), + .mpllb_div2 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_MULTIPLIER, 86) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_HDMI_DIV, 1), + .mpllb_fracn1 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_EN, 1) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_DEN, 5), + .mpllb_fracn2 = + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_QUOT, 26214) | + REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_REM, 2), + .mpllb_sscen = + REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_UP_SPREAD, 1), +}; + +static const struct intel_mpllb_state *dg2_hdmi_tables[] = { + &dg2_hdmi_25_175, + &dg2_hdmi_27_0, + &dg2_hdmi_74_25, + &dg2_hdmi_148_5, + &dg2_hdmi_594, + NULL, +}; + +static const struct intel_mpllb_state ** +intel_mpllb_tables_get(struct 
intel_crtc_state *crtc_state, + struct intel_encoder *encoder) +{ + if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_EDP)) { + return dg2_edp_tables; + } else if (intel_crtc_has_dp_encoder(crtc_state)) { + /* + * FIXME: Initially we're just enabling the "combo" outputs on + * port A-D. The MPLLB for those ports takes an input from the + * "Display Filter PLL" which always has an output frequency + * of 100 MHz, hence the use of the _100 tables below. + * + * Once we enable port TC1 it will either use the same 100 MHz + * "Display Filter PLL" (when strapped to support a native + * display connection) or different 38.4 MHz "Filter PLL" when + * strapped to support a USB connection, so we'll need to check + * that to determine which table to use. + */ + if (0) + return dg2_dp_38_4_tables; + else + return dg2_dp_100_tables; + } else if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) { + return dg2_hdmi_tables; + } + + MISSING_CASE(encoder->type); + return NULL; +} + +int intel_mpllb_calc_state(struct intel_crtc_state *crtc_state, + struct intel_encoder *encoder) +{ + const struct intel_mpllb_state **tables; + int i; + + if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) { + if (intel_snps_phy_check_hdmi_link_rate(crtc_state->port_clock) + != MODE_OK) { + /* + * FIXME: Can only support fixed HDMI frequencies + * until we have a proper algorithm under a valid + * license. + */ + DRM_DEBUG_KMS("Can't support HDMI link rate %d\n", + crtc_state->port_clock); + return -EINVAL; + } + } + + tables = intel_mpllb_tables_get(crtc_state, encoder); + if (!tables) + return -EINVAL; + + for (i = 0; tables[i]; i++) { + if (crtc_state->port_clock <= tables[i]->clock) { + crtc_state->mpllb_state = *tables[i]; + return 0; + } + } + + return -EINVAL; +} + +void intel_mpllb_enable(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + const struct intel_mpllb_state *pll_state = &crtc_state->mpllb_state; + enum phy phy = intel_port_to_phy(dev_priv, encoder->port); + i915_reg_t enable_reg = (phy <= PHY_D ? + DG2_PLL_ENABLE(phy) : MG_PLL_ENABLE(0)); + + /* + * 3. Software programs the following PLL registers for the desired + * frequency. + */ + intel_de_write(dev_priv, SNPS_PHY_MPLLB_CP(phy), pll_state->mpllb_cp); + intel_de_write(dev_priv, SNPS_PHY_MPLLB_DIV(phy), pll_state->mpllb_div); + intel_de_write(dev_priv, SNPS_PHY_MPLLB_DIV2(phy), pll_state->mpllb_div2); + intel_de_write(dev_priv, SNPS_PHY_MPLLB_SSCEN(phy), pll_state->mpllb_sscen); + intel_de_write(dev_priv, SNPS_PHY_MPLLB_SSCSTEP(phy), pll_state->mpllb_sscstep); + intel_de_write(dev_priv, SNPS_PHY_MPLLB_FRACN1(phy), pll_state->mpllb_fracn1); + intel_de_write(dev_priv, SNPS_PHY_MPLLB_FRACN2(phy), pll_state->mpllb_fracn2); + + /* + * 4. If the frequency will result in a change to the voltage + * requirement, follow the Display Voltage Frequency Switching - + * Sequence Before Frequency Change. + * + * We handle this step in bxt_set_cdclk(). + */ + + /* 5. Software sets DPLL_ENABLE [PLL Enable] to "1". */ + intel_uncore_rmw(&dev_priv->uncore, enable_reg, 0, PLL_ENABLE); + + /* + * 9. Software sets SNPS_PHY_MPLLB_DIV dp_mpllb_force_en to "1". This + * will keep the PLL running during the DDI lane programming and any + * typeC DP cable disconnect. Do not set the force before enabling the + * PLL because that will start the PLL before it has sampled the + * divider values. 
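Illustrative sketch, separate from the patch hunks above: the dg2_* tables pack each register field with REG_FIELD_PREP() and the readout path extracts them again with REG_FIELD_GET(). The small userspace program below mimics that pack/unpack pattern; the DEMO_* mask placements and the field_prep()/field_get() helpers are hypothetical stand-ins (the real SNPS_PHY_MPLLB_* masks live in i915_reg.h).

#include <stdint.h>
#include <stdio.h>

/* Stand-ins for REG_FIELD_PREP()/REG_FIELD_GET(): shift the value to the
 * mask's lowest set bit on the way in, and back out on the way out. */
static uint32_t field_prep(uint32_t mask, uint32_t val)
{
	return (val << __builtin_ctz(mask)) & mask;
}

static uint32_t field_get(uint32_t mask, uint32_t reg)
{
	return (reg & mask) >> __builtin_ctz(mask);
}

#define DEMO_MPLLB_MULTIPLIER	0x000fff00u	/* bits 19:8, illustrative only */
#define DEMO_MPLLB_REF_CLK_DIV	0x00700000u	/* bits 22:20, illustrative only */

int main(void)
{
	/* Pack two fields the way the dg2_* tables build .mpllb_div2 ... */
	uint32_t div2 = field_prep(DEMO_MPLLB_MULTIPLIER, 312) |
			field_prep(DEMO_MPLLB_REF_CLK_DIV, 2);

	/* ... and unpack them the way the readout path reads them back. */
	printf("multiplier=%u ref_clk_div=%u\n",
	       field_get(DEMO_MPLLB_MULTIPLIER, div2),
	       field_get(DEMO_MPLLB_REF_CLK_DIV, div2));
	return 0;
}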
+ */ + intel_de_write(dev_priv, SNPS_PHY_MPLLB_DIV(phy), + pll_state->mpllb_div | SNPS_PHY_MPLLB_FORCE_EN); + + /* + * 10. Software polls on register DPLL_ENABLE [PLL Lock] to confirm PLL + * is locked at new settings. This register bit is sampling PHY + * dp_mpllb_state interface signal. + */ + if (intel_de_wait_for_set(dev_priv, enable_reg, PLL_LOCK, 5)) + DRM_ERROR("Port %c PLL not locked\n", phy_name(phy)); + + /* + * 11. If the frequency will result in a change to the voltage + * requirement, follow the Display Voltage Frequency Switching - + * Sequence After Frequency Change. + * + * We handle this step in bxt_set_cdclk(). + */ +} + +void intel_mpllb_disable(struct intel_encoder *encoder) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + enum phy phy = intel_port_to_phy(dev_priv, encoder->port); + i915_reg_t enable_reg = (phy <= PHY_D ? + DG2_PLL_ENABLE(phy) : MG_PLL_ENABLE(0)); + + /* + * 1. If the frequency will result in a change to the voltage + * requirement, follow the Display Voltage Frequency Switching - + * Sequence Before Frequency Change. + * + * We handle this step in bxt_set_cdclk(). + */ + + /* 2. Software programs DPLL_ENABLE [PLL Enable] to "0" */ + intel_uncore_rmw(&dev_priv->uncore, enable_reg, PLL_ENABLE, 0); + + /* + * 4. Software programs SNPS_PHY_MPLLB_DIV dp_mpllb_force_en to "0". + * This will allow the PLL to stop running. + */ + intel_uncore_rmw(&dev_priv->uncore, SNPS_PHY_MPLLB_DIV(phy), + SNPS_PHY_MPLLB_FORCE_EN, 0); + + /* + * 5. Software polls DPLL_ENABLE [PLL Lock] for PHY acknowledgment + * (dp_txX_ack) that the new transmitter setting request is completed. + */ + if (intel_de_wait_for_clear(dev_priv, enable_reg, PLL_LOCK, 5)) + DRM_ERROR("Port %c PLL not locked\n", phy_name(phy)); + + /* + * 6. If the frequency will result in a change to the voltage + * requirement, follow the Display Voltage Frequency Switching - + * Sequence After Frequency Change. + * + * We handle this step in bxt_set_cdclk(). 
+ */ +} + +int intel_mpllb_calc_port_clock(struct intel_encoder *encoder, + const struct intel_mpllb_state *pll_state) +{ + unsigned int frac_quot = 0, frac_rem = 0, frac_den = 1; + unsigned int multiplier, tx_clk_div, refclk; + bool frac_en; + + if (0) + refclk = 38400; + else + refclk = 100000; + + refclk >>= REG_FIELD_GET(SNPS_PHY_MPLLB_REF_CLK_DIV, pll_state->mpllb_div2) - 1; + + frac_en = REG_FIELD_GET(SNPS_PHY_MPLLB_FRACN_EN, pll_state->mpllb_fracn1); + + if (frac_en) { + frac_quot = REG_FIELD_GET(SNPS_PHY_MPLLB_FRACN_QUOT, pll_state->mpllb_fracn2); + frac_rem = REG_FIELD_GET(SNPS_PHY_MPLLB_FRACN_REM, pll_state->mpllb_fracn2); + frac_den = REG_FIELD_GET(SNPS_PHY_MPLLB_FRACN_DEN, pll_state->mpllb_fracn1); + } + + multiplier = REG_FIELD_GET(SNPS_PHY_MPLLB_MULTIPLIER, pll_state->mpllb_div2) / 2 + 16; + + tx_clk_div = REG_FIELD_GET(SNPS_PHY_MPLLB_TX_CLK_DIV, pll_state->mpllb_div); + + return DIV_ROUND_CLOSEST_ULL(mul_u32_u32(refclk, (multiplier << 16) + frac_quot) + + DIV_ROUND_CLOSEST(refclk * frac_rem, frac_den), + 10 << (tx_clk_div + 16)); +} + +void intel_mpllb_readout_hw_state(struct intel_encoder *encoder, + struct intel_mpllb_state *pll_state) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + enum phy phy = intel_port_to_phy(dev_priv, encoder->port); + + pll_state->mpllb_cp = intel_de_read(dev_priv, SNPS_PHY_MPLLB_CP(phy)); + pll_state->mpllb_div = intel_de_read(dev_priv, SNPS_PHY_MPLLB_DIV(phy)); + pll_state->mpllb_div2 = intel_de_read(dev_priv, SNPS_PHY_MPLLB_DIV2(phy)); + pll_state->mpllb_sscen = intel_de_read(dev_priv, SNPS_PHY_MPLLB_SSCEN(phy)); + pll_state->mpllb_sscstep = intel_de_read(dev_priv, SNPS_PHY_MPLLB_SSCSTEP(phy)); + pll_state->mpllb_fracn1 = intel_de_read(dev_priv, SNPS_PHY_MPLLB_FRACN1(phy)); + pll_state->mpllb_fracn2 = intel_de_read(dev_priv, SNPS_PHY_MPLLB_FRACN2(phy)); + + /* + * REF_CONTROL is under firmware control and never programmed by the + * driver; we read it only for sanity checking purposes. The bspec + * only tells us the expected value for one field in this register, + * so we'll only read out those specific bits here. + */ + pll_state->ref_control = intel_de_read(dev_priv, SNPS_PHY_REF_CONTROL(phy)) & + SNPS_PHY_REF_CONTROL_REF_RANGE; + + /* + * MPLLB_DIV is programmed twice, once with the software-computed + * state, then again with the MPLLB_FORCE_EN bit added. Drop that + * extra bit during readout so that we return the actual expected + * software state. 
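Illustrative sketch, separate from the patch hunks above: plugging the dg2_edp_r216 values (REF_CLK_DIV = 2, MULTIPLIER = 312, FRACN quot/rem/den = 52428/4/5, TX_CLK_DIV = 2) and the 100 MHz reference into the intel_mpllb_calc_port_clock() formula should give back 216000 kHz, that table's .clock. The demo_mpllb_port_clock() helper below is a plain-integer restatement of the same arithmetic, not kernel code.

#include <stdint.h>
#include <stdio.h>

/* Same arithmetic as intel_mpllb_calc_port_clock(), on already-unpacked
 * register fields, using round-to-closest integer division throughout. */
static uint32_t demo_mpllb_port_clock(uint32_t refclk_khz, uint32_t ref_clk_div,
				      uint32_t multiplier_field, uint32_t tx_clk_div,
				      uint32_t quot, uint32_t rem, uint32_t den)
{
	uint64_t refclk = refclk_khz >> (ref_clk_div - 1);
	uint64_t mult = multiplier_field / 2 + 16;
	uint64_t num = refclk * ((mult << 16) + quot) + (refclk * rem + den / 2) / den;
	uint64_t div = 10ull << (tx_clk_div + 16);

	return (uint32_t)((num + div / 2) / div);
}

int main(void)
{
	/* dg2_edp_r216 with the 100 MHz "Display Filter PLL" reference. */
	printf("%u kHz\n", demo_mpllb_port_clock(100000, 2, 312, 2, 52428, 4, 5));
	return 0;	/* expected output: 216000 kHz */
}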
+ */ + pll_state->mpllb_div &= ~SNPS_PHY_MPLLB_FORCE_EN; +} + +int intel_snps_phy_check_hdmi_link_rate(int clock) +{ + const struct intel_mpllb_state **tables = dg2_hdmi_tables; + int i; + + for (i = 0; tables[i]; i++) { + if (clock == tables[i]->clock) + return MODE_OK; + } + + return MODE_CLOCK_RANGE; +} diff --git a/drivers/gpu/drm/i915/display/intel_snps_phy.h b/drivers/gpu/drm/i915/display/intel_snps_phy.h new file mode 100644 index 000000000000..6261ff88ef5c --- /dev/null +++ b/drivers/gpu/drm/i915/display/intel_snps_phy.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef __INTEL_SNPS_PHY_H__ +#define __INTEL_SNPS_PHY_H__ + +#include <linux/types.h> + +struct drm_i915_private; +struct intel_encoder; +struct intel_crtc_state; +struct intel_mpllb_state; +enum phy; + +void intel_snps_phy_wait_for_calibration(struct drm_i915_private *dev_priv); +void intel_snps_phy_update_psr_power_state(struct drm_i915_private *dev_priv, + enum phy phy, bool enable); + +int intel_mpllb_calc_state(struct intel_crtc_state *crtc_state, + struct intel_encoder *encoder); +void intel_mpllb_enable(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state); +void intel_mpllb_disable(struct intel_encoder *encoder); +void intel_mpllb_readout_hw_state(struct intel_encoder *encoder, + struct intel_mpllb_state *pll_state); +int intel_mpllb_calc_port_clock(struct intel_encoder *encoder, + const struct intel_mpllb_state *pll_state); + +int intel_snps_phy_check_hdmi_link_rate(int clock); +void intel_snps_phy_ddi_vswing_sequence(struct intel_encoder *encoder, + u32 level); + +#endif /* __INTEL_SNPS_PHY_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_sprite.c b/drivers/gpu/drm/i915/display/intel_sprite.c index 4ae9a7455b23..08116f41da26 100644 --- a/drivers/gpu/drm/i915/display/intel_sprite.c +++ b/drivers/gpu/drm/i915/display/intel_sprite.c @@ -1856,7 +1856,7 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv, zpos = sprite + 1; drm_plane_create_zpos_immutable_property(&plane->base, zpos); - drm_plane_helper_add(&plane->base, &intel_plane_helper_funcs); + intel_plane_helper_add(plane); return plane; diff --git a/drivers/gpu/drm/i915/display/intel_tc.c b/drivers/gpu/drm/i915/display/intel_tc.c index c23c210a55f5..3ffece568ed9 100644 --- a/drivers/gpu/drm/i915/display/intel_tc.c +++ b/drivers/gpu/drm/i915/display/intel_tc.c @@ -556,7 +556,7 @@ intel_tc_port_get_target_mode(struct intel_digital_port *dig_port) } static void intel_tc_port_reset_mode(struct intel_digital_port *dig_port, - int required_lanes) + int required_lanes, bool force_disconnect) { struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); enum tc_port_mode old_tc_mode = dig_port->tc_mode; @@ -572,7 +572,8 @@ static void intel_tc_port_reset_mode(struct intel_digital_port *dig_port, } icl_tc_phy_disconnect(dig_port); - icl_tc_phy_connect(dig_port, required_lanes); + if (!force_disconnect) + icl_tc_phy_connect(dig_port, required_lanes); drm_dbg_kms(&i915->drm, "Port %s: TC port mode reset (%s -> %s)\n", dig_port->tc_port_name, @@ -662,7 +663,7 @@ bool intel_tc_port_connected(struct intel_encoder *encoder) } static void __intel_tc_port_lock(struct intel_digital_port *dig_port, - int required_lanes) + int required_lanes, bool force_disconnect) { struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); intel_wakeref_t wakeref; @@ -676,8 +677,9 @@ static void __intel_tc_port_lock(struct intel_digital_port *dig_port, tc_cold_wref = 
tc_cold_block(dig_port); - if (intel_tc_port_needs_reset(dig_port)) - intel_tc_port_reset_mode(dig_port, required_lanes); + if (force_disconnect || intel_tc_port_needs_reset(dig_port)) + intel_tc_port_reset_mode(dig_port, required_lanes, + force_disconnect); tc_cold_unblock(dig_port, tc_cold_wref); } @@ -688,7 +690,7 @@ static void __intel_tc_port_lock(struct intel_digital_port *dig_port, void intel_tc_port_lock(struct intel_digital_port *dig_port) { - __intel_tc_port_lock(dig_port, 1); + __intel_tc_port_lock(dig_port, 1, false); } void intel_tc_port_unlock(struct intel_digital_port *dig_port) @@ -702,6 +704,24 @@ void intel_tc_port_unlock(struct intel_digital_port *dig_port) wakeref); } +/** + * intel_tc_port_disconnect_phy: disconnect TypeC PHY from display port + * @dig_port: digital port + * + * Disconnect the given digital port from its TypeC PHY (handing back the + * control of the PHY to the TypeC subsystem). The only purpose of this + * function is to force the disconnect even with a TypeC display output still + * plugged to the TypeC connector, which is required by the TypeC firmwares + * during system suspend and shutdown. Otherwise - during the unplug event + * handling - the PHY ownership is released automatically by + * intel_tc_port_reset_mode(), when calling this function is not required. + */ +void intel_tc_port_disconnect_phy(struct intel_digital_port *dig_port) +{ + __intel_tc_port_lock(dig_port, 1, true); + intel_tc_port_unlock(dig_port); +} + bool intel_tc_port_ref_held(struct intel_digital_port *dig_port) { return mutex_is_locked(&dig_port->tc_lock) || @@ -711,7 +731,7 @@ bool intel_tc_port_ref_held(struct intel_digital_port *dig_port) void intel_tc_port_get_link(struct intel_digital_port *dig_port, int required_lanes) { - __intel_tc_port_lock(dig_port, required_lanes); + __intel_tc_port_lock(dig_port, required_lanes, false); dig_port->tc_link_refcount++; intel_tc_port_unlock(dig_port); } diff --git a/drivers/gpu/drm/i915/display/intel_tc.h b/drivers/gpu/drm/i915/display/intel_tc.h index 0eacbd76ec15..0c881f645e27 100644 --- a/drivers/gpu/drm/i915/display/intel_tc.h +++ b/drivers/gpu/drm/i915/display/intel_tc.h @@ -13,6 +13,8 @@ struct intel_digital_port; struct intel_encoder; bool intel_tc_port_connected(struct intel_encoder *encoder); +void intel_tc_port_disconnect_phy(struct intel_digital_port *dig_port); + u32 intel_tc_port_get_lane_mask(struct intel_digital_port *dig_port); u32 intel_tc_port_get_pin_assignment_mask(struct intel_digital_port *dig_port); int intel_tc_port_fia_max_lane_count(struct intel_digital_port *dig_port); diff --git a/drivers/gpu/drm/i915/display/intel_tv.c b/drivers/gpu/drm/i915/display/intel_tv.c index aa52af7891f0..d02f09f7e750 100644 --- a/drivers/gpu/drm/i915/display/intel_tv.c +++ b/drivers/gpu/drm/i915/display/intel_tv.c @@ -1420,7 +1420,7 @@ static void intel_tv_pre_enable(struct intel_atomic_state *state, const struct drm_connector_state *conn_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_crtc *intel_crtc = to_intel_crtc(pipe_config->uapi.crtc); + struct intel_crtc *crtc = to_intel_crtc(pipe_config->uapi.crtc); struct intel_tv *intel_tv = enc_to_tv(encoder); const struct intel_tv_connector_state *tv_conn_state = to_intel_tv_connector_state(conn_state); @@ -1466,7 +1466,7 @@ static void intel_tv_pre_enable(struct intel_atomic_state *state, break; } - tv_ctl |= TV_ENC_PIPE_SEL(intel_crtc->pipe); + tv_ctl |= TV_ENC_PIPE_SEL(crtc->pipe); switch (tv_mode->oversample) { case 8: @@ -1571,8 +1571,7 @@ 
static int intel_tv_detect_type(struct intel_tv *intel_tv, struct drm_connector *connector) { - struct drm_crtc *crtc = connector->state->crtc; - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + struct intel_crtc *crtc = to_intel_crtc(connector->state->crtc); struct drm_device *dev = connector->dev; struct drm_i915_private *dev_priv = to_i915(dev); u32 tv_ctl, save_tv_ctl; @@ -1594,7 +1593,7 @@ intel_tv_detect_type(struct intel_tv *intel_tv, /* Poll for TV detection */ tv_ctl &= ~(TV_ENC_ENABLE | TV_ENC_PIPE_SEL_MASK | TV_TEST_MODE_MASK); tv_ctl |= TV_TEST_MODE_MONITOR_DETECT; - tv_ctl |= TV_ENC_PIPE_SEL(intel_crtc->pipe); + tv_ctl |= TV_ENC_PIPE_SEL(crtc->pipe); tv_dac &= ~(TVDAC_SENSE_MASK | DAC_A_MASK | DAC_B_MASK | DAC_C_MASK); tv_dac |= (TVDAC_STATE_CHG_EN | @@ -1619,7 +1618,7 @@ intel_tv_detect_type(struct intel_tv *intel_tv, intel_de_write(dev_priv, TV_DAC, tv_dac); intel_de_posting_read(dev_priv, TV_DAC); - intel_wait_for_vblank(dev_priv, intel_crtc->pipe); + intel_wait_for_vblank(dev_priv, crtc->pipe); type = -1; tv_dac = intel_de_read(dev_priv, TV_DAC); @@ -1652,7 +1651,7 @@ intel_tv_detect_type(struct intel_tv *intel_tv, intel_de_posting_read(dev_priv, TV_CTL); /* For unknown reasons the hw barfs if we don't do this vblank wait. */ - intel_wait_for_vblank(dev_priv, intel_crtc->pipe); + intel_wait_for_vblank(dev_priv, crtc->pipe); /* Restore interrupt config */ if (connector->polled & DRM_CONNECTOR_POLL_HPD) { diff --git a/drivers/gpu/drm/i915/display/intel_vbt_defs.h b/drivers/gpu/drm/i915/display/intel_vbt_defs.h index dbe24d7e7375..330077c2e588 100644 --- a/drivers/gpu/drm/i915/display/intel_vbt_defs.h +++ b/drivers/gpu/drm/i915/display/intel_vbt_defs.h @@ -456,7 +456,7 @@ struct child_device_config { u16 dp_gpio_pin_num; /* 195 */ u8 dp_iboost_level:4; /* 196 */ u8 hdmi_iboost_level:4; /* 196 */ - u8 dp_max_link_rate:3; /* 216/230 CNL+ */ + u8 dp_max_link_rate:3; /* 216/230 GLK+ */ u8 dp_max_link_rate_reserved:5; /* 216/230 */ } __packed; diff --git a/drivers/gpu/drm/i915/display/intel_vdsc.c b/drivers/gpu/drm/i915/display/intel_vdsc.c index 85749370508c..df3286aa6999 100644 --- a/drivers/gpu/drm/i915/display/intel_vdsc.c +++ b/drivers/gpu/drm/i915/display/intel_vdsc.c @@ -348,7 +348,10 @@ bool intel_dsc_source_support(const struct intel_crtc_state *crtc_state) if (DISPLAY_VER(i915) >= 12) return true; - if ((DISPLAY_VER(i915) >= 11 || IS_CANNONLAKE(i915)) && (pipe != PIPE_A || (cpu_transcoder == TRANSCODER_EDP || cpu_transcoder == TRANSCODER_DSI_0 || cpu_transcoder == TRANSCODER_DSI_1))) + if (DISPLAY_VER(i915) >= 11 && + (pipe != PIPE_A || cpu_transcoder == TRANSCODER_EDP || + cpu_transcoder == TRANSCODER_DSI_0 || + cpu_transcoder == TRANSCODER_DSI_1)) return true; return false; diff --git a/drivers/gpu/drm/i915/display/intel_vga.c b/drivers/gpu/drm/i915/display/intel_vga.c index f002b82ba9c0..fa779f7ea415 100644 --- a/drivers/gpu/drm/i915/display/intel_vga.c +++ b/drivers/gpu/drm/i915/display/intel_vga.c @@ -29,6 +29,9 @@ void intel_vga_disable(struct drm_i915_private *dev_priv) i915_reg_t vga_reg = intel_vga_cntrl_reg(dev_priv); u8 sr1; + if (intel_de_read(dev_priv, vga_reg) & VGA_DISP_DISABLE) + return; + /* WaEnableVGAAccessThroughIOPort:ctg,elk,ilk,snb,ivb,vlv,hsw */ vga_get_uninterruptible(pdev, VGA_RSRC_LEGACY_IO); outb(SR01, VGA_SR_INDEX); @@ -121,9 +124,9 @@ intel_vga_set_state(struct drm_i915_private *i915, bool enable_decode) } static unsigned int -intel_vga_set_decode(void *cookie, bool enable_decode) +intel_vga_set_decode(struct pci_dev *pdev, bool 
enable_decode) { - struct drm_i915_private *i915 = cookie; + struct drm_i915_private *i915 = pdev_to_i915(pdev); intel_vga_set_state(i915, enable_decode); @@ -136,6 +139,7 @@ intel_vga_set_decode(void *cookie, bool enable_decode) int intel_vga_register(struct drm_i915_private *i915) { + struct pci_dev *pdev = to_pci_dev(i915->drm.dev); int ret; @@ -147,7 +151,7 @@ int intel_vga_register(struct drm_i915_private *i915) * then we do not take part in VGA arbitration and the * vga_client_register() fails with -ENODEV. */ - ret = vga_client_register(pdev, i915, NULL, intel_vga_set_decode); + ret = vga_client_register(pdev, intel_vga_set_decode); if (ret && ret != -ENODEV) return ret; @@ -158,5 +162,5 @@ void intel_vga_unregister(struct drm_i915_private *i915) { struct pci_dev *pdev = to_pci_dev(i915->drm.dev); - vga_client_register(pdev, NULL, NULL, NULL); + vga_client_unregister(pdev); } diff --git a/drivers/gpu/drm/i915/display/skl_scaler.c b/drivers/gpu/drm/i915/display/skl_scaler.c index 394b7bbf48d8..37eabeff8197 100644 --- a/drivers/gpu/drm/i915/display/skl_scaler.c +++ b/drivers/gpu/drm/i915/display/skl_scaler.c @@ -96,9 +96,8 @@ skl_update_scaler(struct intel_crtc_state *crtc_state, bool force_detach, { struct intel_crtc_scaler_state *scaler_state = &crtc_state->scaler_state; - struct intel_crtc *intel_crtc = - to_intel_crtc(crtc_state->uapi.crtc); - struct drm_i915_private *dev_priv = to_i915(intel_crtc->base.dev); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); const struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode; @@ -141,7 +140,7 @@ skl_update_scaler(struct intel_crtc_state *crtc_state, bool force_detach, drm_dbg_kms(&dev_priv->drm, "scaler_user index %u.%u: " "Staged freeing scaler id %d scaler_users = 0x%x\n", - intel_crtc->pipe, scaler_user, *scaler_id, + crtc->pipe, scaler_user, *scaler_id, scaler_state->scaler_users); *scaler_id = -1; } @@ -167,7 +166,7 @@ skl_update_scaler(struct intel_crtc_state *crtc_state, bool force_detach, drm_dbg_kms(&dev_priv->drm, "scaler_user index %u.%u: src %ux%u dst %ux%u " "size is out of scaler range\n", - intel_crtc->pipe, scaler_user, src_w, src_h, + crtc->pipe, scaler_user, src_w, src_h, dst_w, dst_h); return -EINVAL; } @@ -176,7 +175,7 @@ skl_update_scaler(struct intel_crtc_state *crtc_state, bool force_detach, scaler_state->scaler_users |= (1 << scaler_user); drm_dbg_kms(&dev_priv->drm, "scaler_user index %u.%u: " "staged scaling request for %ux%u->%ux%u scaler_users = 0x%x\n", - intel_crtc->pipe, scaler_user, src_w, src_h, dst_w, dst_h, + crtc->pipe, scaler_user, src_w, src_h, dst_w, dst_h, scaler_state->scaler_users); return 0; @@ -295,12 +294,12 @@ int skl_update_scaler_plane(struct intel_crtc_state *crtc_state, return 0; } -static int cnl_coef_tap(int i) +static int glk_coef_tap(int i) { return i % 7; } -static u16 cnl_nearest_filter_coef(int t) +static u16 glk_nearest_filter_coef(int t) { return t == 3 ? 
0x0800 : 0x3000; } @@ -342,29 +341,29 @@ static u16 cnl_nearest_filter_coef(int t) * */ -static void cnl_program_nearest_filter_coefs(struct drm_i915_private *dev_priv, +static void glk_program_nearest_filter_coefs(struct drm_i915_private *dev_priv, enum pipe pipe, int id, int set) { int i; - intel_de_write_fw(dev_priv, CNL_PS_COEF_INDEX_SET(pipe, id, set), + intel_de_write_fw(dev_priv, GLK_PS_COEF_INDEX_SET(pipe, id, set), PS_COEE_INDEX_AUTO_INC); for (i = 0; i < 17 * 7; i += 2) { u32 tmp; int t; - t = cnl_coef_tap(i); - tmp = cnl_nearest_filter_coef(t); + t = glk_coef_tap(i); + tmp = glk_nearest_filter_coef(t); - t = cnl_coef_tap(i + 1); - tmp |= cnl_nearest_filter_coef(t) << 16; + t = glk_coef_tap(i + 1); + tmp |= glk_nearest_filter_coef(t) << 16; - intel_de_write_fw(dev_priv, CNL_PS_COEF_DATA_SET(pipe, id, set), + intel_de_write_fw(dev_priv, GLK_PS_COEF_DATA_SET(pipe, id, set), tmp); } - intel_de_write_fw(dev_priv, CNL_PS_COEF_INDEX_SET(pipe, id, set), 0); + intel_de_write_fw(dev_priv, GLK_PS_COEF_INDEX_SET(pipe, id, set), 0); } static u32 skl_scaler_get_filter_select(enum drm_scaling_filter filter, int set) @@ -387,7 +386,7 @@ static void skl_scaler_setup_filter(struct drm_i915_private *dev_priv, enum pipe case DRM_SCALING_FILTER_DEFAULT: break; case DRM_SCALING_FILTER_NEAREST_NEIGHBOR: - cnl_program_nearest_filter_coefs(dev_priv, pipe, id, set); + glk_program_nearest_filter_coefs(dev_priv, pipe, id, set); break; default: MISSING_CASE(filter); @@ -515,17 +514,17 @@ skl_program_plane_scaler(struct intel_plane *plane, (crtc_w << 16) | crtc_h); } -static void skl_detach_scaler(struct intel_crtc *intel_crtc, int id) +static void skl_detach_scaler(struct intel_crtc *crtc, int id) { - struct drm_device *dev = intel_crtc->base.dev; + struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); unsigned long irqflags; spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); - intel_de_write_fw(dev_priv, SKL_PS_CTRL(intel_crtc->pipe, id), 0); - intel_de_write_fw(dev_priv, SKL_PS_WIN_POS(intel_crtc->pipe, id), 0); - intel_de_write_fw(dev_priv, SKL_PS_WIN_SZ(intel_crtc->pipe, id), 0); + intel_de_write_fw(dev_priv, SKL_PS_CTRL(crtc->pipe, id), 0); + intel_de_write_fw(dev_priv, SKL_PS_WIN_POS(crtc->pipe, id), 0); + intel_de_write_fw(dev_priv, SKL_PS_WIN_SZ(crtc->pipe, id), 0); spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); } @@ -535,15 +534,15 @@ static void skl_detach_scaler(struct intel_crtc *intel_crtc, int id) */ void skl_detach_scalers(const struct intel_crtc_state *crtc_state) { - struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->uapi.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); const struct intel_crtc_scaler_state *scaler_state = &crtc_state->scaler_state; int i; /* loop through and disable scalers that aren't in use */ - for (i = 0; i < intel_crtc->num_scalers; i++) { + for (i = 0; i < crtc->num_scalers; i++) { if (!scaler_state->scalers[i].in_use) - skl_detach_scaler(intel_crtc, i); + skl_detach_scaler(crtc, i); } } diff --git a/drivers/gpu/drm/i915/display/skl_universal_plane.c b/drivers/gpu/drm/i915/display/skl_universal_plane.c index 92a4fd508e92..724e7b04f3b6 100644 --- a/drivers/gpu/drm/i915/display/skl_universal_plane.c +++ b/drivers/gpu/drm/i915/display/skl_universal_plane.c @@ -835,7 +835,7 @@ static u32 skl_plane_ctl_rotate(unsigned int rotate) return 0; } -static u32 cnl_plane_ctl_flip(unsigned int reflect) +static u32 icl_plane_ctl_flip(unsigned int reflect) { switch (reflect) { case 0: @@ -917,8 +917,8 @@ 
static u32 skl_plane_ctl(const struct intel_crtc_state *crtc_state, plane_ctl |= skl_plane_ctl_tiling(fb->modifier); plane_ctl |= skl_plane_ctl_rotate(rotation & DRM_MODE_ROTATE_MASK); - if (DISPLAY_VER(dev_priv) >= 11 || IS_CANNONLAKE(dev_priv)) - plane_ctl |= cnl_plane_ctl_flip(rotation & + if (DISPLAY_VER(dev_priv) >= 11) + plane_ctl |= icl_plane_ctl_flip(rotation & DRM_MODE_REFLECT_MASK); if (key->flags & I915_SET_COLORKEY_DESTINATION) @@ -926,7 +926,7 @@ static u32 skl_plane_ctl(const struct intel_crtc_state *crtc_state, else if (key->flags & I915_SET_COLORKEY_SOURCE) plane_ctl |= PLANE_CTL_KEY_ENABLE_SOURCE; - /* Wa_22012358565:adlp */ + /* Wa_22012358565:adl-p */ if (DISPLAY_VER(dev_priv) == 13) plane_ctl |= adlp_plane_ctl_arb_slots(plane_state); @@ -1270,7 +1270,7 @@ static int skl_plane_check_dst_coordinates(const struct intel_crtc_state *crtc_s int pipe_src_w = crtc_state->pipe_src_w; /* - * Display WA #1175: cnl,glk + * Display WA #1175: glk * Planes other than the cursor may cause FIFO underflow and display * corruption if starting less than 4 pixels from the right edge of * the screen. @@ -1828,7 +1828,7 @@ static bool skl_plane_has_ccs(struct drm_i915_private *dev_priv, if (plane_id == PLANE_CURSOR) return false; - if (DISPLAY_VER(dev_priv) >= 11 || IS_CANNONLAKE(dev_priv)) + if (DISPLAY_VER(dev_priv) >= 11) return true; if (IS_GEMINILAKE(dev_priv)) @@ -1910,11 +1910,11 @@ static bool gen12_plane_supports_mc_ccs(struct drm_i915_private *dev_priv, { /* Wa_14010477008:tgl[a0..c0],rkl[all],dg1[all] */ if (IS_DG1(dev_priv) || IS_ROCKETLAKE(dev_priv) || - IS_TGL_DISPLAY_STEP(dev_priv, STEP_A0, STEP_C0)) + IS_TGL_DISPLAY_STEP(dev_priv, STEP_A0, STEP_D0)) return false; /* Wa_22011186057 */ - if (IS_ADLP_DISPLAY_STEP(dev_priv, STEP_A0, STEP_A0)) + if (IS_ADLP_DISPLAY_STEP(dev_priv, STEP_A0, STEP_B0)) return false; return plane_id < PLANE_SPRITE4; @@ -1938,7 +1938,7 @@ static bool gen12_plane_format_mod_supported(struct drm_plane *_plane, case I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS: case I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS_CC: /* Wa_22011186057 */ - if (IS_ADLP_DISPLAY_STEP(dev_priv, STEP_A0, STEP_A0)) + if (IS_ADLP_DISPLAY_STEP(dev_priv, STEP_A0, STEP_B0)) return false; break; default: @@ -1995,7 +1995,7 @@ static const u64 *gen12_get_plane_modifiers(struct drm_i915_private *dev_priv, enum plane_id plane_id) { /* Wa_22011186057 */ - if (IS_ADLP_DISPLAY_STEP(dev_priv, STEP_A0, STEP_A0)) + if (IS_ADLP_DISPLAY_STEP(dev_priv, STEP_A0, STEP_B0)) return adlp_step_a_plane_format_modifiers; else if (gen12_plane_supports_mc_ccs(dev_priv, plane_id)) return gen12_plane_format_modifiers_mc_ccs; @@ -2144,7 +2144,7 @@ skl_universal_plane_create(struct drm_i915_private *dev_priv, DRM_MODE_ROTATE_0 | DRM_MODE_ROTATE_90 | DRM_MODE_ROTATE_180 | DRM_MODE_ROTATE_270; - if (DISPLAY_VER(dev_priv) >= 11 || IS_CANNONLAKE(dev_priv)) + if (DISPLAY_VER(dev_priv) >= 11) supported_rotations |= DRM_MODE_REFLECT_X; drm_plane_create_rotation_property(&plane->base, @@ -2174,12 +2174,12 @@ skl_universal_plane_create(struct drm_i915_private *dev_priv, if (DISPLAY_VER(dev_priv) >= 12) drm_plane_enable_fb_damage_clips(&plane->base); - if (DISPLAY_VER(dev_priv) >= 11 || IS_CANNONLAKE(dev_priv)) + if (DISPLAY_VER(dev_priv) >= 11) drm_plane_create_scaling_filter_property(&plane->base, BIT(DRM_SCALING_FILTER_DEFAULT) | BIT(DRM_SCALING_FILTER_NEAREST_NEIGHBOR)); - drm_plane_helper_add(&plane->base, &intel_plane_helper_funcs); + intel_plane_helper_add(plane); return plane; @@ -2295,7 +2295,7 @@ 
skl_get_initial_plane_config(struct intel_crtc *crtc, break; } - if ((DISPLAY_VER(dev_priv) >= 11 || IS_CANNONLAKE(dev_priv)) && val & PLANE_CTL_FLIP_HORIZONTAL) + if (DISPLAY_VER(dev_priv) >= 11 && val & PLANE_CTL_FLIP_HORIZONTAL) plane_config->rotation |= DRM_MODE_REFLECT_X; /* 90/270 degree rotation would require extra work */ diff --git a/drivers/gpu/drm/i915/display/vlv_dsi.c b/drivers/gpu/drm/i915/display/vlv_dsi.c index 084c9c43b2ed..0ee4ff341e25 100644 --- a/drivers/gpu/drm/i915/display/vlv_dsi.c +++ b/drivers/gpu/drm/i915/display/vlv_dsi.c @@ -780,10 +780,9 @@ static void intel_dsi_pre_enable(struct intel_atomic_state *state, const struct drm_connector_state *conn_state) { struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); - struct drm_crtc *crtc = pipe_config->uapi.crtc; - struct drm_i915_private *dev_priv = to_i915(crtc->dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - enum pipe pipe = intel_crtc->pipe; + struct intel_crtc *crtc = to_intel_crtc(pipe_config->uapi.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + enum pipe pipe = crtc->pipe; enum port port; u32 val; bool glk_cold_boot = false; @@ -1389,7 +1388,7 @@ static void intel_dsi_prepare(struct intel_encoder *intel_encoder, struct drm_encoder *encoder = &intel_encoder->base; struct drm_device *dev = encoder->dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *intel_crtc = to_intel_crtc(pipe_config->uapi.crtc); + struct intel_crtc *crtc = to_intel_crtc(pipe_config->uapi.crtc); struct intel_dsi *intel_dsi = enc_to_intel_dsi(to_intel_encoder(encoder)); const struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode; enum port port; @@ -1397,7 +1396,7 @@ static void intel_dsi_prepare(struct intel_encoder *intel_encoder, u32 val, tmp; u16 mode_hdisplay; - drm_dbg_kms(&dev_priv->drm, "pipe %c\n", pipe_name(intel_crtc->pipe)); + drm_dbg_kms(&dev_priv->drm, "pipe %c\n", pipe_name(crtc->pipe)); mode_hdisplay = adjusted_mode->crtc_hdisplay; @@ -1424,7 +1423,7 @@ static void intel_dsi_prepare(struct intel_encoder *intel_encoder, intel_de_write(dev_priv, MIPI_CTRL(port), tmp | READ_REQUEST_PRIORITY_HIGH); } else if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) { - enum pipe pipe = intel_crtc->pipe; + enum pipe pipe = crtc->pipe; tmp = intel_de_read(dev_priv, MIPI_CTRL(port)); tmp &= ~BXT_PIPE_SELECT_MASK; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c index daf9284ef1f5..f0435c6feb68 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c @@ -24,13 +24,11 @@ static void __do_clflush(struct drm_i915_gem_object *obj) i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU); } -static int clflush_work(struct dma_fence_work *base) +static void clflush_work(struct dma_fence_work *base) { struct clflush *clflush = container_of(base, typeof(*clflush), base); __do_clflush(clflush->obj); - - return 0; } static void clflush_release(struct dma_fence_work *base) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c deleted file mode 100644 index 44821d94544f..000000000000 --- a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c +++ /dev/null @@ -1,355 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright © 2019 Intel Corporation - */ - -#include "i915_drv.h" -#include "gt/intel_context.h" -#include "gt/intel_engine_pm.h" -#include "i915_gem_client_blt.h" -#include "i915_gem_object_blt.h" - -struct i915_sleeve { 
- struct i915_vma *vma; - struct drm_i915_gem_object *obj; - struct sg_table *pages; - struct i915_page_sizes page_sizes; -}; - -static int vma_set_pages(struct i915_vma *vma) -{ - struct i915_sleeve *sleeve = vma->private; - - vma->pages = sleeve->pages; - vma->page_sizes = sleeve->page_sizes; - - return 0; -} - -static void vma_clear_pages(struct i915_vma *vma) -{ - GEM_BUG_ON(!vma->pages); - vma->pages = NULL; -} - -static void vma_bind(struct i915_address_space *vm, - struct i915_vm_pt_stash *stash, - struct i915_vma *vma, - enum i915_cache_level cache_level, - u32 flags) -{ - vm->vma_ops.bind_vma(vm, stash, vma, cache_level, flags); -} - -static void vma_unbind(struct i915_address_space *vm, struct i915_vma *vma) -{ - vm->vma_ops.unbind_vma(vm, vma); -} - -static const struct i915_vma_ops proxy_vma_ops = { - .set_pages = vma_set_pages, - .clear_pages = vma_clear_pages, - .bind_vma = vma_bind, - .unbind_vma = vma_unbind, -}; - -static struct i915_sleeve *create_sleeve(struct i915_address_space *vm, - struct drm_i915_gem_object *obj, - struct sg_table *pages, - struct i915_page_sizes *page_sizes) -{ - struct i915_sleeve *sleeve; - struct i915_vma *vma; - int err; - - sleeve = kzalloc(sizeof(*sleeve), GFP_KERNEL); - if (!sleeve) - return ERR_PTR(-ENOMEM); - - vma = i915_vma_instance(obj, vm, NULL); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto err_free; - } - - vma->private = sleeve; - vma->ops = &proxy_vma_ops; - - sleeve->vma = vma; - sleeve->pages = pages; - sleeve->page_sizes = *page_sizes; - - return sleeve; - -err_free: - kfree(sleeve); - return ERR_PTR(err); -} - -static void destroy_sleeve(struct i915_sleeve *sleeve) -{ - kfree(sleeve); -} - -struct clear_pages_work { - struct dma_fence dma; - struct dma_fence_cb cb; - struct i915_sw_fence wait; - struct work_struct work; - struct irq_work irq_work; - struct i915_sleeve *sleeve; - struct intel_context *ce; - u32 value; -}; - -static const char *clear_pages_work_driver_name(struct dma_fence *fence) -{ - return DRIVER_NAME; -} - -static const char *clear_pages_work_timeline_name(struct dma_fence *fence) -{ - return "clear"; -} - -static void clear_pages_work_release(struct dma_fence *fence) -{ - struct clear_pages_work *w = container_of(fence, typeof(*w), dma); - - destroy_sleeve(w->sleeve); - - i915_sw_fence_fini(&w->wait); - - BUILD_BUG_ON(offsetof(typeof(*w), dma)); - dma_fence_free(&w->dma); -} - -static const struct dma_fence_ops clear_pages_work_ops = { - .get_driver_name = clear_pages_work_driver_name, - .get_timeline_name = clear_pages_work_timeline_name, - .release = clear_pages_work_release, -}; - -static void clear_pages_signal_irq_worker(struct irq_work *work) -{ - struct clear_pages_work *w = container_of(work, typeof(*w), irq_work); - - dma_fence_signal(&w->dma); - dma_fence_put(&w->dma); -} - -static void clear_pages_dma_fence_cb(struct dma_fence *fence, - struct dma_fence_cb *cb) -{ - struct clear_pages_work *w = container_of(cb, typeof(*w), cb); - - if (fence->error) - dma_fence_set_error(&w->dma, fence->error); - - /* - * Push the signalling of the fence into yet another worker to avoid - * the nightmare locking around the fence spinlock. 
- */ - irq_work_queue(&w->irq_work); -} - -static void clear_pages_worker(struct work_struct *work) -{ - struct clear_pages_work *w = container_of(work, typeof(*w), work); - struct drm_i915_gem_object *obj = w->sleeve->vma->obj; - struct i915_vma *vma = w->sleeve->vma; - struct i915_gem_ww_ctx ww; - struct i915_request *rq; - struct i915_vma *batch; - int err = w->dma.error; - - if (unlikely(err)) - goto out_signal; - - if (obj->cache_dirty) { - if (i915_gem_object_has_struct_page(obj)) - drm_clflush_sg(w->sleeve->pages); - obj->cache_dirty = false; - } - obj->read_domains = I915_GEM_GPU_DOMAINS; - obj->write_domain = 0; - - i915_gem_ww_ctx_init(&ww, false); - intel_engine_pm_get(w->ce->engine); -retry: - err = intel_context_pin_ww(w->ce, &ww); - if (err) - goto out_signal; - - batch = intel_emit_vma_fill_blt(w->ce, vma, &ww, w->value); - if (IS_ERR(batch)) { - err = PTR_ERR(batch); - goto out_ctx; - } - - rq = i915_request_create(w->ce); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto out_batch; - } - - /* There's no way the fence has signalled */ - if (dma_fence_add_callback(&rq->fence, &w->cb, - clear_pages_dma_fence_cb)) - GEM_BUG_ON(1); - - err = intel_emit_vma_mark_active(batch, rq); - if (unlikely(err)) - goto out_request; - - /* - * w->dma is already exported via (vma|obj)->resv we need only - * keep track of the GPU activity within this vma/request, and - * propagate the signal from the request to w->dma. - */ - err = __i915_vma_move_to_active(vma, rq); - if (err) - goto out_request; - - if (rq->engine->emit_init_breadcrumb) { - err = rq->engine->emit_init_breadcrumb(rq); - if (unlikely(err)) - goto out_request; - } - - err = rq->engine->emit_bb_start(rq, - batch->node.start, batch->node.size, - 0); -out_request: - if (unlikely(err)) { - i915_request_set_error_once(rq, err); - err = 0; - } - - i915_request_add(rq); -out_batch: - intel_emit_vma_release(w->ce, batch); -out_ctx: - intel_context_unpin(w->ce); -out_signal: - if (err == -EDEADLK) { - err = i915_gem_ww_ctx_backoff(&ww); - if (!err) - goto retry; - } - i915_gem_ww_ctx_fini(&ww); - - i915_vma_unpin(w->sleeve->vma); - intel_engine_pm_put(w->ce->engine); - - if (unlikely(err)) { - dma_fence_set_error(&w->dma, err); - dma_fence_signal(&w->dma); - dma_fence_put(&w->dma); - } -} - -static int pin_wait_clear_pages_work(struct clear_pages_work *w, - struct intel_context *ce) -{ - struct i915_vma *vma = w->sleeve->vma; - struct i915_gem_ww_ctx ww; - int err; - - i915_gem_ww_ctx_init(&ww, false); -retry: - err = i915_gem_object_lock(vma->obj, &ww); - if (err) - goto out; - - err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER); - if (unlikely(err)) - goto out; - - err = i915_sw_fence_await_reservation(&w->wait, - vma->obj->base.resv, NULL, - true, 0, I915_FENCE_GFP); - if (err) - goto err_unpin_vma; - - dma_resv_add_excl_fence(vma->obj->base.resv, &w->dma); - -err_unpin_vma: - if (err) - i915_vma_unpin(vma); -out: - if (err == -EDEADLK) { - err = i915_gem_ww_ctx_backoff(&ww); - if (!err) - goto retry; - } - i915_gem_ww_ctx_fini(&ww); - return err; -} - -static int __i915_sw_fence_call -clear_pages_work_notify(struct i915_sw_fence *fence, - enum i915_sw_fence_notify state) -{ - struct clear_pages_work *w = container_of(fence, typeof(*w), wait); - - switch (state) { - case FENCE_COMPLETE: - schedule_work(&w->work); - break; - - case FENCE_FREE: - dma_fence_put(&w->dma); - break; - } - - return NOTIFY_DONE; -} - -static DEFINE_SPINLOCK(fence_lock); - -/* XXX: better name please */ -int i915_gem_schedule_fill_pages_blt(struct 
drm_i915_gem_object *obj, - struct intel_context *ce, - struct sg_table *pages, - struct i915_page_sizes *page_sizes, - u32 value) -{ - struct clear_pages_work *work; - struct i915_sleeve *sleeve; - int err; - - sleeve = create_sleeve(ce->vm, obj, pages, page_sizes); - if (IS_ERR(sleeve)) - return PTR_ERR(sleeve); - - work = kmalloc(sizeof(*work), GFP_KERNEL); - if (!work) { - destroy_sleeve(sleeve); - return -ENOMEM; - } - - work->value = value; - work->sleeve = sleeve; - work->ce = ce; - - INIT_WORK(&work->work, clear_pages_worker); - - init_irq_work(&work->irq_work, clear_pages_signal_irq_worker); - - dma_fence_init(&work->dma, &clear_pages_work_ops, &fence_lock, 0, 0); - i915_sw_fence_init(&work->wait, clear_pages_work_notify); - - err = pin_wait_clear_pages_work(work, ce); - if (err < 0) - dma_fence_set_error(&work->dma, err); - - dma_fence_get(&work->dma); - i915_sw_fence_commit(&work->wait); - - return err; -} - -#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) -#include "selftests/i915_gem_client_blt.c" -#endif diff --git a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.h b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.h deleted file mode 100644 index 3dbd28c22ff5..000000000000 --- a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.h +++ /dev/null @@ -1,21 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright © 2019 Intel Corporation - */ -#ifndef __I915_GEM_CLIENT_BLT_H__ -#define __I915_GEM_CLIENT_BLT_H__ - -#include <linux/types.h> - -struct drm_i915_gem_object; -struct i915_page_sizes; -struct intel_context; -struct sg_table; - -int i915_gem_schedule_fill_pages_blt(struct drm_i915_gem_object *obj, - struct intel_context *ce, - struct sg_table *pages, - struct i915_page_sizes *page_sizes, - u32 value); - -#endif diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 7720b8c22c81..cff72679ad7c 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -67,35 +67,32 @@ #include <linux/log2.h> #include <linux/nospec.h> +#include <drm/drm_syncobj.h> + #include "gt/gen6_ppgtt.h" #include "gt/intel_context.h" #include "gt/intel_context_param.h" #include "gt/intel_engine_heartbeat.h" #include "gt/intel_engine_user.h" -#include "gt/intel_execlists_submission.h" /* virtual_engine */ #include "gt/intel_gpu_commands.h" #include "gt/intel_ring.h" #include "i915_gem_context.h" -#include "i915_globals.h" #include "i915_trace.h" #include "i915_user_extensions.h" #define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1 -static struct i915_global_gem_context { - struct i915_global base; - struct kmem_cache *slab_luts; -} global; +static struct kmem_cache *slab_luts; struct i915_lut_handle *i915_lut_handle_alloc(void) { - return kmem_cache_alloc(global.slab_luts, GFP_KERNEL); + return kmem_cache_alloc(slab_luts, GFP_KERNEL); } void i915_lut_handle_free(struct i915_lut_handle *lut) { - return kmem_cache_free(global.slab_luts, lut); + return kmem_cache_free(slab_luts, lut); } static void lut_close(struct i915_gem_context *ctx) @@ -167,6 +164,577 @@ lookup_user_engine(struct i915_gem_context *ctx, return i915_gem_context_get_engine(ctx, idx); } +static int validate_priority(struct drm_i915_private *i915, + const struct drm_i915_gem_context_param *args) +{ + s64 priority = args->value; + + if (args->size) + return -EINVAL; + + if (!(i915->caps.scheduler & I915_SCHEDULER_CAP_PRIORITY)) + return -ENODEV; + + if (priority > I915_CONTEXT_MAX_USER_PRIORITY || + priority < I915_CONTEXT_MIN_USER_PRIORITY) + 
return -EINVAL; + + if (priority > I915_CONTEXT_DEFAULT_PRIORITY && + !capable(CAP_SYS_NICE)) + return -EPERM; + + return 0; +} + +static void proto_context_close(struct i915_gem_proto_context *pc) +{ + int i; + + if (pc->vm) + i915_vm_put(pc->vm); + if (pc->user_engines) { + for (i = 0; i < pc->num_user_engines; i++) + kfree(pc->user_engines[i].siblings); + kfree(pc->user_engines); + } + kfree(pc); +} + +static int proto_context_set_persistence(struct drm_i915_private *i915, + struct i915_gem_proto_context *pc, + bool persist) +{ + if (persist) { + /* + * Only contexts that are short-lived [that will expire or be + * reset] are allowed to survive past termination. We require + * hangcheck to ensure that the persistent requests are healthy. + */ + if (!i915->params.enable_hangcheck) + return -EINVAL; + + pc->user_flags |= BIT(UCONTEXT_PERSISTENCE); + } else { + /* To cancel a context we use "preempt-to-idle" */ + if (!(i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION)) + return -ENODEV; + + /* + * If the cancel fails, we then need to reset, cleanly! + * + * If the per-engine reset fails, all hope is lost! We resort + * to a full GPU reset in that unlikely case, but realistically + * if the engine could not reset, the full reset does not fare + * much better. The damage has been done. + * + * However, if we cannot reset an engine by itself, we cannot + * cleanup a hanging persistent context without causing + * colateral damage, and we should not pretend we can by + * exposing the interface. + */ + if (!intel_has_reset_engine(&i915->gt)) + return -ENODEV; + + pc->user_flags &= ~BIT(UCONTEXT_PERSISTENCE); + } + + return 0; +} + +static struct i915_gem_proto_context * +proto_context_create(struct drm_i915_private *i915, unsigned int flags) +{ + struct i915_gem_proto_context *pc, *err; + + pc = kzalloc(sizeof(*pc), GFP_KERNEL); + if (!pc) + return ERR_PTR(-ENOMEM); + + pc->num_user_engines = -1; + pc->user_engines = NULL; + pc->user_flags = BIT(UCONTEXT_BANNABLE) | + BIT(UCONTEXT_RECOVERABLE); + if (i915->params.enable_hangcheck) + pc->user_flags |= BIT(UCONTEXT_PERSISTENCE); + pc->sched.priority = I915_PRIORITY_NORMAL; + + if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE) { + if (!HAS_EXECLISTS(i915)) { + err = ERR_PTR(-EINVAL); + goto proto_close; + } + pc->single_timeline = true; + } + + return pc; + +proto_close: + proto_context_close(pc); + return err; +} + +static int proto_context_register_locked(struct drm_i915_file_private *fpriv, + struct i915_gem_proto_context *pc, + u32 *id) +{ + int ret; + void *old; + + lockdep_assert_held(&fpriv->proto_context_lock); + + ret = xa_alloc(&fpriv->context_xa, id, NULL, xa_limit_32b, GFP_KERNEL); + if (ret) + return ret; + + old = xa_store(&fpriv->proto_context_xa, *id, pc, GFP_KERNEL); + if (xa_is_err(old)) { + xa_erase(&fpriv->context_xa, *id); + return xa_err(old); + } + WARN_ON(old); + + return 0; +} + +static int proto_context_register(struct drm_i915_file_private *fpriv, + struct i915_gem_proto_context *pc, + u32 *id) +{ + int ret; + + mutex_lock(&fpriv->proto_context_lock); + ret = proto_context_register_locked(fpriv, pc, id); + mutex_unlock(&fpriv->proto_context_lock); + + return ret; +} + +static int set_proto_ctx_vm(struct drm_i915_file_private *fpriv, + struct i915_gem_proto_context *pc, + const struct drm_i915_gem_context_param *args) +{ + struct drm_i915_private *i915 = fpriv->dev_priv; + struct i915_address_space *vm; + + if (args->size) + return -EINVAL; + + if (!HAS_FULL_PPGTT(i915)) + return -ENODEV; + + if 
(upper_32_bits(args->value)) + return -ENOENT; + + vm = i915_gem_vm_lookup(fpriv, args->value); + if (!vm) + return -ENOENT; + + if (pc->vm) + i915_vm_put(pc->vm); + pc->vm = vm; + + return 0; +} + +struct set_proto_ctx_engines { + struct drm_i915_private *i915; + unsigned num_engines; + struct i915_gem_proto_engine *engines; +}; + +static int +set_proto_ctx_engines_balance(struct i915_user_extension __user *base, + void *data) +{ + struct i915_context_engines_load_balance __user *ext = + container_of_user(base, typeof(*ext), base); + const struct set_proto_ctx_engines *set = data; + struct drm_i915_private *i915 = set->i915; + struct intel_engine_cs **siblings; + u16 num_siblings, idx; + unsigned int n; + int err; + + if (!HAS_EXECLISTS(i915)) + return -ENODEV; + + if (get_user(idx, &ext->engine_index)) + return -EFAULT; + + if (idx >= set->num_engines) { + drm_dbg(&i915->drm, "Invalid placement value, %d >= %d\n", + idx, set->num_engines); + return -EINVAL; + } + + idx = array_index_nospec(idx, set->num_engines); + if (set->engines[idx].type != I915_GEM_ENGINE_TYPE_INVALID) { + drm_dbg(&i915->drm, + "Invalid placement[%d], already occupied\n", idx); + return -EEXIST; + } + + if (get_user(num_siblings, &ext->num_siblings)) + return -EFAULT; + + err = check_user_mbz(&ext->flags); + if (err) + return err; + + err = check_user_mbz(&ext->mbz64); + if (err) + return err; + + if (num_siblings == 0) + return 0; + + siblings = kmalloc_array(num_siblings, sizeof(*siblings), GFP_KERNEL); + if (!siblings) + return -ENOMEM; + + for (n = 0; n < num_siblings; n++) { + struct i915_engine_class_instance ci; + + if (copy_from_user(&ci, &ext->engines[n], sizeof(ci))) { + err = -EFAULT; + goto err_siblings; + } + + siblings[n] = intel_engine_lookup_user(i915, + ci.engine_class, + ci.engine_instance); + if (!siblings[n]) { + drm_dbg(&i915->drm, + "Invalid sibling[%d]: { class:%d, inst:%d }\n", + n, ci.engine_class, ci.engine_instance); + err = -EINVAL; + goto err_siblings; + } + } + + if (num_siblings == 1) { + set->engines[idx].type = I915_GEM_ENGINE_TYPE_PHYSICAL; + set->engines[idx].engine = siblings[0]; + kfree(siblings); + } else { + set->engines[idx].type = I915_GEM_ENGINE_TYPE_BALANCED; + set->engines[idx].num_siblings = num_siblings; + set->engines[idx].siblings = siblings; + } + + return 0; + +err_siblings: + kfree(siblings); + + return err; +} + +static int +set_proto_ctx_engines_bond(struct i915_user_extension __user *base, void *data) +{ + struct i915_context_engines_bond __user *ext = + container_of_user(base, typeof(*ext), base); + const struct set_proto_ctx_engines *set = data; + struct drm_i915_private *i915 = set->i915; + struct i915_engine_class_instance ci; + struct intel_engine_cs *master; + u16 idx, num_bonds; + int err, n; + + if (get_user(idx, &ext->virtual_index)) + return -EFAULT; + + if (idx >= set->num_engines) { + drm_dbg(&i915->drm, + "Invalid index for virtual engine: %d >= %d\n", + idx, set->num_engines); + return -EINVAL; + } + + idx = array_index_nospec(idx, set->num_engines); + if (set->engines[idx].type == I915_GEM_ENGINE_TYPE_INVALID) { + drm_dbg(&i915->drm, "Invalid engine at %d\n", idx); + return -EINVAL; + } + + if (set->engines[idx].type != I915_GEM_ENGINE_TYPE_PHYSICAL) { + drm_dbg(&i915->drm, + "Bonding with virtual engines not allowed\n"); + return -EINVAL; + } + + err = check_user_mbz(&ext->flags); + if (err) + return err; + + for (n = 0; n < ARRAY_SIZE(ext->mbz64); n++) { + err = check_user_mbz(&ext->mbz64[n]); + if (err) + return err; + } + + if 
(copy_from_user(&ci, &ext->master, sizeof(ci))) + return -EFAULT; + + master = intel_engine_lookup_user(i915, + ci.engine_class, + ci.engine_instance); + if (!master) { + drm_dbg(&i915->drm, + "Unrecognised master engine: { class:%u, instance:%u }\n", + ci.engine_class, ci.engine_instance); + return -EINVAL; + } + + if (intel_engine_uses_guc(master)) { + DRM_DEBUG("bonding extension not supported with GuC submission"); + return -ENODEV; + } + + if (get_user(num_bonds, &ext->num_bonds)) + return -EFAULT; + + for (n = 0; n < num_bonds; n++) { + struct intel_engine_cs *bond; + + if (copy_from_user(&ci, &ext->engines[n], sizeof(ci))) + return -EFAULT; + + bond = intel_engine_lookup_user(i915, + ci.engine_class, + ci.engine_instance); + if (!bond) { + drm_dbg(&i915->drm, + "Unrecognised engine[%d] for bonding: { class:%d, instance: %d }\n", + n, ci.engine_class, ci.engine_instance); + return -EINVAL; + } + } + + return 0; +} + +static const i915_user_extension_fn set_proto_ctx_engines_extensions[] = { + [I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE] = set_proto_ctx_engines_balance, + [I915_CONTEXT_ENGINES_EXT_BOND] = set_proto_ctx_engines_bond, +}; + +static int set_proto_ctx_engines(struct drm_i915_file_private *fpriv, + struct i915_gem_proto_context *pc, + const struct drm_i915_gem_context_param *args) +{ + struct drm_i915_private *i915 = fpriv->dev_priv; + struct set_proto_ctx_engines set = { .i915 = i915 }; + struct i915_context_param_engines __user *user = + u64_to_user_ptr(args->value); + unsigned int n; + u64 extensions; + int err; + + if (pc->num_user_engines >= 0) { + drm_dbg(&i915->drm, "Cannot set engines twice"); + return -EINVAL; + } + + if (args->size < sizeof(*user) || + !IS_ALIGNED(args->size - sizeof(*user), sizeof(*user->engines))) { + drm_dbg(&i915->drm, "Invalid size for engine array: %d\n", + args->size); + return -EINVAL; + } + + set.num_engines = (args->size - sizeof(*user)) / sizeof(*user->engines); + /* RING_MASK has no shift so we can use it directly here */ + if (set.num_engines > I915_EXEC_RING_MASK + 1) + return -EINVAL; + + set.engines = kmalloc_array(set.num_engines, sizeof(*set.engines), GFP_KERNEL); + if (!set.engines) + return -ENOMEM; + + for (n = 0; n < set.num_engines; n++) { + struct i915_engine_class_instance ci; + struct intel_engine_cs *engine; + + if (copy_from_user(&ci, &user->engines[n], sizeof(ci))) { + kfree(set.engines); + return -EFAULT; + } + + memset(&set.engines[n], 0, sizeof(set.engines[n])); + + if (ci.engine_class == (u16)I915_ENGINE_CLASS_INVALID && + ci.engine_instance == (u16)I915_ENGINE_CLASS_INVALID_NONE) + continue; + + engine = intel_engine_lookup_user(i915, + ci.engine_class, + ci.engine_instance); + if (!engine) { + drm_dbg(&i915->drm, + "Invalid engine[%d]: { class:%d, instance:%d }\n", + n, ci.engine_class, ci.engine_instance); + kfree(set.engines); + return -ENOENT; + } + + set.engines[n].type = I915_GEM_ENGINE_TYPE_PHYSICAL; + set.engines[n].engine = engine; + } + + err = -EFAULT; + if (!get_user(extensions, &user->extensions)) + err = i915_user_extensions(u64_to_user_ptr(extensions), + set_proto_ctx_engines_extensions, + ARRAY_SIZE(set_proto_ctx_engines_extensions), + &set); + if (err) { + kfree(set.engines); + return err; + } + + pc->num_user_engines = set.num_engines; + pc->user_engines = set.engines; + + return 0; +} + +static int set_proto_ctx_sseu(struct drm_i915_file_private *fpriv, + struct i915_gem_proto_context *pc, + struct drm_i915_gem_context_param *args) +{ + struct drm_i915_private *i915 = fpriv->dev_priv; + struct 
drm_i915_gem_context_param_sseu user_sseu; + struct intel_sseu *sseu; + int ret; + + if (args->size < sizeof(user_sseu)) + return -EINVAL; + + if (GRAPHICS_VER(i915) != 11) + return -ENODEV; + + if (copy_from_user(&user_sseu, u64_to_user_ptr(args->value), + sizeof(user_sseu))) + return -EFAULT; + + if (user_sseu.rsvd) + return -EINVAL; + + if (user_sseu.flags & ~(I915_CONTEXT_SSEU_FLAG_ENGINE_INDEX)) + return -EINVAL; + + if (!!(user_sseu.flags & I915_CONTEXT_SSEU_FLAG_ENGINE_INDEX) != (pc->num_user_engines >= 0)) + return -EINVAL; + + if (pc->num_user_engines >= 0) { + int idx = user_sseu.engine.engine_instance; + struct i915_gem_proto_engine *pe; + + if (idx >= pc->num_user_engines) + return -EINVAL; + + pe = &pc->user_engines[idx]; + + /* Only render engine supports RPCS configuration. */ + if (pe->engine->class != RENDER_CLASS) + return -EINVAL; + + sseu = &pe->sseu; + } else { + /* Only render engine supports RPCS configuration. */ + if (user_sseu.engine.engine_class != I915_ENGINE_CLASS_RENDER) + return -EINVAL; + + /* There is only one render engine */ + if (user_sseu.engine.engine_instance != 0) + return -EINVAL; + + sseu = &pc->legacy_rcs_sseu; + } + + ret = i915_gem_user_to_context_sseu(&i915->gt, &user_sseu, sseu); + if (ret) + return ret; + + args->size = sizeof(user_sseu); + + return 0; +} + +static int set_proto_ctx_param(struct drm_i915_file_private *fpriv, + struct i915_gem_proto_context *pc, + struct drm_i915_gem_context_param *args) +{ + int ret = 0; + + switch (args->param) { + case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE: + if (args->size) + ret = -EINVAL; + else if (args->value) + pc->user_flags |= BIT(UCONTEXT_NO_ERROR_CAPTURE); + else + pc->user_flags &= ~BIT(UCONTEXT_NO_ERROR_CAPTURE); + break; + + case I915_CONTEXT_PARAM_BANNABLE: + if (args->size) + ret = -EINVAL; + else if (!capable(CAP_SYS_ADMIN) && !args->value) + ret = -EPERM; + else if (args->value) + pc->user_flags |= BIT(UCONTEXT_BANNABLE); + else + pc->user_flags &= ~BIT(UCONTEXT_BANNABLE); + break; + + case I915_CONTEXT_PARAM_RECOVERABLE: + if (args->size) + ret = -EINVAL; + else if (args->value) + pc->user_flags |= BIT(UCONTEXT_RECOVERABLE); + else + pc->user_flags &= ~BIT(UCONTEXT_RECOVERABLE); + break; + + case I915_CONTEXT_PARAM_PRIORITY: + ret = validate_priority(fpriv->dev_priv, args); + if (!ret) + pc->sched.priority = args->value; + break; + + case I915_CONTEXT_PARAM_SSEU: + ret = set_proto_ctx_sseu(fpriv, pc, args); + break; + + case I915_CONTEXT_PARAM_VM: + ret = set_proto_ctx_vm(fpriv, pc, args); + break; + + case I915_CONTEXT_PARAM_ENGINES: + ret = set_proto_ctx_engines(fpriv, pc, args); + break; + + case I915_CONTEXT_PARAM_PERSISTENCE: + if (args->size) + ret = -EINVAL; + ret = proto_context_set_persistence(fpriv->dev_priv, pc, + args->value); + break; + + case I915_CONTEXT_PARAM_NO_ZEROMAP: + case I915_CONTEXT_PARAM_BAN_PERIOD: + case I915_CONTEXT_PARAM_RINGSIZE: + default: + ret = -EINVAL; + break; + } + + return ret; +} + static struct i915_address_space * context_get_vm_rcu(struct i915_gem_context *ctx) { @@ -205,14 +773,16 @@ context_get_vm_rcu(struct i915_gem_context *ctx) } while (1); } -static void intel_context_set_gem(struct intel_context *ce, - struct i915_gem_context *ctx) +static int intel_context_set_gem(struct intel_context *ce, + struct i915_gem_context *ctx, + struct intel_sseu sseu) { + int ret = 0; + GEM_BUG_ON(rcu_access_pointer(ce->gem_context)); RCU_INIT_POINTER(ce->gem_context, ctx); - if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) - ce->ring = 
__intel_context_ring_size(SZ_16K); + ce->ring_size = SZ_16K; if (rcu_access_pointer(ctx->vm)) { struct i915_address_space *vm; @@ -225,15 +795,23 @@ static void intel_context_set_gem(struct intel_context *ce, ce->vm = vm; } - GEM_BUG_ON(ce->timeline); - if (ctx->timeline) - ce->timeline = intel_timeline_get(ctx->timeline); - if (ctx->sched.priority >= I915_PRIORITY_NORMAL && - intel_engine_has_timeslices(ce->engine)) + intel_engine_has_timeslices(ce->engine) && + intel_engine_has_semaphores(ce->engine)) __set_bit(CONTEXT_USE_SEMAPHORES, &ce->flags); - intel_context_set_watchdog_us(ce, ctx->watchdog.timeout_us); + if (IS_ACTIVE(CONFIG_DRM_I915_REQUEST_TIMEOUT) && + ctx->i915->params.request_timeout_ms) { + unsigned int timeout_ms = ctx->i915->params.request_timeout_ms; + + intel_context_set_watchdog_us(ce, (u64)timeout_ms * 1000); + } + + /* A valid SSEU has no zero fields */ + if (sseu.slice_mask && !WARN_ON(ce->engine->class != RENDER_CLASS)) + ret = intel_context_reconfigure_sseu(ce, sseu); + + return ret; } static void __free_engines(struct i915_gem_engines *e, unsigned int count) @@ -301,11 +879,12 @@ static struct i915_gem_engines *alloc_engines(unsigned int count) return e; } -static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx) +static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx, + struct intel_sseu rcs_sseu) { const struct intel_gt *gt = &ctx->i915->gt; struct intel_engine_cs *engine; - struct i915_gem_engines *e; + struct i915_gem_engines *e, *err; enum intel_engine_id id; e = alloc_engines(I915_NUM_ENGINES); @@ -314,6 +893,8 @@ static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx) for_each_engine(engine, gt, id) { struct intel_context *ce; + struct intel_sseu sseu = {}; + int ret; if (engine->legacy_idx == INVALID_ENGINE) continue; @@ -323,18 +904,79 @@ static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx) ce = intel_context_create(engine); if (IS_ERR(ce)) { - __free_engines(e, e->num_engines + 1); - return ERR_CAST(ce); + err = ERR_CAST(ce); + goto free_engines; } - intel_context_set_gem(ce, ctx); - e->engines[engine->legacy_idx] = ce; - e->num_engines = max(e->num_engines, engine->legacy_idx); + e->num_engines = max(e->num_engines, engine->legacy_idx + 1); + + if (engine->class == RENDER_CLASS) + sseu = rcs_sseu; + + ret = intel_context_set_gem(ce, ctx, sseu); + if (ret) { + err = ERR_PTR(ret); + goto free_engines; + } + + } + + return e; + +free_engines: + free_engines(e); + return err; +} + +static struct i915_gem_engines *user_engines(struct i915_gem_context *ctx, + unsigned int num_engines, + struct i915_gem_proto_engine *pe) +{ + struct i915_gem_engines *e, *err; + unsigned int n; + + e = alloc_engines(num_engines); + for (n = 0; n < num_engines; n++) { + struct intel_context *ce; + int ret; + + switch (pe[n].type) { + case I915_GEM_ENGINE_TYPE_PHYSICAL: + ce = intel_context_create(pe[n].engine); + break; + + case I915_GEM_ENGINE_TYPE_BALANCED: + ce = intel_engine_create_virtual(pe[n].siblings, + pe[n].num_siblings); + break; + + case I915_GEM_ENGINE_TYPE_INVALID: + default: + GEM_WARN_ON(pe[n].type != I915_GEM_ENGINE_TYPE_INVALID); + continue; + } + + if (IS_ERR(ce)) { + err = ERR_CAST(ce); + goto free_engines; + } + + e->engines[n] = ce; + + ret = intel_context_set_gem(ce, ctx, pe->sseu); + if (ret) { + err = ERR_PTR(ret); + goto free_engines; + } } - e->num_engines++; + e->num_engines = num_engines; return e; + +free_engines: + free_engines(e); + return err; } void 
i915_gem_context_release(struct kref *ref) @@ -347,9 +989,6 @@ void i915_gem_context_release(struct kref *ref) mutex_destroy(&ctx->engines_mutex); mutex_destroy(&ctx->lut_mutex); - if (ctx->timeline) - intel_timeline_put(ctx->timeline); - put_pid(ctx->pid); mutex_destroy(&ctx->mutex); @@ -441,7 +1080,7 @@ static void kill_engines(struct i915_gem_engines *engines, bool ban) for_each_gem_engine(ce, engines, it) { struct intel_engine_cs *engine; - if (ban && intel_context_set_banned(ce)) + if (ban && intel_context_ban(ce, NULL)) continue; /* @@ -566,6 +1205,9 @@ static void context_close(struct i915_gem_context *ctx) if (vm) i915_vm_close(vm); + if (ctx->syncobj) + drm_syncobj_put(ctx->syncobj); + ctx->file_priv = ERR_PTR(-EBADF); /* @@ -635,57 +1277,6 @@ static int __context_set_persistence(struct i915_gem_context *ctx, bool state) return 0; } -static struct i915_gem_context * -__create_context(struct drm_i915_private *i915) -{ - struct i915_gem_context *ctx; - struct i915_gem_engines *e; - int err; - int i; - - ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); - if (!ctx) - return ERR_PTR(-ENOMEM); - - kref_init(&ctx->ref); - ctx->i915 = i915; - ctx->sched.priority = I915_PRIORITY_NORMAL; - mutex_init(&ctx->mutex); - INIT_LIST_HEAD(&ctx->link); - - spin_lock_init(&ctx->stale.lock); - INIT_LIST_HEAD(&ctx->stale.engines); - - mutex_init(&ctx->engines_mutex); - e = default_engines(ctx); - if (IS_ERR(e)) { - err = PTR_ERR(e); - goto err_free; - } - RCU_INIT_POINTER(ctx->engines, e); - - INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL); - mutex_init(&ctx->lut_mutex); - - /* NB: Mark all slices as needing a remap so that when the context first - * loads it will restore whatever remap state already exists. If there - * is no remap info, it will be a NOP. */ - ctx->remap_slice = ALL_L3_SLICES(i915); - - i915_gem_context_set_bannable(ctx); - i915_gem_context_set_recoverable(ctx); - __context_set_persistence(ctx, true /* cgroup hook? 
*/); - - for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++) - ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES; - - return ctx; - -err_free: - kfree(ctx); - return ERR_PTR(err); -} - static inline struct i915_gem_engines * __context_engines_await(const struct i915_gem_context *ctx, bool *user_engines) @@ -714,168 +1305,112 @@ __context_engines_await(const struct i915_gem_context *ctx, return engines; } -static int +static void context_apply_all(struct i915_gem_context *ctx, - int (*fn)(struct intel_context *ce, void *data), + void (*fn)(struct intel_context *ce, void *data), void *data) { struct i915_gem_engines_iter it; struct i915_gem_engines *e; struct intel_context *ce; - int err = 0; e = __context_engines_await(ctx, NULL); - for_each_gem_engine(ce, e, it) { - err = fn(ce, data); - if (err) - break; - } + for_each_gem_engine(ce, e, it) + fn(ce, data); i915_sw_fence_complete(&e->fence); - - return err; -} - -static int __apply_ppgtt(struct intel_context *ce, void *vm) -{ - i915_vm_put(ce->vm); - ce->vm = i915_vm_get(vm); - return 0; -} - -static struct i915_address_space * -__set_ppgtt(struct i915_gem_context *ctx, struct i915_address_space *vm) -{ - struct i915_address_space *old; - - old = rcu_replace_pointer(ctx->vm, - i915_vm_open(vm), - lockdep_is_held(&ctx->mutex)); - GEM_BUG_ON(old && i915_vm_is_4lvl(vm) != i915_vm_is_4lvl(old)); - - context_apply_all(ctx, __apply_ppgtt, vm); - - return old; -} - -static void __assign_ppgtt(struct i915_gem_context *ctx, - struct i915_address_space *vm) -{ - if (vm == rcu_access_pointer(ctx->vm)) - return; - - vm = __set_ppgtt(ctx, vm); - if (vm) - i915_vm_close(vm); -} - -static void __set_timeline(struct intel_timeline **dst, - struct intel_timeline *src) -{ - struct intel_timeline *old = *dst; - - *dst = src ? intel_timeline_get(src) : NULL; - - if (old) - intel_timeline_put(old); -} - -static int __apply_timeline(struct intel_context *ce, void *timeline) -{ - __set_timeline(&ce->timeline, timeline); - return 0; -} - -static void __assign_timeline(struct i915_gem_context *ctx, - struct intel_timeline *timeline) -{ - __set_timeline(&ctx->timeline, timeline); - context_apply_all(ctx, __apply_timeline, timeline); -} - -static int __apply_watchdog(struct intel_context *ce, void *timeout_us) -{ - return intel_context_set_watchdog_us(ce, (uintptr_t)timeout_us); -} - -static int -__set_watchdog(struct i915_gem_context *ctx, unsigned long timeout_us) -{ - int ret; - - ret = context_apply_all(ctx, __apply_watchdog, - (void *)(uintptr_t)timeout_us); - if (!ret) - ctx->watchdog.timeout_us = timeout_us; - - return ret; -} - -static void __set_default_fence_expiry(struct i915_gem_context *ctx) -{ - struct drm_i915_private *i915 = ctx->i915; - int ret; - - if (!IS_ACTIVE(CONFIG_DRM_I915_REQUEST_TIMEOUT) || - !i915->params.request_timeout_ms) - return; - - /* Default expiry for user fences. */ - ret = __set_watchdog(ctx, i915->params.request_timeout_ms * 1000); - if (ret) - drm_notice(&i915->drm, - "Failed to configure default fence expiry! 
(%d)", - ret); } static struct i915_gem_context * -i915_gem_create_context(struct drm_i915_private *i915, unsigned int flags) +i915_gem_create_context(struct drm_i915_private *i915, + const struct i915_gem_proto_context *pc) { struct i915_gem_context *ctx; + struct i915_address_space *vm = NULL; + struct i915_gem_engines *e; + int err; + int i; - if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE && - !HAS_EXECLISTS(i915)) - return ERR_PTR(-EINVAL); + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return ERR_PTR(-ENOMEM); - ctx = __create_context(i915); - if (IS_ERR(ctx)) - return ctx; + kref_init(&ctx->ref); + ctx->i915 = i915; + ctx->sched = pc->sched; + mutex_init(&ctx->mutex); + INIT_LIST_HEAD(&ctx->link); - if (HAS_FULL_PPGTT(i915)) { + spin_lock_init(&ctx->stale.lock); + INIT_LIST_HEAD(&ctx->stale.engines); + + if (pc->vm) { + vm = i915_vm_get(pc->vm); + } else if (HAS_FULL_PPGTT(i915)) { struct i915_ppgtt *ppgtt; ppgtt = i915_ppgtt_create(&i915->gt); if (IS_ERR(ppgtt)) { drm_dbg(&i915->drm, "PPGTT setup failed (%ld)\n", PTR_ERR(ppgtt)); - context_close(ctx); - return ERR_CAST(ppgtt); + err = PTR_ERR(ppgtt); + goto err_ctx; } + vm = &ppgtt->vm; + } + if (vm) { + RCU_INIT_POINTER(ctx->vm, i915_vm_open(vm)); - mutex_lock(&ctx->mutex); - __assign_ppgtt(ctx, &ppgtt->vm); - mutex_unlock(&ctx->mutex); + /* i915_vm_open() takes a reference */ + i915_vm_put(vm); + } - i915_vm_put(&ppgtt->vm); + mutex_init(&ctx->engines_mutex); + if (pc->num_user_engines >= 0) { + i915_gem_context_set_user_engines(ctx); + e = user_engines(ctx, pc->num_user_engines, pc->user_engines); + } else { + i915_gem_context_clear_user_engines(ctx); + e = default_engines(ctx, pc->legacy_rcs_sseu); + } + if (IS_ERR(e)) { + err = PTR_ERR(e); + goto err_vm; } + RCU_INIT_POINTER(ctx->engines, e); - if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE) { - struct intel_timeline *timeline; + INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL); + mutex_init(&ctx->lut_mutex); - timeline = intel_timeline_create(&i915->gt); - if (IS_ERR(timeline)) { - context_close(ctx); - return ERR_CAST(timeline); - } + /* NB: Mark all slices as needing a remap so that when the context first + * loads it will restore whatever remap state already exists. If there + * is no remap info, it will be a NOP. 
*/ + ctx->remap_slice = ALL_L3_SLICES(i915); - __assign_timeline(ctx, timeline); - intel_timeline_put(timeline); - } + ctx->user_flags = pc->user_flags; - __set_default_fence_expiry(ctx); + for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++) + ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES; + + if (pc->single_timeline) { + err = drm_syncobj_create(&ctx->syncobj, + DRM_SYNCOBJ_CREATE_SIGNALED, + NULL); + if (err) + goto err_engines; + } trace_i915_context_create(ctx); return ctx; + +err_engines: + free_engines(e); +err_vm: + if (ctx->vm) + i915_vm_close(ctx->vm); +err_ctx: + kfree(ctx); + return ERR_PTR(err); } static void init_contexts(struct i915_gem_contexts *gc) @@ -889,83 +1424,83 @@ void i915_gem_init__contexts(struct drm_i915_private *i915) init_contexts(&i915->gem.contexts); } -static int gem_context_register(struct i915_gem_context *ctx, - struct drm_i915_file_private *fpriv, - u32 *id) +static void gem_context_register(struct i915_gem_context *ctx, + struct drm_i915_file_private *fpriv, + u32 id) { struct drm_i915_private *i915 = ctx->i915; - struct i915_address_space *vm; - int ret; + void *old; ctx->file_priv = fpriv; - mutex_lock(&ctx->mutex); - vm = i915_gem_context_vm(ctx); - if (vm) - WRITE_ONCE(vm->file, fpriv); /* XXX */ - mutex_unlock(&ctx->mutex); - ctx->pid = get_task_pid(current, PIDTYPE_PID); snprintf(ctx->name, sizeof(ctx->name), "%s[%d]", current->comm, pid_nr(ctx->pid)); /* And finally expose ourselves to userspace via the idr */ - ret = xa_alloc(&fpriv->context_xa, id, ctx, xa_limit_32b, GFP_KERNEL); - if (ret) - goto err_pid; + old = xa_store(&fpriv->context_xa, id, ctx, GFP_KERNEL); + WARN_ON(old); spin_lock(&i915->gem.contexts.lock); list_add_tail(&ctx->link, &i915->gem.contexts.list); spin_unlock(&i915->gem.contexts.lock); - - return 0; - -err_pid: - put_pid(fetch_and_zero(&ctx->pid)); - return ret; } int i915_gem_context_open(struct drm_i915_private *i915, struct drm_file *file) { struct drm_i915_file_private *file_priv = file->driver_priv; + struct i915_gem_proto_context *pc; struct i915_gem_context *ctx; int err; - u32 id; - xa_init_flags(&file_priv->context_xa, XA_FLAGS_ALLOC); + mutex_init(&file_priv->proto_context_lock); + xa_init_flags(&file_priv->proto_context_xa, XA_FLAGS_ALLOC); + + /* 0 reserved for the default context */ + xa_init_flags(&file_priv->context_xa, XA_FLAGS_ALLOC1); /* 0 reserved for invalid/unassigned ppgtt */ xa_init_flags(&file_priv->vm_xa, XA_FLAGS_ALLOC1); - ctx = i915_gem_create_context(i915, 0); + pc = proto_context_create(i915, 0); + if (IS_ERR(pc)) { + err = PTR_ERR(pc); + goto err; + } + + ctx = i915_gem_create_context(i915, pc); + proto_context_close(pc); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); goto err; } - err = gem_context_register(ctx, file_priv, &id); - if (err < 0) - goto err_ctx; + gem_context_register(ctx, file_priv, 0); - GEM_BUG_ON(id); return 0; -err_ctx: - context_close(ctx); err: xa_destroy(&file_priv->vm_xa); xa_destroy(&file_priv->context_xa); + xa_destroy(&file_priv->proto_context_xa); + mutex_destroy(&file_priv->proto_context_lock); return err; } void i915_gem_context_close(struct drm_file *file) { struct drm_i915_file_private *file_priv = file->driver_priv; + struct i915_gem_proto_context *pc; struct i915_address_space *vm; struct i915_gem_context *ctx; unsigned long idx; + xa_for_each(&file_priv->proto_context_xa, idx, pc) + proto_context_close(pc); + xa_destroy(&file_priv->proto_context_xa); + mutex_destroy(&file_priv->proto_context_lock); + xa_for_each(&file_priv->context_xa, idx, ctx) 
context_close(ctx); xa_destroy(&file_priv->context_xa); @@ -995,8 +1530,6 @@ int i915_gem_vm_create_ioctl(struct drm_device *dev, void *data, if (IS_ERR(ppgtt)) return PTR_ERR(ppgtt); - ppgtt->vm.file = file_priv; - if (args->extensions) { err = i915_user_extensions(u64_to_user_ptr(args->extensions), NULL, 0, @@ -1040,120 +1573,6 @@ int i915_gem_vm_destroy_ioctl(struct drm_device *dev, void *data, return 0; } -struct context_barrier_task { - struct i915_active base; - void (*task)(void *data); - void *data; -}; - -static void cb_retire(struct i915_active *base) -{ - struct context_barrier_task *cb = container_of(base, typeof(*cb), base); - - if (cb->task) - cb->task(cb->data); - - i915_active_fini(&cb->base); - kfree(cb); -} - -I915_SELFTEST_DECLARE(static intel_engine_mask_t context_barrier_inject_fault); -static int context_barrier_task(struct i915_gem_context *ctx, - intel_engine_mask_t engines, - bool (*skip)(struct intel_context *ce, void *data), - int (*pin)(struct intel_context *ce, struct i915_gem_ww_ctx *ww, void *data), - int (*emit)(struct i915_request *rq, void *data), - void (*task)(void *data), - void *data) -{ - struct context_barrier_task *cb; - struct i915_gem_engines_iter it; - struct i915_gem_engines *e; - struct i915_gem_ww_ctx ww; - struct intel_context *ce; - int err = 0; - - GEM_BUG_ON(!task); - - cb = kmalloc(sizeof(*cb), GFP_KERNEL); - if (!cb) - return -ENOMEM; - - i915_active_init(&cb->base, NULL, cb_retire, 0); - err = i915_active_acquire(&cb->base); - if (err) { - kfree(cb); - return err; - } - - e = __context_engines_await(ctx, NULL); - if (!e) { - i915_active_release(&cb->base); - return -ENOENT; - } - - for_each_gem_engine(ce, e, it) { - struct i915_request *rq; - - if (I915_SELFTEST_ONLY(context_barrier_inject_fault & - ce->engine->mask)) { - err = -ENXIO; - break; - } - - if (!(ce->engine->mask & engines)) - continue; - - if (skip && skip(ce, data)) - continue; - - i915_gem_ww_ctx_init(&ww, true); -retry: - err = intel_context_pin_ww(ce, &ww); - if (err) - goto err; - - if (pin) - err = pin(ce, &ww, data); - if (err) - goto err_unpin; - - rq = i915_request_create(ce); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto err_unpin; - } - - err = 0; - if (emit) - err = emit(rq, data); - if (err == 0) - err = i915_active_add_request(&cb->base, rq); - - i915_request_add(rq); -err_unpin: - intel_context_unpin(ce); -err: - if (err == -EDEADLK) { - err = i915_gem_ww_ctx_backoff(&ww); - if (!err) - goto retry; - } - i915_gem_ww_ctx_fini(&ww); - - if (err) - break; - } - i915_sw_fence_complete(&e->fence); - - cb->task = err ? 
NULL : task; /* caller needs to unwind instead */ - cb->data = data; - - i915_active_release(&cb->base); - - return err; -} - static int get_ppgtt(struct drm_i915_file_private *file_priv, struct i915_gem_context *ctx, struct drm_i915_gem_context_param *args) @@ -1186,211 +1605,6 @@ err_put: return err; } -static void set_ppgtt_barrier(void *data) -{ - struct i915_address_space *old = data; - - if (GRAPHICS_VER(old->i915) < 8) - gen6_ppgtt_unpin_all(i915_vm_to_ppgtt(old)); - - i915_vm_close(old); -} - -static int pin_ppgtt_update(struct intel_context *ce, struct i915_gem_ww_ctx *ww, void *data) -{ - struct i915_address_space *vm = ce->vm; - - if (!HAS_LOGICAL_RING_CONTEXTS(vm->i915)) - /* ppGTT is not part of the legacy context image */ - return gen6_ppgtt_pin(i915_vm_to_ppgtt(vm), ww); - - return 0; -} - -static int emit_ppgtt_update(struct i915_request *rq, void *data) -{ - struct i915_address_space *vm = rq->context->vm; - struct intel_engine_cs *engine = rq->engine; - u32 base = engine->mmio_base; - u32 *cs; - int i; - - if (i915_vm_is_4lvl(vm)) { - struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); - const dma_addr_t pd_daddr = px_dma(ppgtt->pd); - - cs = intel_ring_begin(rq, 6); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - *cs++ = MI_LOAD_REGISTER_IMM(2); - - *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 0)); - *cs++ = upper_32_bits(pd_daddr); - *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 0)); - *cs++ = lower_32_bits(pd_daddr); - - *cs++ = MI_NOOP; - intel_ring_advance(rq, cs); - } else if (HAS_LOGICAL_RING_CONTEXTS(engine->i915)) { - struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); - int err; - - /* Magic required to prevent forcewake errors! */ - err = engine->emit_flush(rq, EMIT_INVALIDATE); - if (err) - return err; - - cs = intel_ring_begin(rq, 4 * GEN8_3LVL_PDPES + 2); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - *cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES) | MI_LRI_FORCE_POSTED; - for (i = GEN8_3LVL_PDPES; i--; ) { - const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i); - - *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, i)); - *cs++ = upper_32_bits(pd_daddr); - *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, i)); - *cs++ = lower_32_bits(pd_daddr); - } - *cs++ = MI_NOOP; - intel_ring_advance(rq, cs); - } - - return 0; -} - -static bool skip_ppgtt_update(struct intel_context *ce, void *data) -{ - if (HAS_LOGICAL_RING_CONTEXTS(ce->engine->i915)) - return !ce->state; - else - return !atomic_read(&ce->pin_count); -} - -static int set_ppgtt(struct drm_i915_file_private *file_priv, - struct i915_gem_context *ctx, - struct drm_i915_gem_context_param *args) -{ - struct i915_address_space *vm, *old; - int err; - - if (args->size) - return -EINVAL; - - if (!rcu_access_pointer(ctx->vm)) - return -ENODEV; - - if (upper_32_bits(args->value)) - return -ENOENT; - - rcu_read_lock(); - vm = xa_load(&file_priv->vm_xa, args->value); - if (vm && !kref_get_unless_zero(&vm->ref)) - vm = NULL; - rcu_read_unlock(); - if (!vm) - return -ENOENT; - - err = mutex_lock_interruptible(&ctx->mutex); - if (err) - goto out; - - if (i915_gem_context_is_closed(ctx)) { - err = -ENOENT; - goto unlock; - } - - if (vm == rcu_access_pointer(ctx->vm)) - goto unlock; - - old = __set_ppgtt(ctx, vm); - - /* Teardown the existing obj:vma cache, it will have to be rebuilt. */ - lut_close(ctx); - - /* - * We need to flush any requests using the current ppgtt before - * we release it as the requests do not hold a reference themselves, - * only indirectly through the context. 
- */ - err = context_barrier_task(ctx, ALL_ENGINES, - skip_ppgtt_update, - pin_ppgtt_update, - emit_ppgtt_update, - set_ppgtt_barrier, - old); - if (err) { - i915_vm_close(__set_ppgtt(ctx, old)); - i915_vm_close(old); - lut_close(ctx); /* force a rebuild of the old obj:vma cache */ - } - -unlock: - mutex_unlock(&ctx->mutex); -out: - i915_vm_put(vm); - return err; -} - -static int __apply_ringsize(struct intel_context *ce, void *sz) -{ - return intel_context_set_ring_size(ce, (unsigned long)sz); -} - -static int set_ringsize(struct i915_gem_context *ctx, - struct drm_i915_gem_context_param *args) -{ - if (!HAS_LOGICAL_RING_CONTEXTS(ctx->i915)) - return -ENODEV; - - if (args->size) - return -EINVAL; - - if (!IS_ALIGNED(args->value, I915_GTT_PAGE_SIZE)) - return -EINVAL; - - if (args->value < I915_GTT_PAGE_SIZE) - return -EINVAL; - - if (args->value > 128 * I915_GTT_PAGE_SIZE) - return -EINVAL; - - return context_apply_all(ctx, - __apply_ringsize, - __intel_context_ring_size(args->value)); -} - -static int __get_ringsize(struct intel_context *ce, void *arg) -{ - long sz; - - sz = intel_context_get_ring_size(ce); - GEM_BUG_ON(sz > INT_MAX); - - return sz; /* stop on first engine */ -} - -static int get_ringsize(struct i915_gem_context *ctx, - struct drm_i915_gem_context_param *args) -{ - int sz; - - if (!HAS_LOGICAL_RING_CONTEXTS(ctx->i915)) - return -ENODEV; - - if (args->size) - return -EINVAL; - - sz = context_apply_all(ctx, __get_ringsize, NULL); - if (sz < 0) - return sz; - - args->value = sz; - return 0; -} - int i915_gem_user_to_context_sseu(struct intel_gt *gt, const struct drm_i915_gem_context_param_sseu *user, @@ -1545,382 +1759,6 @@ out_ce: return ret; } -struct set_engines { - struct i915_gem_context *ctx; - struct i915_gem_engines *engines; -}; - -static int -set_engines__load_balance(struct i915_user_extension __user *base, void *data) -{ - struct i915_context_engines_load_balance __user *ext = - container_of_user(base, typeof(*ext), base); - const struct set_engines *set = data; - struct drm_i915_private *i915 = set->ctx->i915; - struct intel_engine_cs *stack[16]; - struct intel_engine_cs **siblings; - struct intel_context *ce; - u16 num_siblings, idx; - unsigned int n; - int err; - - if (!HAS_EXECLISTS(i915)) - return -ENODEV; - - if (intel_uc_uses_guc_submission(&i915->gt.uc)) - return -ENODEV; /* not implement yet */ - - if (get_user(idx, &ext->engine_index)) - return -EFAULT; - - if (idx >= set->engines->num_engines) { - drm_dbg(&i915->drm, "Invalid placement value, %d >= %d\n", - idx, set->engines->num_engines); - return -EINVAL; - } - - idx = array_index_nospec(idx, set->engines->num_engines); - if (set->engines->engines[idx]) { - drm_dbg(&i915->drm, - "Invalid placement[%d], already occupied\n", idx); - return -EEXIST; - } - - if (get_user(num_siblings, &ext->num_siblings)) - return -EFAULT; - - err = check_user_mbz(&ext->flags); - if (err) - return err; - - err = check_user_mbz(&ext->mbz64); - if (err) - return err; - - siblings = stack; - if (num_siblings > ARRAY_SIZE(stack)) { - siblings = kmalloc_array(num_siblings, - sizeof(*siblings), - GFP_KERNEL); - if (!siblings) - return -ENOMEM; - } - - for (n = 0; n < num_siblings; n++) { - struct i915_engine_class_instance ci; - - if (copy_from_user(&ci, &ext->engines[n], sizeof(ci))) { - err = -EFAULT; - goto out_siblings; - } - - siblings[n] = intel_engine_lookup_user(i915, - ci.engine_class, - ci.engine_instance); - if (!siblings[n]) { - drm_dbg(&i915->drm, - "Invalid sibling[%d]: { class:%d, inst:%d }\n", - n, 
ci.engine_class, ci.engine_instance); - err = -EINVAL; - goto out_siblings; - } - } - - ce = intel_execlists_create_virtual(siblings, n); - if (IS_ERR(ce)) { - err = PTR_ERR(ce); - goto out_siblings; - } - - intel_context_set_gem(ce, set->ctx); - - if (cmpxchg(&set->engines->engines[idx], NULL, ce)) { - intel_context_put(ce); - err = -EEXIST; - goto out_siblings; - } - -out_siblings: - if (siblings != stack) - kfree(siblings); - - return err; -} - -static int -set_engines__bond(struct i915_user_extension __user *base, void *data) -{ - struct i915_context_engines_bond __user *ext = - container_of_user(base, typeof(*ext), base); - const struct set_engines *set = data; - struct drm_i915_private *i915 = set->ctx->i915; - struct i915_engine_class_instance ci; - struct intel_engine_cs *virtual; - struct intel_engine_cs *master; - u16 idx, num_bonds; - int err, n; - - if (get_user(idx, &ext->virtual_index)) - return -EFAULT; - - if (idx >= set->engines->num_engines) { - drm_dbg(&i915->drm, - "Invalid index for virtual engine: %d >= %d\n", - idx, set->engines->num_engines); - return -EINVAL; - } - - idx = array_index_nospec(idx, set->engines->num_engines); - if (!set->engines->engines[idx]) { - drm_dbg(&i915->drm, "Invalid engine at %d\n", idx); - return -EINVAL; - } - virtual = set->engines->engines[idx]->engine; - - err = check_user_mbz(&ext->flags); - if (err) - return err; - - for (n = 0; n < ARRAY_SIZE(ext->mbz64); n++) { - err = check_user_mbz(&ext->mbz64[n]); - if (err) - return err; - } - - if (copy_from_user(&ci, &ext->master, sizeof(ci))) - return -EFAULT; - - master = intel_engine_lookup_user(i915, - ci.engine_class, ci.engine_instance); - if (!master) { - drm_dbg(&i915->drm, - "Unrecognised master engine: { class:%u, instance:%u }\n", - ci.engine_class, ci.engine_instance); - return -EINVAL; - } - - if (get_user(num_bonds, &ext->num_bonds)) - return -EFAULT; - - for (n = 0; n < num_bonds; n++) { - struct intel_engine_cs *bond; - - if (copy_from_user(&ci, &ext->engines[n], sizeof(ci))) - return -EFAULT; - - bond = intel_engine_lookup_user(i915, - ci.engine_class, - ci.engine_instance); - if (!bond) { - drm_dbg(&i915->drm, - "Unrecognised engine[%d] for bonding: { class:%d, instance: %d }\n", - n, ci.engine_class, ci.engine_instance); - return -EINVAL; - } - - /* - * A non-virtual engine has no siblings to choose between; and - * a submit fence will always be directed to the one engine. 
- */ - if (intel_engine_is_virtual(virtual)) { - err = intel_virtual_engine_attach_bond(virtual, - master, - bond); - if (err) - return err; - } - } - - return 0; -} - -static const i915_user_extension_fn set_engines__extensions[] = { - [I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE] = set_engines__load_balance, - [I915_CONTEXT_ENGINES_EXT_BOND] = set_engines__bond, -}; - -static int -set_engines(struct i915_gem_context *ctx, - const struct drm_i915_gem_context_param *args) -{ - struct drm_i915_private *i915 = ctx->i915; - struct i915_context_param_engines __user *user = - u64_to_user_ptr(args->value); - struct set_engines set = { .ctx = ctx }; - unsigned int num_engines, n; - u64 extensions; - int err; - - if (!args->size) { /* switch back to legacy user_ring_map */ - if (!i915_gem_context_user_engines(ctx)) - return 0; - - set.engines = default_engines(ctx); - if (IS_ERR(set.engines)) - return PTR_ERR(set.engines); - - goto replace; - } - - BUILD_BUG_ON(!IS_ALIGNED(sizeof(*user), sizeof(*user->engines))); - if (args->size < sizeof(*user) || - !IS_ALIGNED(args->size, sizeof(*user->engines))) { - drm_dbg(&i915->drm, "Invalid size for engine array: %d\n", - args->size); - return -EINVAL; - } - - /* - * Note that I915_EXEC_RING_MASK limits execbuf to only using the - * first 64 engines defined here. - */ - num_engines = (args->size - sizeof(*user)) / sizeof(*user->engines); - set.engines = alloc_engines(num_engines); - if (!set.engines) - return -ENOMEM; - - for (n = 0; n < num_engines; n++) { - struct i915_engine_class_instance ci; - struct intel_engine_cs *engine; - struct intel_context *ce; - - if (copy_from_user(&ci, &user->engines[n], sizeof(ci))) { - __free_engines(set.engines, n); - return -EFAULT; - } - - if (ci.engine_class == (u16)I915_ENGINE_CLASS_INVALID && - ci.engine_instance == (u16)I915_ENGINE_CLASS_INVALID_NONE) { - set.engines->engines[n] = NULL; - continue; - } - - engine = intel_engine_lookup_user(ctx->i915, - ci.engine_class, - ci.engine_instance); - if (!engine) { - drm_dbg(&i915->drm, - "Invalid engine[%d]: { class:%d, instance:%d }\n", - n, ci.engine_class, ci.engine_instance); - __free_engines(set.engines, n); - return -ENOENT; - } - - ce = intel_context_create(engine); - if (IS_ERR(ce)) { - __free_engines(set.engines, n); - return PTR_ERR(ce); - } - - intel_context_set_gem(ce, ctx); - - set.engines->engines[n] = ce; - } - set.engines->num_engines = num_engines; - - err = -EFAULT; - if (!get_user(extensions, &user->extensions)) - err = i915_user_extensions(u64_to_user_ptr(extensions), - set_engines__extensions, - ARRAY_SIZE(set_engines__extensions), - &set); - if (err) { - free_engines(set.engines); - return err; - } - -replace: - mutex_lock(&ctx->engines_mutex); - if (i915_gem_context_is_closed(ctx)) { - mutex_unlock(&ctx->engines_mutex); - free_engines(set.engines); - return -ENOENT; - } - if (args->size) - i915_gem_context_set_user_engines(ctx); - else - i915_gem_context_clear_user_engines(ctx); - set.engines = rcu_replace_pointer(ctx->engines, set.engines, 1); - mutex_unlock(&ctx->engines_mutex); - - /* Keep track of old engine sets for kill_context() */ - engines_idle_release(ctx, set.engines); - - return 0; -} - -static int -get_engines(struct i915_gem_context *ctx, - struct drm_i915_gem_context_param *args) -{ - struct i915_context_param_engines __user *user; - struct i915_gem_engines *e; - size_t n, count, size; - bool user_engines; - int err = 0; - - e = __context_engines_await(ctx, &user_engines); - if (!e) - return -ENOENT; - - if (!user_engines) { - 
i915_sw_fence_complete(&e->fence); - args->size = 0; - return 0; - } - - count = e->num_engines; - - /* Be paranoid in case we have an impedance mismatch */ - if (!check_struct_size(user, engines, count, &size)) { - err = -EINVAL; - goto err_free; - } - if (overflows_type(size, args->size)) { - err = -EINVAL; - goto err_free; - } - - if (!args->size) { - args->size = size; - goto err_free; - } - - if (args->size < size) { - err = -EINVAL; - goto err_free; - } - - user = u64_to_user_ptr(args->value); - if (put_user(0, &user->extensions)) { - err = -EFAULT; - goto err_free; - } - - for (n = 0; n < count; n++) { - struct i915_engine_class_instance ci = { - .engine_class = I915_ENGINE_CLASS_INVALID, - .engine_instance = I915_ENGINE_CLASS_INVALID_NONE, - }; - - if (e->engines[n]) { - ci.engine_class = e->engines[n]->engine->uabi_class; - ci.engine_instance = e->engines[n]->engine->uabi_instance; - } - - if (copy_to_user(&user->engines[n], &ci, sizeof(ci))) { - err = -EFAULT; - goto err_free; - } - } - - args->size = size; - -err_free: - i915_sw_fence_complete(&e->fence); - return err; -} - static int set_persistence(struct i915_gem_context *ctx, const struct drm_i915_gem_context_param *args) @@ -1931,41 +1769,30 @@ set_persistence(struct i915_gem_context *ctx, return __context_set_persistence(ctx, args->value); } -static int __apply_priority(struct intel_context *ce, void *arg) +static void __apply_priority(struct intel_context *ce, void *arg) { struct i915_gem_context *ctx = arg; if (!intel_engine_has_timeslices(ce->engine)) - return 0; + return; - if (ctx->sched.priority >= I915_PRIORITY_NORMAL) + if (ctx->sched.priority >= I915_PRIORITY_NORMAL && + intel_engine_has_semaphores(ce->engine)) intel_context_set_use_semaphores(ce); else intel_context_clear_use_semaphores(ce); - - return 0; } static int set_priority(struct i915_gem_context *ctx, const struct drm_i915_gem_context_param *args) { - s64 priority = args->value; - - if (args->size) - return -EINVAL; - - if (!(ctx->i915->caps.scheduler & I915_SCHEDULER_CAP_PRIORITY)) - return -ENODEV; - - if (priority > I915_CONTEXT_MAX_USER_PRIORITY || - priority < I915_CONTEXT_MIN_USER_PRIORITY) - return -EINVAL; + int err; - if (priority > I915_CONTEXT_DEFAULT_PRIORITY && - !capable(CAP_SYS_NICE)) - return -EPERM; + err = validate_priority(ctx->i915, args); + if (err) + return err; - ctx->sched.priority = priority; + ctx->sched.priority = args->value; context_apply_all(ctx, __apply_priority, ctx); return 0; @@ -1978,15 +1805,6 @@ static int ctx_setparam(struct drm_i915_file_private *fpriv, int ret = 0; switch (args->param) { - case I915_CONTEXT_PARAM_NO_ZEROMAP: - if (args->size) - ret = -EINVAL; - else if (args->value) - set_bit(UCONTEXT_NO_ZEROMAP, &ctx->user_flags); - else - clear_bit(UCONTEXT_NO_ZEROMAP, &ctx->user_flags); - break; - case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE: if (args->size) ret = -EINVAL; @@ -2024,23 +1842,15 @@ static int ctx_setparam(struct drm_i915_file_private *fpriv, ret = set_sseu(ctx, args); break; - case I915_CONTEXT_PARAM_VM: - ret = set_ppgtt(fpriv, ctx, args); - break; - - case I915_CONTEXT_PARAM_ENGINES: - ret = set_engines(ctx, args); - break; - case I915_CONTEXT_PARAM_PERSISTENCE: ret = set_persistence(ctx, args); break; - case I915_CONTEXT_PARAM_RINGSIZE: - ret = set_ringsize(ctx, args); - break; - + case I915_CONTEXT_PARAM_NO_ZEROMAP: case I915_CONTEXT_PARAM_BAN_PERIOD: + case I915_CONTEXT_PARAM_RINGSIZE: + case I915_CONTEXT_PARAM_VM: + case I915_CONTEXT_PARAM_ENGINES: default: ret = -EINVAL; break; @@ -2050,7 
+1860,7 @@ static int ctx_setparam(struct drm_i915_file_private *fpriv, } struct create_ext { - struct i915_gem_context *ctx; + struct i915_gem_proto_context *pc; struct drm_i915_file_private *fpriv; }; @@ -2065,233 +1875,84 @@ static int create_setparam(struct i915_user_extension __user *ext, void *data) if (local.param.ctx_id) return -EINVAL; - return ctx_setparam(arg->fpriv, arg->ctx, &local.param); + return set_proto_ctx_param(arg->fpriv, arg->pc, &local.param); } -static int copy_ring_size(struct intel_context *dst, - struct intel_context *src) +static int invalid_ext(struct i915_user_extension __user *ext, void *data) { - long sz; - - sz = intel_context_get_ring_size(src); - if (sz < 0) - return sz; - - return intel_context_set_ring_size(dst, sz); + return -EINVAL; } -static int clone_engines(struct i915_gem_context *dst, - struct i915_gem_context *src) -{ - struct i915_gem_engines *clone, *e; - bool user_engines; - unsigned long n; - - e = __context_engines_await(src, &user_engines); - if (!e) - return -ENOENT; - - clone = alloc_engines(e->num_engines); - if (!clone) - goto err_unlock; - - for (n = 0; n < e->num_engines; n++) { - struct intel_engine_cs *engine; - - if (!e->engines[n]) { - clone->engines[n] = NULL; - continue; - } - engine = e->engines[n]->engine; - - /* - * Virtual engines are singletons; they can only exist - * inside a single context, because they embed their - * HW context... As each virtual context implies a single - * timeline (each engine can only dequeue a single request - * at any time), it would be surprising for two contexts - * to use the same engine. So let's create a copy of - * the virtual engine instead. - */ - if (intel_engine_is_virtual(engine)) - clone->engines[n] = - intel_execlists_clone_virtual(engine); - else - clone->engines[n] = intel_context_create(engine); - if (IS_ERR_OR_NULL(clone->engines[n])) { - __free_engines(clone, n); - goto err_unlock; - } - - intel_context_set_gem(clone->engines[n], dst); - - /* Copy across the preferred ringsize */ - if (copy_ring_size(clone->engines[n], e->engines[n])) { - __free_engines(clone, n + 1); - goto err_unlock; - } - } - clone->num_engines = n; - i915_sw_fence_complete(&e->fence); - - /* Serialised by constructor */ - engines_idle_release(dst, rcu_replace_pointer(dst->engines, clone, 1)); - if (user_engines) - i915_gem_context_set_user_engines(dst); - else - i915_gem_context_clear_user_engines(dst); - return 0; - -err_unlock: - i915_sw_fence_complete(&e->fence); - return -ENOMEM; -} - -static int clone_flags(struct i915_gem_context *dst, - struct i915_gem_context *src) -{ - dst->user_flags = src->user_flags; - return 0; -} - -static int clone_schedattr(struct i915_gem_context *dst, - struct i915_gem_context *src) -{ - dst->sched = src->sched; - return 0; -} - -static int clone_sseu(struct i915_gem_context *dst, - struct i915_gem_context *src) -{ - struct i915_gem_engines *e = i915_gem_context_lock_engines(src); - struct i915_gem_engines *clone; - unsigned long n; - int err; - - /* no locking required; sole access under constructor*/ - clone = __context_engines_static(dst); - if (e->num_engines != clone->num_engines) { - err = -EINVAL; - goto unlock; - } - - for (n = 0; n < e->num_engines; n++) { - struct intel_context *ce = e->engines[n]; - - if (clone->engines[n]->engine->class != ce->engine->class) { - /* Must have compatible engine maps! 
*/ - err = -EINVAL; - goto unlock; - } - - /* serialises with set_sseu */ - err = intel_context_lock_pinned(ce); - if (err) - goto unlock; - - clone->engines[n]->sseu = ce->sseu; - intel_context_unlock_pinned(ce); - } - - err = 0; -unlock: - i915_gem_context_unlock_engines(src); - return err; -} +static const i915_user_extension_fn create_extensions[] = { + [I915_CONTEXT_CREATE_EXT_SETPARAM] = create_setparam, + [I915_CONTEXT_CREATE_EXT_CLONE] = invalid_ext, +}; -static int clone_timeline(struct i915_gem_context *dst, - struct i915_gem_context *src) +static bool client_is_banned(struct drm_i915_file_private *file_priv) { - if (src->timeline) - __assign_timeline(dst, src->timeline); - - return 0; + return atomic_read(&file_priv->ban_score) >= I915_CLIENT_SCORE_BANNED; } -static int clone_vm(struct i915_gem_context *dst, - struct i915_gem_context *src) +static inline struct i915_gem_context * +__context_lookup(struct drm_i915_file_private *file_priv, u32 id) { - struct i915_address_space *vm; - int err = 0; - - if (!rcu_access_pointer(src->vm)) - return 0; + struct i915_gem_context *ctx; rcu_read_lock(); - vm = context_get_vm_rcu(src); + ctx = xa_load(&file_priv->context_xa, id); + if (ctx && !kref_get_unless_zero(&ctx->ref)) + ctx = NULL; rcu_read_unlock(); - if (!mutex_lock_interruptible(&dst->mutex)) { - __assign_ppgtt(dst, vm); - mutex_unlock(&dst->mutex); - } else { - err = -EINTR; - } - - i915_vm_put(vm); - return err; + return ctx; } -static int create_clone(struct i915_user_extension __user *ext, void *data) -{ - static int (* const fn[])(struct i915_gem_context *dst, - struct i915_gem_context *src) = { -#define MAP(x, y) [ilog2(I915_CONTEXT_CLONE_##x)] = y - MAP(ENGINES, clone_engines), - MAP(FLAGS, clone_flags), - MAP(SCHEDATTR, clone_schedattr), - MAP(SSEU, clone_sseu), - MAP(TIMELINE, clone_timeline), - MAP(VM, clone_vm), -#undef MAP - }; - struct drm_i915_gem_context_create_ext_clone local; - const struct create_ext *arg = data; - struct i915_gem_context *dst = arg->ctx; - struct i915_gem_context *src; - int err, bit; +static struct i915_gem_context * +finalize_create_context_locked(struct drm_i915_file_private *file_priv, + struct i915_gem_proto_context *pc, u32 id) +{ + struct i915_gem_context *ctx; + void *old; - if (copy_from_user(&local, ext, sizeof(local))) - return -EFAULT; + lockdep_assert_held(&file_priv->proto_context_lock); - BUILD_BUG_ON(GENMASK(BITS_PER_TYPE(local.flags) - 1, ARRAY_SIZE(fn)) != - I915_CONTEXT_CLONE_UNKNOWN); + ctx = i915_gem_create_context(file_priv->dev_priv, pc); + if (IS_ERR(ctx)) + return ctx; - if (local.flags & I915_CONTEXT_CLONE_UNKNOWN) - return -EINVAL; + gem_context_register(ctx, file_priv, id); - if (local.rsvd) - return -EINVAL; + old = xa_erase(&file_priv->proto_context_xa, id); + GEM_BUG_ON(old != pc); + proto_context_close(pc); - rcu_read_lock(); - src = __i915_gem_context_lookup_rcu(arg->fpriv, local.clone_id); - rcu_read_unlock(); - if (!src) - return -ENOENT; + /* One for the xarray and one for the caller */ + return i915_gem_context_get(ctx); +} - GEM_BUG_ON(src == dst); +struct i915_gem_context * +i915_gem_context_lookup(struct drm_i915_file_private *file_priv, u32 id) +{ + struct i915_gem_proto_context *pc; + struct i915_gem_context *ctx; - for (bit = 0; bit < ARRAY_SIZE(fn); bit++) { - if (!(local.flags & BIT(bit))) - continue; + ctx = __context_lookup(file_priv, id); + if (ctx) + return ctx; - err = fn[bit](dst, src); - if (err) - return err; + mutex_lock(&file_priv->proto_context_lock); + /* Try one more time under the lock 
*/ + ctx = __context_lookup(file_priv, id); + if (!ctx) { + pc = xa_load(&file_priv->proto_context_xa, id); + if (!pc) + ctx = ERR_PTR(-ENOENT); + else + ctx = finalize_create_context_locked(file_priv, pc, id); } + mutex_unlock(&file_priv->proto_context_lock); - return 0; -} - -static const i915_user_extension_fn create_extensions[] = { - [I915_CONTEXT_CREATE_EXT_SETPARAM] = create_setparam, - [I915_CONTEXT_CREATE_EXT_CLONE] = create_clone, -}; - -static bool client_is_banned(struct drm_i915_file_private *file_priv) -{ - return atomic_read(&file_priv->ban_score) >= I915_CLIENT_SCORE_BANNED; + return ctx; } int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, @@ -2321,9 +1982,9 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, return -EIO; } - ext_data.ctx = i915_gem_create_context(i915, args->flags); - if (IS_ERR(ext_data.ctx)) - return PTR_ERR(ext_data.ctx); + ext_data.pc = proto_context_create(i915, args->flags); + if (IS_ERR(ext_data.pc)) + return PTR_ERR(ext_data.pc); if (args->flags & I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS) { ret = i915_user_extensions(u64_to_user_ptr(args->extensions), @@ -2331,20 +1992,39 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, ARRAY_SIZE(create_extensions), &ext_data); if (ret) - goto err_ctx; + goto err_pc; } - ret = gem_context_register(ext_data.ctx, ext_data.fpriv, &id); - if (ret < 0) - goto err_ctx; + if (GRAPHICS_VER(i915) > 12) { + struct i915_gem_context *ctx; + + /* Get ourselves a context ID */ + ret = xa_alloc(&ext_data.fpriv->context_xa, &id, NULL, + xa_limit_32b, GFP_KERNEL); + if (ret) + goto err_pc; + + ctx = i915_gem_create_context(i915, ext_data.pc); + if (IS_ERR(ctx)) { + ret = PTR_ERR(ctx); + goto err_pc; + } + + proto_context_close(ext_data.pc); + gem_context_register(ctx, ext_data.fpriv, id); + } else { + ret = proto_context_register(ext_data.fpriv, ext_data.pc, &id); + if (ret < 0) + goto err_pc; + } args->ctx_id = id; drm_dbg(&i915->drm, "HW context %d created\n", args->ctx_id); return 0; -err_ctx: - context_close(ext_data.ctx); +err_pc: + proto_context_close(ext_data.pc); return ret; } @@ -2353,6 +2033,7 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data, { struct drm_i915_gem_context_destroy *args = data; struct drm_i915_file_private *file_priv = file->driver_priv; + struct i915_gem_proto_context *pc; struct i915_gem_context *ctx; if (args->pad != 0) @@ -2361,11 +2042,24 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data, if (!args->ctx_id) return -ENOENT; + /* We need to hold the proto-context lock here to prevent races + * with finalize_create_context_locked(). 
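Seen from userspace, both branches of the create ioctl above look the same: the client gets back a u32 context ID and can fix the address space, engines, priority and so on at creation time through the extension chain. A minimal userspace-side sketch (illustrative only, not part of this patch; the wrapper name, the priority value and the fd variable are made up, and error handling is omitted):

#include <stdint.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

static __u32 create_ctx_with_private_vm(int fd)
{
	struct drm_i915_gem_vm_control vm = {};
	struct drm_i915_gem_context_create_ext_setparam p_vm = {
		.base.name = I915_CONTEXT_CREATE_EXT_SETPARAM,
		.param.param = I915_CONTEXT_PARAM_VM,
	};
	struct drm_i915_gem_context_create_ext_setparam p_prio = {
		.base.name = I915_CONTEXT_CREATE_EXT_SETPARAM,
		.base.next_extension = (uintptr_t)&p_vm,
		.param.param = I915_CONTEXT_PARAM_PRIORITY,
		/* Raising the priority above the default needs CAP_SYS_NICE. */
		.param.value = 512,
	};
	struct drm_i915_gem_context_create_ext create = {
		.flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
		.extensions = (uintptr_t)&p_prio,
	};

	/* A private ppGTT for this context, created up front. */
	ioctl(fd, DRM_IOCTL_I915_GEM_VM_CREATE, &vm);
	p_vm.param.value = vm.vm_id;

	ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT, &create);
	return create.ctx_id;
}

On older platforms the kernel keeps the result as a proto-context until the ID is first used for real work; on newer ones it is finalized inside the ioctl. Either way the returned ctx_id behaves the same.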
+ */ + mutex_lock(&file_priv->proto_context_lock); ctx = xa_erase(&file_priv->context_xa, args->ctx_id); - if (!ctx) + pc = xa_erase(&file_priv->proto_context_xa, args->ctx_id); + mutex_unlock(&file_priv->proto_context_lock); + + if (!ctx && !pc) return -ENOENT; + GEM_WARN_ON(ctx && pc); + + if (pc) + proto_context_close(pc); + + if (ctx) + context_close(ctx); - context_close(ctx); return 0; } @@ -2433,15 +2127,10 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, int ret = 0; ctx = i915_gem_context_lookup(file_priv, args->ctx_id); - if (!ctx) - return -ENOENT; + if (IS_ERR(ctx)) + return PTR_ERR(ctx); switch (args->param) { - case I915_CONTEXT_PARAM_NO_ZEROMAP: - args->size = 0; - args->value = test_bit(UCONTEXT_NO_ZEROMAP, &ctx->user_flags); - break; - case I915_CONTEXT_PARAM_GTT_SIZE: args->size = 0; rcu_read_lock(); @@ -2480,20 +2169,15 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, ret = get_ppgtt(file_priv, ctx, args); break; - case I915_CONTEXT_PARAM_ENGINES: - ret = get_engines(ctx, args); - break; - case I915_CONTEXT_PARAM_PERSISTENCE: args->size = 0; args->value = i915_gem_context_is_persistent(ctx); break; - case I915_CONTEXT_PARAM_RINGSIZE: - ret = get_ringsize(ctx, args); - break; - + case I915_CONTEXT_PARAM_NO_ZEROMAP: case I915_CONTEXT_PARAM_BAN_PERIOD: + case I915_CONTEXT_PARAM_ENGINES: + case I915_CONTEXT_PARAM_RINGSIZE: default: ret = -EINVAL; break; @@ -2508,16 +2192,32 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data, { struct drm_i915_file_private *file_priv = file->driver_priv; struct drm_i915_gem_context_param *args = data; + struct i915_gem_proto_context *pc; struct i915_gem_context *ctx; - int ret; + int ret = 0; - ctx = i915_gem_context_lookup(file_priv, args->ctx_id); - if (!ctx) - return -ENOENT; + mutex_lock(&file_priv->proto_context_lock); + ctx = __context_lookup(file_priv, args->ctx_id); + if (!ctx) { + pc = xa_load(&file_priv->proto_context_xa, args->ctx_id); + if (pc) { + /* Contexts should be finalized inside + * GEM_CONTEXT_CREATE starting with graphics + * version 13. + */ + WARN_ON(GRAPHICS_VER(file_priv->dev_priv) > 12); + ret = set_proto_ctx_param(file_priv, pc, args); + } else { + ret = -ENOENT; + } + } + mutex_unlock(&file_priv->proto_context_lock); - ret = ctx_setparam(file_priv, ctx, args); + if (ctx) { + ret = ctx_setparam(file_priv, ctx, args); + i915_gem_context_put(ctx); + } - i915_gem_context_put(ctx); return ret; } @@ -2527,16 +2227,13 @@ int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, struct drm_i915_private *i915 = to_i915(dev); struct drm_i915_reset_stats *args = data; struct i915_gem_context *ctx; - int ret; if (args->flags || args->pad) return -EINVAL; - ret = -ENOENT; - rcu_read_lock(); - ctx = __i915_gem_context_lookup_rcu(file->driver_priv, args->ctx_id); - if (!ctx) - goto out; + ctx = i915_gem_context_lookup(file->driver_priv, args->ctx_id); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); /* * We opt for unserialised reads here. 
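Unlike set-param, which still special-cases a not-yet-finalized proto-context, the get-param and reset-stats paths above resolve the ID through i915_gem_context_lookup() and will therefore finalize a pending proto-context as a side effect. A minimal userspace-side query (illustrative only, not part of this patch; fd and ctx_id are assumed, error handling omitted):

struct drm_i915_gem_context_param arg = {
	.ctx_id = ctx_id,
	.param = I915_CONTEXT_PARAM_GTT_SIZE,
};

ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM, &arg);
/* arg.value now holds the size of the context's GTT address space. */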
This may result in tearing @@ -2553,10 +2250,8 @@ int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, args->batch_active = atomic_read(&ctx->guilty_count); args->batch_pending = atomic_read(&ctx->active_count); - ret = 0; -out: - rcu_read_unlock(); - return ret; + i915_gem_context_put(ctx); + return 0; } /* GEM context-engines iterator: for_each_gem_engine() */ @@ -2584,27 +2279,16 @@ i915_gem_engines_iter_next(struct i915_gem_engines_iter *it) #include "selftests/i915_gem_context.c" #endif -static void i915_global_gem_context_shrink(void) +void i915_gem_context_module_exit(void) { - kmem_cache_shrink(global.slab_luts); + kmem_cache_destroy(slab_luts); } -static void i915_global_gem_context_exit(void) -{ - kmem_cache_destroy(global.slab_luts); -} - -static struct i915_global_gem_context global = { { - .shrink = i915_global_gem_context_shrink, - .exit = i915_global_gem_context_exit, -} }; - -int __init i915_global_gem_context_init(void) +int __init i915_gem_context_module_init(void) { - global.slab_luts = KMEM_CACHE(i915_lut_handle, 0); - if (!global.slab_luts) + slab_luts = KMEM_CACHE(i915_lut_handle, 0); + if (!slab_luts) return -ENOMEM; - i915_global_register(&global.base); return 0; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h index b5c908f3f4f2..18060536b0c2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h @@ -133,6 +133,9 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data, int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, void *data, struct drm_file *file); +struct i915_gem_context * +i915_gem_context_lookup(struct drm_i915_file_private *file_priv, u32 id); + static inline struct i915_gem_context * i915_gem_context_get(struct i915_gem_context *ctx) { @@ -221,6 +224,9 @@ i915_gem_engines_iter_next(struct i915_gem_engines_iter *it); for (i915_gem_engines_iter_init(&(it), (engines)); \ ((ce) = i915_gem_engines_iter_next(&(it)));) +void i915_gem_context_module_exit(void); +int i915_gem_context_module_init(void); + struct i915_lut_handle *i915_lut_handle_alloc(void); void i915_lut_handle_free(struct i915_lut_handle *lut); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h index 340473aa70de..94c03a97cb77 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h @@ -30,33 +30,187 @@ struct i915_address_space; struct intel_timeline; struct intel_ring; +/** + * struct i915_gem_engines - A set of engines + */ struct i915_gem_engines { union { + /** @link: Link in i915_gem_context::stale::engines */ struct list_head link; + + /** @rcu: RCU to use when freeing */ struct rcu_head rcu; }; + + /** @fence: Fence used for delayed destruction of engines */ struct i915_sw_fence fence; + + /** @ctx: i915_gem_context backpointer */ struct i915_gem_context *ctx; + + /** @num_engines: Number of engines in this set */ unsigned int num_engines; + + /** @engines: Array of engines */ struct intel_context *engines[]; }; +/** + * struct i915_gem_engines_iter - Iterator for an i915_gem_engines set + */ struct i915_gem_engines_iter { + /** @idx: Index into i915_gem_engines::engines */ unsigned int idx; + + /** @engines: Engine set being iterated */ const struct i915_gem_engines *engines; }; /** + * enum i915_gem_engine_type - Describes the type of an i915_gem_proto_engine + */ +enum i915_gem_engine_type { + /** 
@I915_GEM_ENGINE_TYPE_INVALID: An invalid engine */ + I915_GEM_ENGINE_TYPE_INVALID = 0, + + /** @I915_GEM_ENGINE_TYPE_PHYSICAL: A single physical engine */ + I915_GEM_ENGINE_TYPE_PHYSICAL, + + /** @I915_GEM_ENGINE_TYPE_BALANCED: A load-balanced engine set */ + I915_GEM_ENGINE_TYPE_BALANCED, +}; + +/** + * struct i915_gem_proto_engine - prototype engine + * + * This struct describes an engine that a context may contain. Engines + * have three types: + * + * - I915_GEM_ENGINE_TYPE_INVALID: Invalid engines can be created but they + * show up as a NULL in i915_gem_engines::engines[i] and any attempt to + * use them by the user results in -EINVAL. They are also useful during + * proto-context construction because the client may create invalid + * engines and then set them up later as virtual engines. + * + * - I915_GEM_ENGINE_TYPE_PHYSICAL: A single physical engine, described by + * i915_gem_proto_engine::engine. + * + * - I915_GEM_ENGINE_TYPE_BALANCED: A load-balanced engine set, described by + * i915_gem_proto_engine::num_siblings and i915_gem_proto_engine::siblings. + */ +struct i915_gem_proto_engine { + /** @type: Type of this engine */ + enum i915_gem_engine_type type; + + /** @engine: Engine, for physical */ + struct intel_engine_cs *engine; + + /** @num_siblings: Number of balanced siblings */ + unsigned int num_siblings; + + /** @siblings: Balanced siblings */ + struct intel_engine_cs **siblings; + + /** @sseu: Client-set SSEU parameters */ + struct intel_sseu sseu; +}; + +/** + * struct i915_gem_proto_context - prototype context + * + * The struct i915_gem_proto_context represents the creation parameters for + * a struct i915_gem_context. This is used to gather parameters provided + * either through creation flags or via SET_CONTEXT_PARAM so that, when we + * create the final i915_gem_context, those parameters can be immutable. + * + * The context uAPI allows for two methods of setting context parameters: + * SET_CONTEXT_PARAM and CONTEXT_CREATE_EXT_SETPARAM. The former is + * allowed to be called at any time while the latter happens as part of + * GEM_CONTEXT_CREATE. When these were initially added, everything + * settable via one was settable via the other. While some params are + * fairly simple and setting them on a live context is harmless, such as + * the context priority, others are far trickier such as the VM or the + * set of engines. To avoid some truly nasty race conditions, we don't + * allow setting the VM or the set of engines on live contexts. + * + * The way we dealt with this without breaking older userspace that sets + * the VM or engine set via SET_CONTEXT_PARAM is to delay the creation of + * the actual context until after the client is done configuring it with + * SET_CONTEXT_PARAM. From the perspective of the client, it has the same + * u32 context ID the whole time. From the perspective of i915, however, + * it's an i915_gem_proto_context right up until the point where we attempt + * to do something which the proto-context can't handle, at which point the + * real context gets created. + * + * This is accomplished via a little xarray dance. When GEM_CONTEXT_CREATE + * is called, we create a proto-context, reserve a slot in context_xa but + * leave it NULL, and store the proto-context in the corresponding slot in + * proto_context_xa. Then, whenever we go to look up a context, we first + * check context_xa. If it's there, we return the i915_gem_context and + * we're done.
If it's not, we look in proto_context_xa and, if we find it + there, we create the actual context and kill the proto-context. + * + * At the time we made this change (April, 2021), we did a fairly complete + * audit of existing userspace to ensure this wouldn't break anything: + * + * - Mesa/i965 didn't use the engines or VM APIs at all + * + * - Mesa/ANV used the engines API but via CONTEXT_CREATE_EXT_SETPARAM and + * didn't use the VM API. + * + * - Mesa/iris didn't use the engines or VM APIs at all + * + * - The open-source compute-runtime didn't yet use the engines API but + * did use the VM API via SET_CONTEXT_PARAM. However, CONTEXT_SETPARAM + * was always the second ioctl on that context, immediately following + * GEM_CONTEXT_CREATE. + * + * - The media driver sets engines and bonding/balancing via + * SET_CONTEXT_PARAM. However, CONTEXT_SETPARAM to set the VM was + * always the second ioctl on that context, immediately following + * GEM_CONTEXT_CREATE and setting engines immediately followed that. + * + * In order for this dance to work properly, any modification to an + * i915_gem_proto_context that is exposed to the client via + * drm_i915_file_private::proto_context_xa must be guarded by + * drm_i915_file_private::proto_context_lock. The exception is when a + * proto-context has not yet been exposed, such as when handling + * CONTEXT_CREATE_SET_PARAM during GEM_CONTEXT_CREATE. + */ +struct i915_gem_proto_context { + /** @vm: See &i915_gem_context.vm */ + struct i915_address_space *vm; + + /** @user_flags: See &i915_gem_context.user_flags */ + unsigned long user_flags; + + /** @sched: See &i915_gem_context.sched */ + struct i915_sched_attr sched; + + /** @num_user_engines: Number of user-specified engines or -1 */ + int num_user_engines; + + /** @user_engines: User-specified engines */ + struct i915_gem_proto_engine *user_engines; + + /** @legacy_rcs_sseu: Client-set SSEU parameters for the legacy RCS */ + struct intel_sseu legacy_rcs_sseu; + + /** @single_timeline: See &i915_gem_context.syncobj */ + bool single_timeline; +}; + +/** * struct i915_gem_context - client state * * The struct i915_gem_context represents the combined view of the driver and * logical hardware state for a particular client. */ struct i915_gem_context { - /** i915: i915 device backpointer */ + /** @i915: i915 device backpointer */ struct drm_i915_private *i915; - /** file_priv: owning file descriptor */ + /** @file_priv: owning file descriptor */ struct drm_i915_file_private *file_priv; /** @@ -81,9 +235,23 @@ struct i915_gem_context { * CONTEXT_USER_ENGINES flag is set). */ struct i915_gem_engines __rcu *engines; - struct mutex engines_mutex; /* guards writes to engines */ - struct intel_timeline *timeline; + /** @engines_mutex: guards writes to engines */ + struct mutex engines_mutex; + + /** + * @syncobj: Shared timeline syncobj + * + * When the SINGLE_TIMELINE flag is set on context creation, we + * emulate a single timeline across all engines using this syncobj. + * For every execbuffer2 call, this syncobj is used as both an in- + * and out-fence. Unlike the real intel_timeline, this doesn't + * provide perfect atomic in-order guarantees if the client races + * with itself by calling execbuffer2 twice concurrently. However, + * if userspace races with itself, that's not likely to yield well- + * defined results anyway so we choose to not care.
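The "xarray dance" described in the i915_gem_proto_context comment above boils down to a lookup that lazily finalizes. The following is an editorial sketch only, condensing __context_lookup(), finalize_create_context_locked() and gem_context_register() from this patch into one function; reference counting, the RCU fast path and error handling are deliberately left out:

/* Sketch: resolve a context ID, turning a pending proto-context into a
 * real i915_gem_context on first use. */
static struct i915_gem_context *
lookup_or_finalize(struct drm_i915_file_private *fpriv, u32 id)
{
	struct i915_gem_context *ctx;
	struct i915_gem_proto_context *pc;

	ctx = xa_load(&fpriv->context_xa, id);	/* fast path: already real */
	if (ctx)
		return ctx;

	mutex_lock(&fpriv->proto_context_lock);
	ctx = xa_load(&fpriv->context_xa, id);	/* re-check under the lock */
	if (!ctx) {
		pc = xa_load(&fpriv->proto_context_xa, id);
		if (pc) {
			ctx = i915_gem_create_context(fpriv->dev_priv, pc);
			/* fill the slot reserved at GEM_CONTEXT_CREATE time */
			xa_store(&fpriv->context_xa, id, ctx, GFP_KERNEL);
			xa_erase(&fpriv->proto_context_xa, id);
			proto_context_close(pc);
		}
	}
	mutex_unlock(&fpriv->proto_context_lock);

	return ctx;	/* NULL here means -ENOENT in the real code */
}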
+ */ + struct drm_syncobj *syncobj; /** * @vm: unique address space (GTT) @@ -106,7 +274,7 @@ struct i915_gem_context { */ struct pid *pid; - /** link: place with &drm_i915_private.context_list */ + /** @link: place with &drm_i915_private.context_list */ struct list_head link; /** @@ -129,7 +297,6 @@ struct i915_gem_context { * @user_flags: small set of booleans controlled by the user */ unsigned long user_flags; -#define UCONTEXT_NO_ZEROMAP 0 #define UCONTEXT_NO_ERROR_CAPTURE 1 #define UCONTEXT_BANNABLE 2 #define UCONTEXT_RECOVERABLE 3 @@ -142,11 +309,13 @@ struct i915_gem_context { #define CONTEXT_CLOSED 0 #define CONTEXT_USER_ENGINES 1 + /** @mutex: guards everything that isn't engines or handles_vma */ struct mutex mutex; + /** @sched: scheduler parameters */ struct i915_sched_attr sched; - /** guilty_count: How many times this context has caused a GPU hang. */ + /** @guilty_count: How many times this context has caused a GPU hang. */ atomic_t guilty_count; /** * @active_count: How many times this context was active during a GPU @@ -154,25 +323,23 @@ struct i915_gem_context { */ atomic_t active_count; - struct { - u64 timeout_us; - } watchdog; - /** * @hang_timestamp: The last time(s) this context caused a GPU hang */ unsigned long hang_timestamp[2]; #define CONTEXT_FAST_HANG_JIFFIES (120 * HZ) /* 3 hangs within 120s? Banned! */ - /** remap_slice: Bitmask of cache lines that need remapping */ + /** @remap_slice: Bitmask of cache lines that need remapping */ u8 remap_slice; /** - * handles_vma: rbtree to look up our context specific obj/vma for + * @handles_vma: rbtree to look up our context specific obj/vma for * the user handle. (user handles are per fd, but the binding is * per vm, which may be one per context or shared with the global GTT) */ struct radix_tree_root handles_vma; + + /** @lut_mutex: Locks handles_vma */ struct mutex lut_mutex; /** @@ -184,8 +351,11 @@ struct i915_gem_context { */ char name[TASK_COMM_LEN + 8]; + /** @stale: tracks stale engines to be destroyed */ struct { + /** @lock: guards engines */ spinlock_t lock; + /** @engines: list of stale engines */ struct list_head engines; } stale; }; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c b/drivers/gpu/drm/i915/gem/i915_gem_create.c index 548ddf39d853..23fee13a3384 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_create.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c @@ -11,13 +11,14 @@ #include "i915_trace.h" #include "i915_user_extensions.h" -static u32 object_max_page_size(struct drm_i915_gem_object *obj) +static u32 object_max_page_size(struct intel_memory_region **placements, + unsigned int n_placements) { u32 max_page_size = 0; int i; - for (i = 0; i < obj->mm.n_placements; i++) { - struct intel_memory_region *mr = obj->mm.placements[i]; + for (i = 0; i < n_placements; i++) { + struct intel_memory_region *mr = placements[i]; GEM_BUG_ON(!is_power_of_2(mr->min_page_size)); max_page_size = max_t(u32, max_page_size, mr->min_page_size); @@ -27,10 +28,13 @@ static u32 object_max_page_size(struct drm_i915_gem_object *obj) return max_page_size; } -static void object_set_placements(struct drm_i915_gem_object *obj, - struct intel_memory_region **placements, - unsigned int n_placements) +static int object_set_placements(struct drm_i915_gem_object *obj, + struct intel_memory_region **placements, + unsigned int n_placements) { + struct intel_memory_region **arr; + unsigned int i; + GEM_BUG_ON(!n_placements); /* @@ -44,9 +48,20 @@ static void object_set_placements(struct drm_i915_gem_object *obj, 
obj->mm.placements = &i915->mm.regions[mr->id]; obj->mm.n_placements = 1; } else { - obj->mm.placements = placements; + arr = kmalloc_array(n_placements, + sizeof(struct intel_memory_region *), + GFP_KERNEL); + if (!arr) + return -ENOMEM; + + for (i = 0; i < n_placements; i++) + arr[i] = placements[i]; + + obj->mm.placements = arr; obj->mm.n_placements = n_placements; } + + return 0; } static int i915_gem_publish(struct drm_i915_gem_object *obj, @@ -67,40 +82,67 @@ static int i915_gem_publish(struct drm_i915_gem_object *obj, return 0; } -static int -i915_gem_setup(struct drm_i915_gem_object *obj, u64 size) +/** + * Creates a new object using the same path as DRM_I915_GEM_CREATE_EXT + * @i915: i915 private + * @size: size of the buffer, in bytes + * @placements: possible placement regions, in priority order + * @n_placements: number of possible placement regions + * + * This function is exposed primarily for selftests and does very little + * error checking. It is assumed that the set of placement regions has + * already been verified to be valid. + */ +struct drm_i915_gem_object * +__i915_gem_object_create_user(struct drm_i915_private *i915, u64 size, + struct intel_memory_region **placements, + unsigned int n_placements) { - struct intel_memory_region *mr = obj->mm.placements[0]; + struct intel_memory_region *mr = placements[0]; + struct drm_i915_gem_object *obj; unsigned int flags; int ret; - size = round_up(size, object_max_page_size(obj)); + i915_gem_flush_free_objects(i915); + + size = round_up(size, object_max_page_size(placements, n_placements)); if (size == 0) - return -EINVAL; + return ERR_PTR(-EINVAL); /* For most of the ABI (e.g. mmap) we think in system pages */ GEM_BUG_ON(!IS_ALIGNED(size, PAGE_SIZE)); if (i915_gem_object_size_2big(size)) - return -E2BIG; + return ERR_PTR(-E2BIG); + + obj = i915_gem_object_alloc(); + if (!obj) + return ERR_PTR(-ENOMEM); + + ret = object_set_placements(obj, placements, n_placements); + if (ret) + goto object_free; /* - * For now resort to CPU based clearing for device local-memory, in the - * near future this will use the blitter engine for accelerated, GPU - * based clearing. + * I915_BO_ALLOC_USER will make sure the object is cleared before + * any user access. 
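Because object_set_placements() now duplicates the caller's array with kmalloc_array(), callers of the new __i915_gem_object_create_user() can pass a short-lived, on-stack list of regions. A hedged usage sketch; the particular region choice is only an example and assumes a part that actually has LMEM:

    #include "gem/i915_gem_object.h"
    #include "i915_drv.h"

    /*
     * Illustrative only: create a user-visible object that may be placed in
     * device-local memory or, failing that, system memory. The on-stack
     * array is safe because object_set_placements() copies it.
     */
    static struct drm_i915_gem_object *
    create_lmem_or_smem(struct drm_i915_private *i915, u64 size)
    {
            struct intel_memory_region *placements[] = {
                    i915->mm.regions[INTEL_REGION_LMEM],
                    i915->mm.regions[INTEL_REGION_SMEM],
            };

            return __i915_gem_object_create_user(i915, size, placements,
                                                 ARRAY_SIZE(placements));
    }
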
*/ - flags = 0; - if (mr->type == INTEL_MEMORY_LOCAL) - flags = I915_BO_ALLOC_CPU_CLEAR; + flags = I915_BO_ALLOC_USER; - ret = mr->ops->init_object(mr, obj, size, flags); + ret = mr->ops->init_object(mr, obj, size, 0, flags); if (ret) - return ret; + goto object_free; GEM_BUG_ON(size != obj->base.size); trace_i915_gem_object_create(obj); - return 0; + return obj; + +object_free: + if (obj->mm.n_placements > 1) + kfree(obj->mm.placements); + i915_gem_object_free(obj); + return ERR_PTR(ret); } int @@ -113,7 +155,6 @@ i915_gem_dumb_create(struct drm_file *file, enum intel_memory_type mem_type; int cpp = DIV_ROUND_UP(args->bpp, 8); u32 format; - int ret; switch (cpp) { case 1: @@ -146,22 +187,13 @@ i915_gem_dumb_create(struct drm_file *file, if (HAS_LMEM(to_i915(dev))) mem_type = INTEL_MEMORY_LOCAL; - obj = i915_gem_object_alloc(); - if (!obj) - return -ENOMEM; - mr = intel_memory_region_by_type(to_i915(dev), mem_type); - object_set_placements(obj, &mr, 1); - ret = i915_gem_setup(obj, args->size); - if (ret) - goto object_free; + obj = __i915_gem_object_create_user(to_i915(dev), args->size, &mr, 1); + if (IS_ERR(obj)) + return PTR_ERR(obj); return i915_gem_publish(obj, file, &args->size, &args->handle); - -object_free: - i915_gem_object_free(obj); - return ret; } /** @@ -178,31 +210,20 @@ i915_gem_create_ioctl(struct drm_device *dev, void *data, struct drm_i915_gem_create *args = data; struct drm_i915_gem_object *obj; struct intel_memory_region *mr; - int ret; - - i915_gem_flush_free_objects(i915); - - obj = i915_gem_object_alloc(); - if (!obj) - return -ENOMEM; mr = intel_memory_region_by_type(i915, INTEL_MEMORY_SYSTEM); - object_set_placements(obj, &mr, 1); - ret = i915_gem_setup(obj, args->size); - if (ret) - goto object_free; + obj = __i915_gem_object_create_user(i915, args->size, &mr, 1); + if (IS_ERR(obj)) + return PTR_ERR(obj); return i915_gem_publish(obj, file, &args->size, &args->handle); - -object_free: - i915_gem_object_free(obj); - return ret; } struct create_ext { struct drm_i915_private *i915; - struct drm_i915_gem_object *vanilla_object; + struct intel_memory_region *placements[INTEL_REGION_UNKNOWN]; + unsigned int n_placements; }; static void repr_placements(char *buf, size_t size, @@ -233,8 +254,7 @@ static int set_placements(struct drm_i915_gem_create_ext_memory_regions *args, struct drm_i915_private *i915 = ext_data->i915; struct drm_i915_gem_memory_class_instance __user *uregions = u64_to_user_ptr(args->regions); - struct drm_i915_gem_object *obj = ext_data->vanilla_object; - struct intel_memory_region **placements; + struct intel_memory_region *placements[INTEL_REGION_UNKNOWN]; u32 mask; int i, ret = 0; @@ -248,6 +268,8 @@ static int set_placements(struct drm_i915_gem_create_ext_memory_regions *args, ret = -EINVAL; } + BUILD_BUG_ON(ARRAY_SIZE(i915->mm.regions) != ARRAY_SIZE(placements)); + BUILD_BUG_ON(ARRAY_SIZE(ext_data->placements) != ARRAY_SIZE(placements)); if (args->num_regions > ARRAY_SIZE(i915->mm.regions)) { drm_dbg(&i915->drm, "num_regions is too large\n"); ret = -EINVAL; @@ -256,21 +278,13 @@ static int set_placements(struct drm_i915_gem_create_ext_memory_regions *args, if (ret) return ret; - placements = kmalloc_array(args->num_regions, - sizeof(struct intel_memory_region *), - GFP_KERNEL); - if (!placements) - return -ENOMEM; - mask = 0; for (i = 0; i < args->num_regions; i++) { struct drm_i915_gem_memory_class_instance region; struct intel_memory_region *mr; - if (copy_from_user(®ion, uregions, sizeof(region))) { - ret = -EFAULT; - goto out_free; - } + if 
(copy_from_user(®ion, uregions, sizeof(region))) + return -EFAULT; mr = intel_memory_region_lookup(i915, region.memory_class, @@ -296,14 +310,14 @@ static int set_placements(struct drm_i915_gem_create_ext_memory_regions *args, ++uregions; } - if (obj->mm.placements) { + if (ext_data->n_placements) { ret = -EINVAL; goto out_dump; } - object_set_placements(obj, placements, args->num_regions); - if (args->num_regions == 1) - kfree(placements); + ext_data->n_placements = args->num_regions; + for (i = 0; i < args->num_regions; i++) + ext_data->placements[i] = placements[i]; return 0; @@ -311,11 +325,11 @@ out_dump: if (1) { char buf[256]; - if (obj->mm.placements) { + if (ext_data->n_placements) { repr_placements(buf, sizeof(buf), - obj->mm.placements, - obj->mm.n_placements); + ext_data->placements, + ext_data->n_placements); drm_dbg(&i915->drm, "Placements were already set in previous EXT. Existing placements: %s\n", buf); @@ -325,8 +339,6 @@ out_dump: drm_dbg(&i915->drm, "New placements(so far validated): %s\n", buf); } -out_free: - kfree(placements); return ret; } @@ -361,44 +373,30 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void *data, struct drm_i915_private *i915 = to_i915(dev); struct drm_i915_gem_create_ext *args = data; struct create_ext ext_data = { .i915 = i915 }; - struct intel_memory_region **placements_ext; struct drm_i915_gem_object *obj; int ret; if (args->flags) return -EINVAL; - i915_gem_flush_free_objects(i915); - - obj = i915_gem_object_alloc(); - if (!obj) - return -ENOMEM; - - ext_data.vanilla_object = obj; ret = i915_user_extensions(u64_to_user_ptr(args->extensions), create_extensions, ARRAY_SIZE(create_extensions), &ext_data); - placements_ext = obj->mm.placements; if (ret) - goto object_free; + return ret; - if (!placements_ext) { - struct intel_memory_region *mr = + if (!ext_data.n_placements) { + ext_data.placements[0] = intel_memory_region_by_type(i915, INTEL_MEMORY_SYSTEM); - - object_set_placements(obj, &mr, 1); + ext_data.n_placements = 1; } - ret = i915_gem_setup(obj, args->size); - if (ret) - goto object_free; + obj = __i915_gem_object_create_user(i915, args->size, + ext_data.placements, + ext_data.n_placements); + if (IS_ERR(obj)) + return PTR_ERR(obj); return i915_gem_publish(obj, file, &args->size, &args->handle); - -object_free: - if (obj->mm.n_placements > 1) - kfree(placements_ext); - i915_gem_object_free(obj); - return ret; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c index 616c3a2f1baf..afa34111de02 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c @@ -12,6 +12,8 @@ #include "i915_gem_object.h" #include "i915_scatterlist.h" +I915_SELFTEST_DECLARE(static bool force_different_devices;) + static struct drm_i915_gem_object *dma_buf_to_obj(struct dma_buf *buf) { return to_intel_bo(buf->priv); @@ -25,15 +27,11 @@ static struct sg_table *i915_gem_map_dma_buf(struct dma_buf_attachment *attachme struct scatterlist *src, *dst; int ret, i; - ret = i915_gem_object_pin_pages_unlocked(obj); - if (ret) - goto err; - /* Copy sg so that we make an independent mapping */ st = kmalloc(sizeof(struct sg_table), GFP_KERNEL); if (st == NULL) { ret = -ENOMEM; - goto err_unpin_pages; + goto err; } ret = sg_alloc_table(st, obj->mm.pages->nents, GFP_KERNEL); @@ -58,8 +56,6 @@ err_free_sg: sg_free_table(st); err_free: kfree(st); -err_unpin_pages: - i915_gem_object_unpin_pages(obj); err: return ERR_PTR(ret); } @@ -68,13 +64,9 @@ static void 
i915_gem_unmap_dma_buf(struct dma_buf_attachment *attachment, struct sg_table *sg, enum dma_data_direction dir) { - struct drm_i915_gem_object *obj = dma_buf_to_obj(attachment->dmabuf); - dma_unmap_sgtable(attachment->dev, sg, dir, DMA_ATTR_SKIP_CPU_SYNC); sg_free_table(sg); kfree(sg); - - i915_gem_object_unpin_pages(obj); } static int i915_gem_dmabuf_vmap(struct dma_buf *dma_buf, struct dma_buf_map *map) @@ -168,7 +160,46 @@ retry: return err; } +static int i915_gem_dmabuf_attach(struct dma_buf *dmabuf, + struct dma_buf_attachment *attach) +{ + struct drm_i915_gem_object *obj = dma_buf_to_obj(dmabuf); + struct i915_gem_ww_ctx ww; + int err; + + if (!i915_gem_object_can_migrate(obj, INTEL_REGION_SMEM)) + return -EOPNOTSUPP; + + for_i915_gem_ww(&ww, err, true) { + err = i915_gem_object_lock(obj, &ww); + if (err) + continue; + + err = i915_gem_object_migrate(obj, &ww, INTEL_REGION_SMEM); + if (err) + continue; + + err = i915_gem_object_wait_migration(obj, 0); + if (err) + continue; + + err = i915_gem_object_pin_pages(obj); + } + + return err; +} + +static void i915_gem_dmabuf_detach(struct dma_buf *dmabuf, + struct dma_buf_attachment *attach) +{ + struct drm_i915_gem_object *obj = dma_buf_to_obj(dmabuf); + + i915_gem_object_unpin_pages(obj); +} + static const struct dma_buf_ops i915_dmabuf_ops = { + .attach = i915_gem_dmabuf_attach, + .detach = i915_gem_dmabuf_detach, .map_dma_buf = i915_gem_map_dma_buf, .unmap_dma_buf = i915_gem_unmap_dma_buf, .release = drm_gem_dmabuf_release, @@ -204,6 +235,8 @@ static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj) struct sg_table *pages; unsigned int sg_page_sizes; + assert_object_held(obj); + pages = dma_buf_map_attachment(obj->base.import_attach, DMA_BIDIRECTIONAL); if (IS_ERR(pages)) @@ -241,7 +274,8 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev, if (dma_buf->ops == &i915_dmabuf_ops) { obj = dma_buf_to_obj(dma_buf); /* is it from our device? */ - if (obj->base.dev == dev) { + if (obj->base.dev == dev && + !I915_SELFTEST_ONLY(force_different_devices)) { /* * Importing dmabuf exported from out own gem increases * refcount on gem itself instead of f_count of dmabuf. diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c index 073822100da7..b684a62bf3b0 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c @@ -268,6 +268,9 @@ int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data, struct drm_i915_gem_object *obj; int err = 0; + if (IS_DGFX(to_i915(dev))) + return -ENODEV; + rcu_read_lock(); obj = i915_gem_object_lookup_rcu(file, args->handle); if (!obj) { @@ -303,6 +306,9 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, enum i915_cache_level level; int ret = 0; + if (IS_DGFX(i915)) + return -ENODEV; + switch (args->caching) { case I915_CACHING_NONE: level = I915_CACHE_NONE; @@ -375,7 +381,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, struct i915_vma *vma; int ret; - /* Frame buffer must be in LMEM (no migration yet) */ + /* Frame buffer must be in LMEM */ if (HAS_LMEM(i915) && !i915_gem_object_is_lmem(obj)) return ERR_PTR(-EINVAL); @@ -484,6 +490,9 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, u32 write_domain = args->write_domain; int err; + if (IS_DGFX(to_i915(dev))) + return -ENODEV; + /* Only handle setting domains to types used by the CPU. 
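The IS_DGFX() checks added above mean the legacy caching and set-domain ioctls now fail with -ENODEV on discrete parts. A hedged sketch of how userspace might cope with that, written as generic libdrm-style code rather than anything taken from this series:

    #include <errno.h>
    #include <stdbool.h>
    #include <stdint.h>
    #include <xf86drm.h>
    #include <drm/i915_drm.h>

    /*
     * Illustrative only: request cached CPU mappings where the kernel still
     * supports it, and treat ENODEV (discrete parts) as "nothing to do".
     */
    static bool set_caching_if_supported(int fd, uint32_t handle)
    {
            struct drm_i915_gem_caching arg = {
                    .handle = handle,
                    .caching = I915_CACHING_CACHED,
            };

            if (drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING, &arg) == 0)
                    return true;

            return errno == ENODEV; /* caching is fixed on discrete */
    }
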
*/ if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS) return -EINVAL; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 4a6419d7be93..1aa249908b64 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -277,18 +277,9 @@ struct i915_execbuffer { bool has_llc : 1; bool has_fence : 1; bool needs_unfenced : 1; - - struct i915_request *rq; - u32 *rq_cmd; - unsigned int rq_size; - struct intel_gt_buffer_pool_node *pool; } reloc_cache; - struct intel_gt_buffer_pool_node *reloc_pool; /** relocation pool for -EDEADLK handling */ - struct intel_context *reloc_context; - u64 invalid_flags; /** Set of execobj.flags that are invalid */ - u32 context_flags; /** Set of execobj.flags to insert from the ctx */ u64 batch_len; /** Length of batch within object */ u32 batch_start_offset; /** Location within object of batch */ @@ -539,9 +530,6 @@ eb_validate_vma(struct i915_execbuffer *eb, entry->flags |= EXEC_OBJECT_NEEDS_GTT | __EXEC_OBJECT_NEEDS_MAP; } - if (!(entry->flags & EXEC_OBJECT_PINNED)) - entry->flags |= eb->context_flags; - return 0; } @@ -741,17 +729,13 @@ static int eb_select_context(struct i915_execbuffer *eb) struct i915_gem_context *ctx; ctx = i915_gem_context_lookup(eb->file->driver_priv, eb->args->rsvd1); - if (unlikely(!ctx)) - return -ENOENT; + if (unlikely(IS_ERR(ctx))) + return PTR_ERR(ctx); eb->gem_context = ctx; if (rcu_access_pointer(ctx->vm)) eb->invalid_flags |= EXEC_OBJECT_NEEDS_GTT; - eb->context_flags = 0; - if (test_bit(UCONTEXT_NO_ZEROMAP, &ctx->user_flags)) - eb->context_flags |= __EXEC_OBJECT_NEEDS_BIAS; - return 0; } @@ -920,21 +904,38 @@ err: return err; } -static int eb_validate_vmas(struct i915_execbuffer *eb) +static int eb_lock_vmas(struct i915_execbuffer *eb) { unsigned int i; int err; - INIT_LIST_HEAD(&eb->unbound); - for (i = 0; i < eb->buffer_count; i++) { - struct drm_i915_gem_exec_object2 *entry = &eb->exec[i]; struct eb_vma *ev = &eb->vma[i]; struct i915_vma *vma = ev->vma; err = i915_gem_object_lock(vma->obj, &eb->ww); if (err) return err; + } + + return 0; +} + +static int eb_validate_vmas(struct i915_execbuffer *eb) +{ + unsigned int i; + int err; + + INIT_LIST_HEAD(&eb->unbound); + + err = eb_lock_vmas(eb); + if (err) + return err; + + for (i = 0; i < eb->buffer_count; i++) { + struct drm_i915_gem_exec_object2 *entry = &eb->exec[i]; + struct eb_vma *ev = &eb->vma[i]; + struct i915_vma *vma = ev->vma; err = eb_pin_vma(eb, entry, ev); if (err == -EDEADLK) @@ -992,7 +993,7 @@ eb_get_vma(const struct i915_execbuffer *eb, unsigned long handle) } } -static void eb_release_vmas(struct i915_execbuffer *eb, bool final, bool release_userptr) +static void eb_release_vmas(struct i915_execbuffer *eb, bool final) { const unsigned int count = eb->buffer_count; unsigned int i; @@ -1006,11 +1007,6 @@ static void eb_release_vmas(struct i915_execbuffer *eb, bool final, bool release eb_unreserve_vma(ev); - if (release_userptr && ev->flags & __EXEC_OBJECT_USERPTR_INIT) { - ev->flags &= ~__EXEC_OBJECT_USERPTR_INIT; - i915_gem_object_userptr_submit_fini(vma->obj); - } - if (final) i915_vma_put(vma); } @@ -1020,8 +1016,6 @@ static void eb_release_vmas(struct i915_execbuffer *eb, bool final, bool release static void eb_destroy(const struct i915_execbuffer *eb) { - GEM_BUG_ON(eb->reloc_cache.rq); - if (eb->lut_size > 0) kfree(eb->buckets); } @@ -1033,14 +1027,6 @@ relocation_target(const struct drm_i915_gem_relocation_entry *reloc, return 
gen8_canonical_addr((int)reloc->delta + target->node.start); } -static void reloc_cache_clear(struct reloc_cache *cache) -{ - cache->rq = NULL; - cache->rq_cmd = NULL; - cache->pool = NULL; - cache->rq_size = 0; -} - static void reloc_cache_init(struct reloc_cache *cache, struct drm_i915_private *i915) { @@ -1053,7 +1039,6 @@ static void reloc_cache_init(struct reloc_cache *cache, cache->has_fence = cache->graphics_ver < 4; cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment; cache->node.flags = 0; - reloc_cache_clear(cache); } static inline void *unmask_page(unsigned long p) @@ -1075,48 +1060,10 @@ static inline struct i915_ggtt *cache_to_ggtt(struct reloc_cache *cache) return &i915->ggtt; } -static void reloc_cache_put_pool(struct i915_execbuffer *eb, struct reloc_cache *cache) -{ - if (!cache->pool) - return; - - /* - * This is a bit nasty, normally we keep objects locked until the end - * of execbuffer, but we already submit this, and have to unlock before - * dropping the reference. Fortunately we can only hold 1 pool node at - * a time, so this should be harmless. - */ - i915_gem_ww_unlock_single(cache->pool->obj); - intel_gt_buffer_pool_put(cache->pool); - cache->pool = NULL; -} - -static void reloc_gpu_flush(struct i915_execbuffer *eb, struct reloc_cache *cache) -{ - struct drm_i915_gem_object *obj = cache->rq->batch->obj; - - GEM_BUG_ON(cache->rq_size >= obj->base.size / sizeof(u32)); - cache->rq_cmd[cache->rq_size] = MI_BATCH_BUFFER_END; - - i915_gem_object_flush_map(obj); - i915_gem_object_unpin_map(obj); - - intel_gt_chipset_flush(cache->rq->engine->gt); - - i915_request_add(cache->rq); - reloc_cache_put_pool(eb, cache); - reloc_cache_clear(cache); - - eb->reloc_pool = NULL; -} - static void reloc_cache_reset(struct reloc_cache *cache, struct i915_execbuffer *eb) { void *vaddr; - if (cache->rq) - reloc_gpu_flush(eb, cache); - if (!cache->vaddr) return; @@ -1298,295 +1245,6 @@ static void clflush_write32(u32 *addr, u32 value, unsigned int flushes) *addr = value; } -static int reloc_move_to_gpu(struct i915_request *rq, struct i915_vma *vma) -{ - struct drm_i915_gem_object *obj = vma->obj; - int err; - - assert_vma_held(vma); - - if (obj->cache_dirty & ~obj->cache_coherent) - i915_gem_clflush_object(obj, 0); - obj->write_domain = 0; - - err = i915_request_await_object(rq, vma->obj, true); - if (err == 0) - err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); - - return err; -} - -static int __reloc_gpu_alloc(struct i915_execbuffer *eb, - struct intel_engine_cs *engine, - struct i915_vma *vma, - unsigned int len) -{ - struct reloc_cache *cache = &eb->reloc_cache; - struct intel_gt_buffer_pool_node *pool = eb->reloc_pool; - struct i915_request *rq; - struct i915_vma *batch; - u32 *cmd; - int err; - - if (!pool) { - pool = intel_gt_get_buffer_pool(engine->gt, PAGE_SIZE, - cache->has_llc ? 
- I915_MAP_WB : - I915_MAP_WC); - if (IS_ERR(pool)) - return PTR_ERR(pool); - } - eb->reloc_pool = NULL; - - err = i915_gem_object_lock(pool->obj, &eb->ww); - if (err) - goto err_pool; - - cmd = i915_gem_object_pin_map(pool->obj, pool->type); - if (IS_ERR(cmd)) { - err = PTR_ERR(cmd); - goto err_pool; - } - intel_gt_buffer_pool_mark_used(pool); - - memset32(cmd, 0, pool->obj->base.size / sizeof(u32)); - - batch = i915_vma_instance(pool->obj, vma->vm, NULL); - if (IS_ERR(batch)) { - err = PTR_ERR(batch); - goto err_unmap; - } - - err = i915_vma_pin_ww(batch, &eb->ww, 0, 0, PIN_USER | PIN_NONBLOCK); - if (err) - goto err_unmap; - - if (engine == eb->context->engine) { - rq = i915_request_create(eb->context); - } else { - struct intel_context *ce = eb->reloc_context; - - if (!ce) { - ce = intel_context_create(engine); - if (IS_ERR(ce)) { - err = PTR_ERR(ce); - goto err_unpin; - } - - i915_vm_put(ce->vm); - ce->vm = i915_vm_get(eb->context->vm); - eb->reloc_context = ce; - } - - err = intel_context_pin_ww(ce, &eb->ww); - if (err) - goto err_unpin; - - rq = i915_request_create(ce); - intel_context_unpin(ce); - } - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto err_unpin; - } - - err = intel_gt_buffer_pool_mark_active(pool, rq); - if (err) - goto err_request; - - err = reloc_move_to_gpu(rq, vma); - if (err) - goto err_request; - - err = eb->engine->emit_bb_start(rq, - batch->node.start, PAGE_SIZE, - cache->graphics_ver > 5 ? 0 : I915_DISPATCH_SECURE); - if (err) - goto skip_request; - - assert_vma_held(batch); - err = i915_request_await_object(rq, batch->obj, false); - if (err == 0) - err = i915_vma_move_to_active(batch, rq, 0); - if (err) - goto skip_request; - - rq->batch = batch; - i915_vma_unpin(batch); - - cache->rq = rq; - cache->rq_cmd = cmd; - cache->rq_size = 0; - cache->pool = pool; - - /* Return with batch mapping (cmd) still pinned */ - return 0; - -skip_request: - i915_request_set_error_once(rq, err); -err_request: - i915_request_add(rq); -err_unpin: - i915_vma_unpin(batch); -err_unmap: - i915_gem_object_unpin_map(pool->obj); -err_pool: - eb->reloc_pool = pool; - return err; -} - -static bool reloc_can_use_engine(const struct intel_engine_cs *engine) -{ - return engine->class != VIDEO_DECODE_CLASS || GRAPHICS_VER(engine->i915) != 6; -} - -static u32 *reloc_gpu(struct i915_execbuffer *eb, - struct i915_vma *vma, - unsigned int len) -{ - struct reloc_cache *cache = &eb->reloc_cache; - u32 *cmd; - - if (cache->rq_size > PAGE_SIZE/sizeof(u32) - (len + 1)) - reloc_gpu_flush(eb, cache); - - if (unlikely(!cache->rq)) { - int err; - struct intel_engine_cs *engine = eb->engine; - - /* If we need to copy for the cmdparser, we will stall anyway */ - if (eb_use_cmdparser(eb)) - return ERR_PTR(-EWOULDBLOCK); - - if (!reloc_can_use_engine(engine)) { - engine = engine->gt->engine_class[COPY_ENGINE_CLASS][0]; - if (!engine) - return ERR_PTR(-ENODEV); - } - - err = __reloc_gpu_alloc(eb, engine, vma, len); - if (unlikely(err)) - return ERR_PTR(err); - } - - cmd = cache->rq_cmd + cache->rq_size; - cache->rq_size += len; - - return cmd; -} - -static inline bool use_reloc_gpu(struct i915_vma *vma) -{ - if (DBG_FORCE_RELOC == FORCE_GPU_RELOC) - return true; - - if (DBG_FORCE_RELOC) - return false; - - return !dma_resv_test_signaled(vma->resv, true); -} - -static unsigned long vma_phys_addr(struct i915_vma *vma, u32 offset) -{ - struct page *page; - unsigned long addr; - - GEM_BUG_ON(vma->pages != vma->obj->mm.pages); - - page = i915_gem_object_get_page(vma->obj, offset >> PAGE_SHIFT); - addr = 
PFN_PHYS(page_to_pfn(page)); - GEM_BUG_ON(overflows_type(addr, u32)); /* expected dma32 */ - - return addr + offset_in_page(offset); -} - -static int __reloc_entry_gpu(struct i915_execbuffer *eb, - struct i915_vma *vma, - u64 offset, - u64 target_addr) -{ - const unsigned int ver = eb->reloc_cache.graphics_ver; - unsigned int len; - u32 *batch; - u64 addr; - - if (ver >= 8) - len = offset & 7 ? 8 : 5; - else if (ver >= 4) - len = 4; - else - len = 3; - - batch = reloc_gpu(eb, vma, len); - if (batch == ERR_PTR(-EDEADLK)) - return -EDEADLK; - else if (IS_ERR(batch)) - return false; - - addr = gen8_canonical_addr(vma->node.start + offset); - if (ver >= 8) { - if (offset & 7) { - *batch++ = MI_STORE_DWORD_IMM_GEN4; - *batch++ = lower_32_bits(addr); - *batch++ = upper_32_bits(addr); - *batch++ = lower_32_bits(target_addr); - - addr = gen8_canonical_addr(addr + 4); - - *batch++ = MI_STORE_DWORD_IMM_GEN4; - *batch++ = lower_32_bits(addr); - *batch++ = upper_32_bits(addr); - *batch++ = upper_32_bits(target_addr); - } else { - *batch++ = (MI_STORE_DWORD_IMM_GEN4 | (1 << 21)) + 1; - *batch++ = lower_32_bits(addr); - *batch++ = upper_32_bits(addr); - *batch++ = lower_32_bits(target_addr); - *batch++ = upper_32_bits(target_addr); - } - } else if (ver >= 6) { - *batch++ = MI_STORE_DWORD_IMM_GEN4; - *batch++ = 0; - *batch++ = addr; - *batch++ = target_addr; - } else if (IS_I965G(eb->i915)) { - *batch++ = MI_STORE_DWORD_IMM_GEN4; - *batch++ = 0; - *batch++ = vma_phys_addr(vma, offset); - *batch++ = target_addr; - } else if (ver >= 4) { - *batch++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; - *batch++ = 0; - *batch++ = addr; - *batch++ = target_addr; - } else if (ver >= 3 && - !(IS_I915G(eb->i915) || IS_I915GM(eb->i915))) { - *batch++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL; - *batch++ = addr; - *batch++ = target_addr; - } else { - *batch++ = MI_STORE_DWORD_IMM; - *batch++ = vma_phys_addr(vma, offset); - *batch++ = target_addr; - } - - return true; -} - -static int reloc_entry_gpu(struct i915_execbuffer *eb, - struct i915_vma *vma, - u64 offset, - u64 target_addr) -{ - if (eb->reloc_cache.vaddr) - return false; - - if (!use_reloc_gpu(vma)) - return false; - - return __reloc_entry_gpu(eb, vma, offset, target_addr); -} - static u64 relocate_entry(struct i915_vma *vma, const struct drm_i915_gem_relocation_entry *reloc, @@ -1595,32 +1253,25 @@ relocate_entry(struct i915_vma *vma, { u64 target_addr = relocation_target(reloc, target); u64 offset = reloc->offset; - int reloc_gpu = reloc_entry_gpu(eb, vma, offset, target_addr); - - if (reloc_gpu < 0) - return reloc_gpu; - - if (!reloc_gpu) { - bool wide = eb->reloc_cache.use_64bit_reloc; - void *vaddr; + bool wide = eb->reloc_cache.use_64bit_reloc; + void *vaddr; repeat: - vaddr = reloc_vaddr(vma->obj, eb, - offset >> PAGE_SHIFT); - if (IS_ERR(vaddr)) - return PTR_ERR(vaddr); - - GEM_BUG_ON(!IS_ALIGNED(offset, sizeof(u32))); - clflush_write32(vaddr + offset_in_page(offset), - lower_32_bits(target_addr), - eb->reloc_cache.vaddr); - - if (wide) { - offset += sizeof(u32); - target_addr >>= 32; - wide = false; - goto repeat; - } + vaddr = reloc_vaddr(vma->obj, eb, + offset >> PAGE_SHIFT); + if (IS_ERR(vaddr)) + return PTR_ERR(vaddr); + + GEM_BUG_ON(!IS_ALIGNED(offset, sizeof(u32))); + clflush_write32(vaddr + offset_in_page(offset), + lower_32_bits(target_addr), + eb->reloc_cache.vaddr); + + if (wide) { + offset += sizeof(u32); + target_addr >>= 32; + wide = false; + goto repeat; } return target->node.start | UPDATE; @@ -1992,7 +1643,7 @@ repeat: } /* We may process 
another execbuffer during the unlock... */ - eb_release_vmas(eb, false, true); + eb_release_vmas(eb, false); i915_gem_ww_ctx_fini(&eb->ww); if (rq) { @@ -2061,9 +1712,7 @@ repeat_validate: list_for_each_entry(ev, &eb->relocs, reloc_link) { if (!have_copy) { - pagefault_disable(); err = eb_relocate_vma(eb, ev); - pagefault_enable(); if (err) break; } else { @@ -2096,7 +1745,7 @@ repeat_validate: err: if (err == -EDEADLK) { - eb_release_vmas(eb, false, false); + eb_release_vmas(eb, false); err = i915_gem_ww_ctx_backoff(&eb->ww); if (!err) goto repeat_validate; @@ -2193,7 +1842,7 @@ retry: err: if (err == -EDEADLK) { - eb_release_vmas(eb, false, false); + eb_release_vmas(eb, false); err = i915_gem_ww_ctx_backoff(&eb->ww); if (!err) goto retry; @@ -2270,7 +1919,7 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb) #ifdef CONFIG_MMU_NOTIFIER if (!err && (eb->args->flags & __EXEC_USERPTR_USED)) { - spin_lock(&eb->i915->mm.notifier_lock); + read_lock(&eb->i915->mm.notifier_lock); /* * count is always at least 1, otherwise __EXEC_USERPTR_USED @@ -2288,7 +1937,7 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb) break; } - spin_unlock(&eb->i915->mm.notifier_lock); + read_unlock(&eb->i915->mm.notifier_lock); } #endif @@ -2782,7 +2431,7 @@ __free_fence_array(struct eb_fence *fences, unsigned int n) while (n--) { drm_syncobj_put(ptr_mask_bits(fences[n].syncobj, 2)); dma_fence_put(fences[n].dma_fence); - kfree(fences[n].chain_fence); + dma_fence_chain_free(fences[n].chain_fence); } kvfree(fences); } @@ -2896,9 +2545,7 @@ add_timeline_fence_array(struct i915_execbuffer *eb, return -EINVAL; } - f->chain_fence = - kmalloc(sizeof(*f->chain_fence), - GFP_KERNEL); + f->chain_fence = dma_fence_chain_alloc(); if (!f->chain_fence) { drm_syncobj_put(syncobj); dma_fence_put(fence); @@ -3158,8 +2805,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, eb.exec = exec; eb.vma = (struct eb_vma *)(exec + args->buffer_count + 1); eb.vma[0].vma = NULL; - eb.reloc_pool = eb.batch_pool = NULL; - eb.reloc_context = NULL; + eb.batch_pool = NULL; eb.invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS; reloc_cache_init(&eb.reloc_cache, eb.i915); @@ -3234,7 +2880,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, err = eb_lookup_vmas(&eb); if (err) { - eb_release_vmas(&eb, true, true); + eb_release_vmas(&eb, true); goto err_engine; } @@ -3257,9 +2903,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, batch = eb.batch->vma; - /* All GPU relocation batches must be submitted prior to the user rq */ - GEM_BUG_ON(eb.reloc_cache.rq); - /* Allocate a request for this batch buffer nice and early. 
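One detail from the fence-array handling above: the open-coded kmalloc()/kfree() of the chain node is replaced by dma_fence_chain_alloc()/dma_fence_chain_free(). The pairing rule, sketched and hedged (ownership of the node passes to drm_syncobj_add_point() when the timeline point is installed, so the explicit free is only for error paths, as in __free_fence_array() above):

    #include <linux/dma-fence-chain.h>
    #include <drm/drm_syncobj.h>

    /*
     * Illustrative only: a chain node is preallocated up front and either
     * consumed by drm_syncobj_add_point() or released with
     * dma_fence_chain_free() on an error path, never with plain kfree().
     */
    static int add_timeline_point(struct drm_syncobj *syncobj,
                                  struct dma_fence *fence, u64 point)
    {
            struct dma_fence_chain *chain = dma_fence_chain_alloc();

            if (!chain)
                    return -ENOMEM;

            if (!fence) {
                    dma_fence_chain_free(chain); /* error path */
                    return -EINVAL;
            }

            drm_syncobj_add_point(syncobj, chain, fence, point);
            return 0;
    }
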
*/ eb.request = i915_request_create(eb.context); if (IS_ERR(eb.request)) { @@ -3267,11 +2910,20 @@ i915_gem_do_execbuffer(struct drm_device *dev, goto err_vma; } + if (unlikely(eb.gem_context->syncobj)) { + struct dma_fence *fence; + + fence = drm_syncobj_fence_get(eb.gem_context->syncobj); + err = i915_request_await_dma_fence(eb.request, fence); + dma_fence_put(fence); + if (err) + goto err_ext; + } + if (in_fence) { if (args->flags & I915_EXEC_FENCE_SUBMIT) err = i915_request_await_execution(eb.request, - in_fence, - eb.engine->bond_execute); + in_fence); else err = i915_request_await_dma_fence(eb.request, in_fence); @@ -3324,10 +2976,16 @@ err_request: fput(out_fence->file); } } + + if (unlikely(eb.gem_context->syncobj)) { + drm_syncobj_replace_fence(eb.gem_context->syncobj, + &eb.request->fence); + } + i915_request_put(eb.request); err_vma: - eb_release_vmas(&eb, true, true); + eb_release_vmas(&eb, true); if (eb.trampoline) i915_vma_unpin(eb.trampoline); WARN_ON(err == -EDEADLK); @@ -3335,10 +2993,6 @@ err_vma: if (eb.batch_pool) intel_gt_buffer_pool_put(eb.batch_pool); - if (eb.reloc_pool) - intel_gt_buffer_pool_put(eb.reloc_pool); - if (eb.reloc_context) - intel_context_put(eb.reloc_context); err_engine: eb_put_engine(&eb); err_context: @@ -3452,7 +3106,3 @@ end:; kvfree(exec2_list); return err; } - -#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) -#include "selftests/i915_gem_execbuffer.c" -#endif diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c b/drivers/gpu/drm/i915/gem/i915_gem_internal.c index ce6b664b10aa..13b217f75055 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_internal.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c @@ -177,8 +177,8 @@ i915_gem_object_create_internal(struct drm_i915_private *i915, return ERR_PTR(-ENOMEM); drm_gem_private_object_init(&i915->drm, &obj->base, size); - i915_gem_object_init(obj, &i915_gem_object_internal_ops, &lock_class, - I915_BO_ALLOC_STRUCT_PAGE); + i915_gem_object_init(obj, &i915_gem_object_internal_ops, &lock_class, 0); + obj->mem_flags |= I915_BO_FLAG_STRUCT_PAGE; /* * Mark the object as volatile, such that the pages are marked as diff --git a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c index 3b4aa28a076d..eb345305dc52 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c @@ -4,74 +4,10 @@ */ #include "intel_memory_region.h" -#include "intel_region_ttm.h" #include "gem/i915_gem_region.h" #include "gem/i915_gem_lmem.h" #include "i915_drv.h" -static void lmem_put_pages(struct drm_i915_gem_object *obj, - struct sg_table *pages) -{ - intel_region_ttm_node_free(obj->mm.region, obj->mm.st_mm_node); - obj->mm.dirty = false; - sg_free_table(pages); - kfree(pages); -} - -static int lmem_get_pages(struct drm_i915_gem_object *obj) -{ - unsigned int flags; - struct sg_table *pages; - - flags = I915_ALLOC_MIN_PAGE_SIZE; - if (obj->flags & I915_BO_ALLOC_CONTIGUOUS) - flags |= I915_ALLOC_CONTIGUOUS; - - obj->mm.st_mm_node = intel_region_ttm_node_alloc(obj->mm.region, - obj->base.size, - flags); - if (IS_ERR(obj->mm.st_mm_node)) - return PTR_ERR(obj->mm.st_mm_node); - - /* Range manager is always contigous */ - if (obj->mm.region->is_range_manager) - obj->flags |= I915_BO_ALLOC_CONTIGUOUS; - pages = intel_region_ttm_node_to_st(obj->mm.region, obj->mm.st_mm_node); - if (IS_ERR(pages)) { - intel_region_ttm_node_free(obj->mm.region, obj->mm.st_mm_node); - return PTR_ERR(pages); - } - - __i915_gem_object_set_pages(obj, pages, i915_sg_dma_sizes(pages->sgl)); - - if 
(obj->flags & I915_BO_ALLOC_CPU_CLEAR) { - void __iomem *vaddr = - i915_gem_object_lmem_io_map(obj, 0, obj->base.size); - - if (!vaddr) { - struct sg_table *pages = - __i915_gem_object_unset_pages(obj); - - if (!IS_ERR_OR_NULL(pages)) - lmem_put_pages(obj, pages); - } - - memset_io(vaddr, 0, obj->base.size); - io_mapping_unmap(vaddr); - } - - return 0; -} - -const struct drm_i915_gem_object_ops i915_gem_lmem_obj_ops = { - .name = "i915_gem_object_lmem", - .flags = I915_GEM_OBJECT_HAS_IOMEM, - - .get_pages = lmem_get_pages, - .put_pages = lmem_put_pages, - .release = i915_gem_object_release_memory_region, -}; - void __iomem * i915_gem_object_lmem_io_map(struct drm_i915_gem_object *obj, unsigned long n, @@ -87,39 +23,91 @@ i915_gem_object_lmem_io_map(struct drm_i915_gem_object *obj, return io_mapping_map_wc(&obj->mm.region->iomap, offset, size); } +/** + * i915_gem_object_is_lmem - Whether the object is resident in + * lmem + * @obj: The object to check. + * + * Even if an object is allowed to migrate and change memory region, + * this function checks whether it will always be present in lmem when + * valid *or* if that's not the case, whether it's currently resident in lmem. + * For migratable and evictable objects, the latter only makes sense when + * the object is locked. + * + * Return: Whether the object migratable but resident in lmem, or not + * migratable and will be present in lmem when valid. + */ bool i915_gem_object_is_lmem(struct drm_i915_gem_object *obj) { - struct intel_memory_region *mr = obj->mm.region; + struct intel_memory_region *mr = READ_ONCE(obj->mm.region); +#ifdef CONFIG_LOCKDEP + if (i915_gem_object_migratable(obj) && + i915_gem_object_evictable(obj)) + assert_object_held(obj); +#endif return mr && (mr->type == INTEL_MEMORY_LOCAL || mr->type == INTEL_MEMORY_STOLEN_LOCAL); } +/** + * __i915_gem_object_is_lmem - Whether the object is resident in + * lmem while in the fence signaling critical path. + * @obj: The object to check. + * + * This function is intended to be called from within the fence signaling + * path where the fence keeps the object from being migrated. For example + * during gpu reset or similar. + * + * Return: Whether the object is resident in lmem. + */ +bool __i915_gem_object_is_lmem(struct drm_i915_gem_object *obj) +{ + struct intel_memory_region *mr = READ_ONCE(obj->mm.region); + +#ifdef CONFIG_LOCKDEP + GEM_WARN_ON(dma_resv_test_signaled(obj->base.resv, true)); +#endif + return mr && (mr->type == INTEL_MEMORY_LOCAL || + mr->type == INTEL_MEMORY_STOLEN_LOCAL); +} + +/** + * __i915_gem_object_create_lmem_with_ps - Create lmem object and force the + * minimum page size for the backing pages. + * @i915: The i915 instance. + * @size: The size in bytes for the object. Note that we need to round the size + * up depending on the @page_size. The final object size can be fished out from + * the drm GEM object. + * @page_size: The requested minimum page size in bytes for this object. This is + * useful if we need something bigger than the regions min_page_size due to some + * hw restriction, or in some very specialised cases where it needs to be + * smaller, where the internal fragmentation cost is too great when rounding up + * the object size. + * @flags: The optional BO allocation flags. + * + * Note that this interface assumes you know what you are doing when forcing the + * @page_size. If this is smaller than the regions min_page_size then it can + * never be inserted into any GTT, otherwise it might lead to undefined + * behaviour. 
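A hedged usage sketch of the helper documented in the comment above: forcing a 64K minimum page size for an object with a hardware-imposed alignment requirement. The SZ_64K value and the contiguous flag are arbitrary choices for the example, not taken from this patch:

    #include <linux/sizes.h>
    #include "gem/i915_gem_lmem.h"

    /*
     * Illustrative only: allocate a local-memory object whose backing pages
     * are at least 64K. The requested size is rounded up accordingly; the
     * final size can be read back from obj->base.size.
     */
    static struct drm_i915_gem_object *
    alloc_lmem_64k(struct drm_i915_private *i915, resource_size_t size)
    {
            return __i915_gem_object_create_lmem_with_ps(i915, size, SZ_64K,
                                                         I915_BO_ALLOC_CONTIGUOUS);
    }
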
+ * + * Return: The object pointer, which might be an ERR_PTR in the case of failure. + */ struct drm_i915_gem_object * -i915_gem_object_create_lmem(struct drm_i915_private *i915, - resource_size_t size, - unsigned int flags) +__i915_gem_object_create_lmem_with_ps(struct drm_i915_private *i915, + resource_size_t size, + resource_size_t page_size, + unsigned int flags) { return i915_gem_object_create_region(i915->mm.regions[INTEL_REGION_LMEM], - size, flags); + size, page_size, flags); } -int __i915_gem_lmem_object_init(struct intel_memory_region *mem, - struct drm_i915_gem_object *obj, - resource_size_t size, - unsigned int flags) +struct drm_i915_gem_object * +i915_gem_object_create_lmem(struct drm_i915_private *i915, + resource_size_t size, + unsigned int flags) { - static struct lock_class_key lock_class; - struct drm_i915_private *i915 = mem->i915; - - drm_gem_private_object_init(&i915->drm, &obj->base, size); - i915_gem_object_init(obj, &i915_gem_lmem_obj_ops, &lock_class, flags); - - obj->read_domains = I915_GEM_DOMAIN_WC | I915_GEM_DOMAIN_GTT; - - i915_gem_object_set_cache_coherency(obj, I915_CACHE_NONE); - - i915_gem_object_init_memory_region(obj, mem); - - return 0; + return i915_gem_object_create_region(i915->mm.regions[INTEL_REGION_LMEM], + size, 0, flags); } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_lmem.h b/drivers/gpu/drm/i915/gem/i915_gem_lmem.h index fac6bc5a5ebb..4ee81fc66302 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_lmem.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_lmem.h @@ -21,14 +21,16 @@ i915_gem_object_lmem_io_map(struct drm_i915_gem_object *obj, bool i915_gem_object_is_lmem(struct drm_i915_gem_object *obj); +bool __i915_gem_object_is_lmem(struct drm_i915_gem_object *obj); + +struct drm_i915_gem_object * +__i915_gem_object_create_lmem_with_ps(struct drm_i915_private *i915, + resource_size_t size, + resource_size_t page_size, + unsigned int flags); struct drm_i915_gem_object * i915_gem_object_create_lmem(struct drm_i915_private *i915, resource_size_t size, unsigned int flags); -int __i915_gem_lmem_object_init(struct intel_memory_region *mem, - struct drm_i915_gem_object *obj, - resource_size_t size, - unsigned int flags); - #endif /* !__I915_GEM_LMEM_H */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index 215326764606..5130e8ed9564 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -19,6 +19,7 @@ #include "i915_gem_mman.h" #include "i915_trace.h" #include "i915_user_extensions.h" +#include "i915_gem_ttm.h" #include "i915_vma.h" static inline bool @@ -624,6 +625,8 @@ mmap_offset_attach(struct drm_i915_gem_object *obj, struct i915_mmap_offset *mmo; int err; + GEM_BUG_ON(obj->ops->mmap_offset || obj->ops->mmap_ops); + mmo = lookup_mmo(obj, mmap_type); if (mmo) goto out; @@ -642,7 +645,8 @@ mmap_offset_attach(struct drm_i915_gem_object *obj, goto insert; /* Attempt to reap some mmap space from dead objects */ - err = intel_gt_retire_requests_timeout(&i915->gt, MAX_SCHEDULE_TIMEOUT); + err = intel_gt_retire_requests_timeout(&i915->gt, MAX_SCHEDULE_TIMEOUT, + NULL); if (err) goto err; @@ -666,40 +670,58 @@ err: } static int -__assign_mmap_offset(struct drm_file *file, - u32 handle, +__assign_mmap_offset(struct drm_i915_gem_object *obj, enum i915_mmap_type mmap_type, - u64 *offset) + u64 *offset, struct drm_file *file) { - struct drm_i915_gem_object *obj; struct i915_mmap_offset *mmo; - int err; - obj = i915_gem_object_lookup(file, handle); - if (!obj) - return 
-ENOENT; + if (i915_gem_object_never_mmap(obj)) + return -ENODEV; - if (i915_gem_object_never_mmap(obj)) { - err = -ENODEV; - goto out; + if (obj->ops->mmap_offset) { + if (mmap_type != I915_MMAP_TYPE_FIXED) + return -ENODEV; + + *offset = obj->ops->mmap_offset(obj); + return 0; } + if (mmap_type == I915_MMAP_TYPE_FIXED) + return -ENODEV; + if (mmap_type != I915_MMAP_TYPE_GTT && !i915_gem_object_has_struct_page(obj) && - !i915_gem_object_type_has(obj, I915_GEM_OBJECT_HAS_IOMEM)) { - err = -ENODEV; - goto out; - } + !i915_gem_object_has_iomem(obj)) + return -ENODEV; mmo = mmap_offset_attach(obj, mmap_type, file); - if (IS_ERR(mmo)) { - err = PTR_ERR(mmo); - goto out; - } + if (IS_ERR(mmo)) + return PTR_ERR(mmo); *offset = drm_vma_node_offset_addr(&mmo->vma_node); - err = 0; -out: + return 0; +} + +static int +__assign_mmap_offset_handle(struct drm_file *file, + u32 handle, + enum i915_mmap_type mmap_type, + u64 *offset) +{ + struct drm_i915_gem_object *obj; + int err; + + obj = i915_gem_object_lookup(file, handle); + if (!obj) + return -ENOENT; + + err = i915_gem_object_lock_interruptible(obj, NULL); + if (err) + goto out_put; + err = __assign_mmap_offset(obj, mmap_type, offset, file); + i915_gem_object_unlock(obj); +out_put: i915_gem_object_put(obj); return err; } @@ -712,14 +734,16 @@ i915_gem_dumb_mmap_offset(struct drm_file *file, { enum i915_mmap_type mmap_type; - if (boot_cpu_has(X86_FEATURE_PAT)) + if (HAS_LMEM(to_i915(dev))) + mmap_type = I915_MMAP_TYPE_FIXED; + else if (boot_cpu_has(X86_FEATURE_PAT)) mmap_type = I915_MMAP_TYPE_WC; else if (!i915_ggtt_has_aperture(&to_i915(dev)->ggtt)) return -ENODEV; else mmap_type = I915_MMAP_TYPE_GTT; - return __assign_mmap_offset(file, handle, mmap_type, offset); + return __assign_mmap_offset_handle(file, handle, mmap_type, offset); } /** @@ -783,11 +807,15 @@ i915_gem_mmap_offset_ioctl(struct drm_device *dev, void *data, type = I915_MMAP_TYPE_UC; break; + case I915_MMAP_OFFSET_FIXED: + type = I915_MMAP_TYPE_FIXED; + break; + default: return -EINVAL; } - return __assign_mmap_offset(file, args->handle, type, &args->offset); + return __assign_mmap_offset_handle(file, args->handle, type, &args->offset); } static void vm_open(struct vm_area_struct *vma) @@ -891,8 +919,18 @@ int i915_gem_mmap(struct file *filp, struct vm_area_struct *vma) * destroyed and will be invalid when the vma manager lock * is released. */ - mmo = container_of(node, struct i915_mmap_offset, vma_node); - obj = i915_gem_object_get_rcu(mmo->obj); + if (!node->driver_private) { + mmo = container_of(node, struct i915_mmap_offset, vma_node); + obj = i915_gem_object_get_rcu(mmo->obj); + + GEM_BUG_ON(obj && obj->ops->mmap_ops); + } else { + obj = i915_gem_object_get_rcu + (container_of(node, struct drm_i915_gem_object, + base.vma_node)); + + GEM_BUG_ON(obj && !obj->ops->mmap_ops); + } } drm_vma_offset_unlock_lookup(dev->vma_offset_manager); rcu_read_unlock(); @@ -913,8 +951,7 @@ int i915_gem_mmap(struct file *filp, struct vm_area_struct *vma) return PTR_ERR(anon); } - vma->vm_flags |= VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP; - vma->vm_private_data = mmo; + vma->vm_flags |= VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | VM_IO; /* * We keep the ref on mmo->obj, not vm_file, but we require @@ -928,6 +965,15 @@ int i915_gem_mmap(struct file *filp, struct vm_area_struct *vma) /* Drop the initial creation reference, the vma is now holding one. 
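For completeness, the new FIXED mmap mode wired up above is selected from userspace through DRM_IOCTL_I915_GEM_MMAP_OFFSET. A hedged userspace sketch using ordinary libdrm conventions, not code from this series:

    #include <stdint.h>
    #include <sys/mman.h>
    #include <xf86drm.h>
    #include <drm/i915_drm.h>

    /*
     * Illustrative only: map an object through I915_MMAP_OFFSET_FIXED and
     * let the kernel pick a caching mode matching the object's placement.
     * Callers must compare the result against MAP_FAILED.
     */
    static void *gem_mmap_fixed(int fd, uint32_t handle, size_t size)
    {
            struct drm_i915_gem_mmap_offset arg = {
                    .handle = handle,
                    .flags = I915_MMAP_OFFSET_FIXED,
            };

            if (drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP_OFFSET, &arg))
                    return MAP_FAILED;

            return mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
                        fd, arg.offset);
    }
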
*/ fput(anon); + if (obj->ops->mmap_ops) { + vma->vm_page_prot = pgprot_decrypted(vm_get_page_prot(vma->vm_flags)); + vma->vm_ops = obj->ops->mmap_ops; + vma->vm_private_data = node->driver_private; + return 0; + } + + vma->vm_private_data = mmo; + switch (mmo->mmap_type) { case I915_MMAP_TYPE_WC: vma->vm_page_prot = @@ -935,6 +981,9 @@ int i915_gem_mmap(struct file *filp, struct vm_area_struct *vma) vma->vm_ops = &vm_ops_cpu; break; + case I915_MMAP_TYPE_FIXED: + GEM_WARN_ON(1); + fallthrough; case I915_MMAP_TYPE_WB: vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); vma->vm_ops = &vm_ops_cpu; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 5706d471692d..6fb9afb65034 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -30,14 +30,10 @@ #include "i915_gem_context.h" #include "i915_gem_mman.h" #include "i915_gem_object.h" -#include "i915_globals.h" #include "i915_memcpy.h" #include "i915_trace.h" -static struct i915_global_object { - struct i915_global base; - struct kmem_cache *slab_objects; -} global; +static struct kmem_cache *slab_objects; static const struct drm_gem_object_funcs i915_gem_object_funcs; @@ -45,7 +41,7 @@ struct drm_i915_gem_object *i915_gem_object_alloc(void) { struct drm_i915_gem_object *obj; - obj = kmem_cache_zalloc(global.slab_objects, GFP_KERNEL); + obj = kmem_cache_zalloc(slab_objects, GFP_KERNEL); if (!obj) return NULL; obj->base.funcs = &i915_gem_object_funcs; @@ -55,7 +51,7 @@ struct drm_i915_gem_object *i915_gem_object_alloc(void) void i915_gem_object_free(struct drm_i915_gem_object *obj) { - return kmem_cache_free(global.slab_objects, obj); + return kmem_cache_free(slab_objects, obj); } void i915_gem_object_init(struct drm_i915_gem_object *obj, @@ -172,7 +168,7 @@ static void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *f } } -static void __i915_gem_free_object_rcu(struct rcu_head *head) +void __i915_gem_free_object_rcu(struct rcu_head *head) { struct drm_i915_gem_object *obj = container_of(head, typeof(*obj), rcu); @@ -208,59 +204,69 @@ static void __i915_gem_object_free_mmaps(struct drm_i915_gem_object *obj) } } -static void __i915_gem_free_objects(struct drm_i915_private *i915, - struct llist_node *freed) +void __i915_gem_free_object(struct drm_i915_gem_object *obj) { - struct drm_i915_gem_object *obj, *on; + trace_i915_gem_object_destroy(obj); - llist_for_each_entry_safe(obj, on, freed, freed) { - trace_i915_gem_object_destroy(obj); + if (!list_empty(&obj->vma.list)) { + struct i915_vma *vma; + + /* + * Note that the vma keeps an object reference while + * it is active, so it *should* not sleep while we + * destroy it. Our debug code errs insits it *might*. + * For the moment, play along. + */ + spin_lock(&obj->vma.lock); + while ((vma = list_first_entry_or_null(&obj->vma.list, + struct i915_vma, + obj_link))) { + GEM_BUG_ON(vma->obj != obj); + spin_unlock(&obj->vma.lock); - if (!list_empty(&obj->vma.list)) { - struct i915_vma *vma; + __i915_vma_put(vma); - /* - * Note that the vma keeps an object reference while - * it is active, so it *should* not sleep while we - * destroy it. Our debug code errs insits it *might*. - * For the moment, play along. 
- */ spin_lock(&obj->vma.lock); - while ((vma = list_first_entry_or_null(&obj->vma.list, - struct i915_vma, - obj_link))) { - GEM_BUG_ON(vma->obj != obj); - spin_unlock(&obj->vma.lock); + } + spin_unlock(&obj->vma.lock); + } - __i915_vma_put(vma); + __i915_gem_object_free_mmaps(obj); - spin_lock(&obj->vma.lock); - } - spin_unlock(&obj->vma.lock); - } + GEM_BUG_ON(!list_empty(&obj->lut_list)); - __i915_gem_object_free_mmaps(obj); + atomic_set(&obj->mm.pages_pin_count, 0); + __i915_gem_object_put_pages(obj); + GEM_BUG_ON(i915_gem_object_has_pages(obj)); + bitmap_free(obj->bit_17); - GEM_BUG_ON(!list_empty(&obj->lut_list)); + if (obj->base.import_attach) + drm_prime_gem_destroy(&obj->base, NULL); - atomic_set(&obj->mm.pages_pin_count, 0); - __i915_gem_object_put_pages(obj); - GEM_BUG_ON(i915_gem_object_has_pages(obj)); - bitmap_free(obj->bit_17); + drm_gem_free_mmap_offset(&obj->base); - if (obj->base.import_attach) - drm_prime_gem_destroy(&obj->base, NULL); + if (obj->ops->release) + obj->ops->release(obj); - drm_gem_free_mmap_offset(&obj->base); + if (obj->mm.n_placements > 1) + kfree(obj->mm.placements); - if (obj->ops->release) - obj->ops->release(obj); + if (obj->shares_resv_from) + i915_vm_resv_put(obj->shares_resv_from); +} - if (obj->mm.n_placements > 1) - kfree(obj->mm.placements); +static void __i915_gem_free_objects(struct drm_i915_private *i915, + struct llist_node *freed) +{ + struct drm_i915_gem_object *obj, *on; - if (obj->shares_resv_from) - i915_vm_resv_put(obj->shares_resv_from); + llist_for_each_entry_safe(obj, on, freed, freed) { + might_sleep(); + if (obj->ops->delayed_free) { + obj->ops->delayed_free(obj); + continue; + } + __i915_gem_free_object(obj); /* But keep the pointer alive for RCU-protected lookups */ call_rcu(&obj->rcu, __i915_gem_free_object_rcu); @@ -318,6 +324,7 @@ static void i915_gem_free_object(struct drm_gem_object *gem_obj) * worker and performing frees directly from subsequent allocations for * crude but effective memory throttling. */ + if (llist_add(&obj->freed, &i915->mm.free_list)) queue_work(i915->wq, &i915->mm.free_work); } @@ -410,34 +417,254 @@ int i915_gem_object_read_from_page(struct drm_i915_gem_object *obj, u64 offset, return 0; } -void i915_gem_init__objects(struct drm_i915_private *i915) +/** + * i915_gem_object_evictable - Whether object is likely evictable after unbind. + * @obj: The object to check + * + * This function checks whether the object is likely unvictable after unbind. + * If the object is not locked when checking, the result is only advisory. + * If the object is locked when checking, and the function returns true, + * then an eviction should indeed be possible. But since unlocked vma + * unpinning and unbinding is currently possible, the object can actually + * become evictable even if this function returns false. + * + * Return: true if the object may be evictable. False otherwise. 
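As the kernel-doc above notes, an unlocked evictability check is only advisory. A hedged sketch of a caller that wants a trustworthy answer; it is deliberately trivial, only showing the locking the comment asks for, and of course the answer can go stale again once the lock is dropped:

    #include "gem/i915_gem_object.h"

    /*
     * Illustrative only: take the object lock so that a "true" result from
     * i915_gem_object_evictable() actually means an eviction should work.
     */
    static bool object_currently_evictable(struct drm_i915_gem_object *obj)
    {
            bool evictable;

            if (i915_gem_object_lock_interruptible(obj, NULL))
                    return false;

            evictable = i915_gem_object_evictable(obj);
            i915_gem_object_unlock(obj);

            return evictable;
    }
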
+ */ +bool i915_gem_object_evictable(struct drm_i915_gem_object *obj) { - INIT_WORK(&i915->mm.free_work, __i915_gem_free_work); + struct i915_vma *vma; + int pin_count = atomic_read(&obj->mm.pages_pin_count); + + if (!pin_count) + return true; + + spin_lock(&obj->vma.lock); + list_for_each_entry(vma, &obj->vma.list, obj_link) { + if (i915_vma_is_pinned(vma)) { + spin_unlock(&obj->vma.lock); + return false; + } + if (atomic_read(&vma->pages_count)) + pin_count--; + } + spin_unlock(&obj->vma.lock); + GEM_WARN_ON(pin_count < 0); + + return pin_count == 0; +} + +/** + * i915_gem_object_migratable - Whether the object is migratable out of the + * current region. + * @obj: Pointer to the object. + * + * Return: Whether the object is allowed to be resident in other + * regions than the current while pages are present. + */ +bool i915_gem_object_migratable(struct drm_i915_gem_object *obj) +{ + struct intel_memory_region *mr = READ_ONCE(obj->mm.region); + + if (!mr) + return false; + + return obj->mm.n_placements > 1; } -static void i915_global_objects_shrink(void) +/** + * i915_gem_object_has_struct_page - Whether the object is page-backed + * @obj: The object to query. + * + * This function should only be called while the object is locked or pinned, + * otherwise the page backing may change under the caller. + * + * Return: True if page-backed, false otherwise. + */ +bool i915_gem_object_has_struct_page(const struct drm_i915_gem_object *obj) { - kmem_cache_shrink(global.slab_objects); +#ifdef CONFIG_LOCKDEP + if (IS_DGFX(to_i915(obj->base.dev)) && + i915_gem_object_evictable((void __force *)obj)) + assert_object_held_shared(obj); +#endif + return obj->mem_flags & I915_BO_FLAG_STRUCT_PAGE; } -static void i915_global_objects_exit(void) +/** + * i915_gem_object_has_iomem - Whether the object is iomem-backed + * @obj: The object to query. + * + * This function should only be called while the object is locked or pinned, + * otherwise the iomem backing may change under the caller. + * + * Return: True if iomem-backed, false otherwise. + */ +bool i915_gem_object_has_iomem(const struct drm_i915_gem_object *obj) { - kmem_cache_destroy(global.slab_objects); +#ifdef CONFIG_LOCKDEP + if (IS_DGFX(to_i915(obj->base.dev)) && + i915_gem_object_evictable((void __force *)obj)) + assert_object_held_shared(obj); +#endif + return obj->mem_flags & I915_BO_FLAG_IOMEM; } -static struct i915_global_object global = { { - .shrink = i915_global_objects_shrink, - .exit = i915_global_objects_exit, -} }; +/** + * i915_gem_object_can_migrate - Whether an object likely can be migrated + * + * @obj: The object to migrate + * @id: The region intended to migrate to + * + * Check whether the object backend supports migration to the + * given region. Note that pinning may affect the ability to migrate as + * returned by this function. + * + * This function is primarily intended as a helper for checking the + * possibility to migrate objects and might be slightly less permissive + * than i915_gem_object_migrate() when it comes to objects with the + * I915_BO_ALLOC_USER flag set. + * + * Return: true if migration is possible, false otherwise. 
+ */ +bool i915_gem_object_can_migrate(struct drm_i915_gem_object *obj, + enum intel_region_id id) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + unsigned int num_allowed = obj->mm.n_placements; + struct intel_memory_region *mr; + unsigned int i; + + GEM_BUG_ON(id >= INTEL_REGION_UNKNOWN); + GEM_BUG_ON(obj->mm.madv != I915_MADV_WILLNEED); + + mr = i915->mm.regions[id]; + if (!mr) + return false; + + if (obj->mm.region == mr) + return true; + + if (!i915_gem_object_evictable(obj)) + return false; + + if (!obj->ops->migrate) + return false; + + if (!(obj->flags & I915_BO_ALLOC_USER)) + return true; + + if (num_allowed == 0) + return false; + + for (i = 0; i < num_allowed; ++i) { + if (mr == obj->mm.placements[i]) + return true; + } + + return false; +} + +/** + * i915_gem_object_migrate - Migrate an object to the desired region id + * @obj: The object to migrate. + * @ww: An optional struct i915_gem_ww_ctx. If NULL, the backend may + * not be successful in evicting other objects to make room for this object. + * @id: The region id to migrate to. + * + * Attempt to migrate the object to the desired memory region. The + * object backend must support migration and the object may not be + * pinned, (explicitly pinned pages or pinned vmas). The object must + * be locked. + * On successful completion, the object will have pages pointing to + * memory in the new region, but an async migration task may not have + * completed yet, and to accomplish that, i915_gem_object_wait_migration() + * must be called. + * + * Note: the @ww parameter is not used yet, but included to make sure + * callers put some effort into obtaining a valid ww ctx if one is + * available. + * + * Return: 0 on success. Negative error code on failure. In particular may + * return -ENXIO on lack of region space, -EDEADLK for deadlock avoidance + * if @ww is set, -EINTR or -ERESTARTSYS if signal pending, and + * -EBUSY if the object is pinned. + */ +int i915_gem_object_migrate(struct drm_i915_gem_object *obj, + struct i915_gem_ww_ctx *ww, + enum intel_region_id id) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct intel_memory_region *mr; + + GEM_BUG_ON(id >= INTEL_REGION_UNKNOWN); + GEM_BUG_ON(obj->mm.madv != I915_MADV_WILLNEED); + assert_object_held(obj); + + mr = i915->mm.regions[id]; + GEM_BUG_ON(!mr); + + if (!i915_gem_object_can_migrate(obj, id)) + return -EINVAL; + + if (!obj->ops->migrate) { + if (GEM_WARN_ON(obj->mm.region != mr)) + return -EINVAL; + return 0; + } + + return obj->ops->migrate(obj, mr); +} + +/** + * i915_gem_object_placement_possible - Check whether the object can be + * placed at certain memory type + * @obj: Pointer to the object + * @type: The memory type to check + * + * Return: True if the object can be placed in @type. False otherwise. 
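The dma-buf attach hook earlier in this patch is one concrete user of the migrate/wait interface documented above. The same pattern, condensed into a hedged sketch with the error handling folded into the ww retry loop, exactly as that hook does it:

    #include "gem/i915_gem_object.h"
    #include "i915_gem_ww.h"

    /*
     * Illustrative only: lock, migrate to system memory, then wait for any
     * async migration to finish, mirroring i915_gem_dmabuf_attach() above.
     */
    static int move_to_smem(struct drm_i915_gem_object *obj)
    {
            struct i915_gem_ww_ctx ww;
            int err;

            for_i915_gem_ww(&ww, err, true) {
                    err = i915_gem_object_lock(obj, &ww);
                    if (err)
                            continue;

                    err = i915_gem_object_migrate(obj, &ww, INTEL_REGION_SMEM);
                    if (err)
                            continue;

                    err = i915_gem_object_wait_migration(obj, 0);
            }

            return err;
    }
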
+ */ +bool i915_gem_object_placement_possible(struct drm_i915_gem_object *obj, + enum intel_memory_type type) +{ + unsigned int i; + + if (!obj->mm.n_placements) { + switch (type) { + case INTEL_MEMORY_LOCAL: + return i915_gem_object_has_iomem(obj); + case INTEL_MEMORY_SYSTEM: + return i915_gem_object_has_pages(obj); + default: + /* Ignore stolen for now */ + GEM_BUG_ON(1); + return false; + } + } + + for (i = 0; i < obj->mm.n_placements; i++) { + if (obj->mm.placements[i]->type == type) + return true; + } + + return false; +} + +void i915_gem_init__objects(struct drm_i915_private *i915) +{ + INIT_WORK(&i915->mm.free_work, __i915_gem_free_work); +} + +void i915_objects_module_exit(void) +{ + kmem_cache_destroy(slab_objects); +} -int __init i915_global_objects_init(void) +int __init i915_objects_module_init(void) { - global.slab_objects = - KMEM_CACHE(drm_i915_gem_object, SLAB_HWCACHE_ALIGN); - if (!global.slab_objects) + slab_objects = KMEM_CACHE(drm_i915_gem_object, SLAB_HWCACHE_ALIGN); + if (!slab_objects) return -ENOMEM; - i915_global_register(&global.base); return 0; } @@ -450,6 +677,7 @@ static const struct drm_gem_object_funcs i915_gem_object_funcs = { #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/huge_gem_object.c" #include "selftests/huge_pages.c" +#include "selftests/i915_gem_migrate.c" #include "selftests/i915_gem_object.c" #include "selftests/i915_gem_coherency.c" #endif diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index 7c0eb425cb3b..48112b9d76df 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -12,10 +12,14 @@ #include <drm/drm_device.h> #include "display/intel_frontbuffer.h" +#include "intel_memory_region.h" #include "i915_gem_object_types.h" #include "i915_gem_gtt.h" +#include "i915_gem_ww.h" #include "i915_vma_types.h" +enum intel_region_id; + /* * XXX: There is a prevalence of the assumption that we fit the * object's page count inside a 32bit _signed_ variable. Let's document @@ -44,6 +48,9 @@ static inline bool i915_gem_object_size_2big(u64 size) void i915_gem_init__objects(struct drm_i915_private *i915); +void i915_objects_module_exit(void); +int i915_objects_module_init(void); + struct drm_i915_gem_object *i915_gem_object_alloc(void); void i915_gem_object_free(struct drm_i915_gem_object *obj); @@ -57,6 +64,10 @@ i915_gem_object_create_shmem(struct drm_i915_private *i915, struct drm_i915_gem_object * i915_gem_object_create_shmem_from_data(struct drm_i915_private *i915, const void *data, resource_size_t size); +struct drm_i915_gem_object * +__i915_gem_object_create_user(struct drm_i915_private *i915, u64 size, + struct intel_memory_region **placements, + unsigned int n_placements); extern const struct drm_i915_gem_object_ops i915_gem_shmem_ops; @@ -147,7 +158,7 @@ i915_gem_object_put(struct drm_i915_gem_object *obj) /* * If more than one potential simultaneous locker, assert held. */ -static inline void assert_object_held_shared(struct drm_i915_gem_object *obj) +static inline void assert_object_held_shared(const struct drm_i915_gem_object *obj) { /* * Note mm list lookup is protected by @@ -169,13 +180,17 @@ static inline int __i915_gem_object_lock(struct drm_i915_gem_object *obj, else ret = dma_resv_lock(obj->base.resv, ww ? 
&ww->ctx : NULL); - if (!ret && ww) + if (!ret && ww) { + i915_gem_object_get(obj); list_add_tail(&obj->obj_link, &ww->obj_list); + } if (ret == -EALREADY) ret = 0; - if (ret == -EDEADLK) + if (ret == -EDEADLK) { + i915_gem_object_get(obj); ww->contended = obj; + } return ret; } @@ -200,6 +215,9 @@ static inline bool i915_gem_object_trylock(struct drm_i915_gem_object *obj) static inline void i915_gem_object_unlock(struct drm_i915_gem_object *obj) { + if (obj->ops->adjust_lru) + obj->ops->adjust_lru(obj); + dma_resv_unlock(obj->base.resv); } @@ -258,17 +276,9 @@ i915_gem_object_type_has(const struct drm_i915_gem_object *obj, return obj->ops->flags & flags; } -static inline bool -i915_gem_object_has_struct_page(const struct drm_i915_gem_object *obj) -{ - return obj->flags & I915_BO_ALLOC_STRUCT_PAGE; -} +bool i915_gem_object_has_struct_page(const struct drm_i915_gem_object *obj); -static inline bool -i915_gem_object_has_iomem(const struct drm_i915_gem_object *obj) -{ - return i915_gem_object_type_has(obj, I915_GEM_OBJECT_HAS_IOMEM); -} +bool i915_gem_object_has_iomem(const struct drm_i915_gem_object *obj); static inline bool i915_gem_object_is_shrinkable(const struct drm_i915_gem_object *obj) @@ -339,22 +349,22 @@ struct scatterlist * __i915_gem_object_get_sg(struct drm_i915_gem_object *obj, struct i915_gem_object_page_iter *iter, unsigned int n, - unsigned int *offset, bool allow_alloc); + unsigned int *offset, bool dma); static inline struct scatterlist * i915_gem_object_get_sg(struct drm_i915_gem_object *obj, unsigned int n, - unsigned int *offset, bool allow_alloc) + unsigned int *offset) { - return __i915_gem_object_get_sg(obj, &obj->mm.get_page, n, offset, allow_alloc); + return __i915_gem_object_get_sg(obj, &obj->mm.get_page, n, offset, false); } static inline struct scatterlist * i915_gem_object_get_sg_dma(struct drm_i915_gem_object *obj, unsigned int n, - unsigned int *offset, bool allow_alloc) + unsigned int *offset) { - return __i915_gem_object_get_sg(obj, &obj->mm.get_dma_page, n, offset, allow_alloc); + return __i915_gem_object_get_sg(obj, &obj->mm.get_dma_page, n, offset, true); } struct page * @@ -587,6 +597,27 @@ int i915_gem_object_read_from_page(struct drm_i915_gem_object *obj, u64 offset, bool i915_gem_object_is_shmem(const struct drm_i915_gem_object *obj); +void __i915_gem_free_object_rcu(struct rcu_head *head); + +void __i915_gem_free_object(struct drm_i915_gem_object *obj); + +bool i915_gem_object_evictable(struct drm_i915_gem_object *obj); + +bool i915_gem_object_migratable(struct drm_i915_gem_object *obj); + +int i915_gem_object_migrate(struct drm_i915_gem_object *obj, + struct i915_gem_ww_ctx *ww, + enum intel_region_id id); + +bool i915_gem_object_can_migrate(struct drm_i915_gem_object *obj, + enum intel_region_id id); + +int i915_gem_object_wait_migration(struct drm_i915_gem_object *obj, + unsigned int flags); + +bool i915_gem_object_placement_possible(struct drm_i915_gem_object *obj, + enum intel_memory_type type); + #ifdef CONFIG_MMU_NOTIFIER static inline bool i915_gem_object_is_userptr(struct drm_i915_gem_object *obj) @@ -596,14 +627,12 @@ i915_gem_object_is_userptr(struct drm_i915_gem_object *obj) int i915_gem_object_userptr_submit_init(struct drm_i915_gem_object *obj); int i915_gem_object_userptr_submit_done(struct drm_i915_gem_object *obj); -void i915_gem_object_userptr_submit_fini(struct drm_i915_gem_object *obj); int i915_gem_object_userptr_validate(struct drm_i915_gem_object *obj); #else static inline bool i915_gem_object_is_userptr(struct 
drm_i915_gem_object *obj) { return false; } static inline int i915_gem_object_userptr_submit_init(struct drm_i915_gem_object *obj) { GEM_BUG_ON(1); return -ENODEV; } static inline int i915_gem_object_userptr_submit_done(struct drm_i915_gem_object *obj) { GEM_BUG_ON(1); return -ENODEV; } -static inline void i915_gem_object_userptr_submit_fini(struct drm_i915_gem_object *obj) { GEM_BUG_ON(1); } static inline int i915_gem_object_userptr_validate(struct drm_i915_gem_object *obj) { GEM_BUG_ON(1); return -ENODEV; } #endif diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c deleted file mode 100644 index 3e28c68fda3e..000000000000 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c +++ /dev/null @@ -1,461 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright © 2019 Intel Corporation - */ - -#include "i915_drv.h" -#include "gt/intel_context.h" -#include "gt/intel_engine_pm.h" -#include "gt/intel_gpu_commands.h" -#include "gt/intel_gt.h" -#include "gt/intel_gt_buffer_pool.h" -#include "gt/intel_ring.h" -#include "i915_gem_clflush.h" -#include "i915_gem_object_blt.h" - -struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce, - struct i915_vma *vma, - struct i915_gem_ww_ctx *ww, - u32 value) -{ - struct drm_i915_private *i915 = ce->vm->i915; - const u32 block_size = SZ_8M; /* ~1ms at 8GiB/s preemption delay */ - struct intel_gt_buffer_pool_node *pool; - struct i915_vma *batch; - u64 offset; - u64 count; - u64 rem; - u32 size; - u32 *cmd; - int err; - - GEM_BUG_ON(intel_engine_is_virtual(ce->engine)); - intel_engine_pm_get(ce->engine); - - count = div_u64(round_up(vma->size, block_size), block_size); - size = (1 + 8 * count) * sizeof(u32); - size = round_up(size, PAGE_SIZE); - pool = intel_gt_get_buffer_pool(ce->engine->gt, size, I915_MAP_WC); - if (IS_ERR(pool)) { - err = PTR_ERR(pool); - goto out_pm; - } - - err = i915_gem_object_lock(pool->obj, ww); - if (err) - goto out_put; - - batch = i915_vma_instance(pool->obj, ce->vm, NULL); - if (IS_ERR(batch)) { - err = PTR_ERR(batch); - goto out_put; - } - - err = i915_vma_pin_ww(batch, ww, 0, 0, PIN_USER); - if (unlikely(err)) - goto out_put; - - /* we pinned the pool, mark it as such */ - intel_gt_buffer_pool_mark_used(pool); - - cmd = i915_gem_object_pin_map(pool->obj, pool->type); - if (IS_ERR(cmd)) { - err = PTR_ERR(cmd); - goto out_unpin; - } - - rem = vma->size; - offset = vma->node.start; - - do { - u32 size = min_t(u64, rem, block_size); - - GEM_BUG_ON(size >> PAGE_SHIFT > S16_MAX); - - if (GRAPHICS_VER(i915) >= 8) { - *cmd++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (7 - 2); - *cmd++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE; - *cmd++ = 0; - *cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4; - *cmd++ = lower_32_bits(offset); - *cmd++ = upper_32_bits(offset); - *cmd++ = value; - } else { - *cmd++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (6 - 2); - *cmd++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE; - *cmd++ = 0; - *cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4; - *cmd++ = offset; - *cmd++ = value; - } - - /* Allow ourselves to be preempted in between blocks. 
*/ - *cmd++ = MI_ARB_CHECK; - - offset += size; - rem -= size; - } while (rem); - - *cmd = MI_BATCH_BUFFER_END; - - i915_gem_object_flush_map(pool->obj); - i915_gem_object_unpin_map(pool->obj); - - intel_gt_chipset_flush(ce->vm->gt); - - batch->private = pool; - return batch; - -out_unpin: - i915_vma_unpin(batch); -out_put: - intel_gt_buffer_pool_put(pool); -out_pm: - intel_engine_pm_put(ce->engine); - return ERR_PTR(err); -} - -int intel_emit_vma_mark_active(struct i915_vma *vma, struct i915_request *rq) -{ - int err; - - err = i915_request_await_object(rq, vma->obj, false); - if (err == 0) - err = i915_vma_move_to_active(vma, rq, 0); - if (unlikely(err)) - return err; - - return intel_gt_buffer_pool_mark_active(vma->private, rq); -} - -void intel_emit_vma_release(struct intel_context *ce, struct i915_vma *vma) -{ - i915_vma_unpin(vma); - intel_gt_buffer_pool_put(vma->private); - intel_engine_pm_put(ce->engine); -} - -static int -move_obj_to_gpu(struct drm_i915_gem_object *obj, - struct i915_request *rq, - bool write) -{ - if (obj->cache_dirty & ~obj->cache_coherent) - i915_gem_clflush_object(obj, 0); - - return i915_request_await_object(rq, obj, write); -} - -int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj, - struct intel_context *ce, - u32 value) -{ - struct i915_gem_ww_ctx ww; - struct i915_request *rq; - struct i915_vma *batch; - struct i915_vma *vma; - int err; - - vma = i915_vma_instance(obj, ce->vm, NULL); - if (IS_ERR(vma)) - return PTR_ERR(vma); - - i915_gem_ww_ctx_init(&ww, true); - intel_engine_pm_get(ce->engine); -retry: - err = i915_gem_object_lock(obj, &ww); - if (err) - goto out; - - err = intel_context_pin_ww(ce, &ww); - if (err) - goto out; - - err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER); - if (err) - goto out_ctx; - - batch = intel_emit_vma_fill_blt(ce, vma, &ww, value); - if (IS_ERR(batch)) { - err = PTR_ERR(batch); - goto out_vma; - } - - rq = i915_request_create(ce); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto out_batch; - } - - err = intel_emit_vma_mark_active(batch, rq); - if (unlikely(err)) - goto out_request; - - err = move_obj_to_gpu(vma->obj, rq, true); - if (err == 0) - err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); - if (unlikely(err)) - goto out_request; - - if (ce->engine->emit_init_breadcrumb) - err = ce->engine->emit_init_breadcrumb(rq); - - if (likely(!err)) - err = ce->engine->emit_bb_start(rq, - batch->node.start, - batch->node.size, - 0); -out_request: - if (unlikely(err)) - i915_request_set_error_once(rq, err); - - i915_request_add(rq); -out_batch: - intel_emit_vma_release(ce, batch); -out_vma: - i915_vma_unpin(vma); -out_ctx: - intel_context_unpin(ce); -out: - if (err == -EDEADLK) { - err = i915_gem_ww_ctx_backoff(&ww); - if (!err) - goto retry; - } - i915_gem_ww_ctx_fini(&ww); - intel_engine_pm_put(ce->engine); - return err; -} - -/* Wa_1209644611:icl,ehl */ -static bool wa_1209644611_applies(struct drm_i915_private *i915, u32 size) -{ - u32 height = size >> PAGE_SHIFT; - - if (GRAPHICS_VER(i915) != 11) - return false; - - return height % 4 == 3 && height <= 8; -} - -struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce, - struct i915_gem_ww_ctx *ww, - struct i915_vma *src, - struct i915_vma *dst) -{ - struct drm_i915_private *i915 = ce->vm->i915; - const u32 block_size = SZ_8M; /* ~1ms at 8GiB/s preemption delay */ - struct intel_gt_buffer_pool_node *pool; - struct i915_vma *batch; - u64 src_offset, dst_offset; - u64 count, rem; - u32 size, *cmd; - int err; - - GEM_BUG_ON(src->size != dst->size); - - 
GEM_BUG_ON(intel_engine_is_virtual(ce->engine)); - intel_engine_pm_get(ce->engine); - - count = div_u64(round_up(dst->size, block_size), block_size); - size = (1 + 11 * count) * sizeof(u32); - size = round_up(size, PAGE_SIZE); - pool = intel_gt_get_buffer_pool(ce->engine->gt, size, I915_MAP_WC); - if (IS_ERR(pool)) { - err = PTR_ERR(pool); - goto out_pm; - } - - err = i915_gem_object_lock(pool->obj, ww); - if (err) - goto out_put; - - batch = i915_vma_instance(pool->obj, ce->vm, NULL); - if (IS_ERR(batch)) { - err = PTR_ERR(batch); - goto out_put; - } - - err = i915_vma_pin_ww(batch, ww, 0, 0, PIN_USER); - if (unlikely(err)) - goto out_put; - - /* we pinned the pool, mark it as such */ - intel_gt_buffer_pool_mark_used(pool); - - cmd = i915_gem_object_pin_map(pool->obj, pool->type); - if (IS_ERR(cmd)) { - err = PTR_ERR(cmd); - goto out_unpin; - } - - rem = src->size; - src_offset = src->node.start; - dst_offset = dst->node.start; - - do { - size = min_t(u64, rem, block_size); - GEM_BUG_ON(size >> PAGE_SHIFT > S16_MAX); - - if (GRAPHICS_VER(i915) >= 9 && - !wa_1209644611_applies(i915, size)) { - *cmd++ = GEN9_XY_FAST_COPY_BLT_CMD | (10 - 2); - *cmd++ = BLT_DEPTH_32 | PAGE_SIZE; - *cmd++ = 0; - *cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4; - *cmd++ = lower_32_bits(dst_offset); - *cmd++ = upper_32_bits(dst_offset); - *cmd++ = 0; - *cmd++ = PAGE_SIZE; - *cmd++ = lower_32_bits(src_offset); - *cmd++ = upper_32_bits(src_offset); - } else if (GRAPHICS_VER(i915) >= 8) { - *cmd++ = XY_SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (10 - 2); - *cmd++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | PAGE_SIZE; - *cmd++ = 0; - *cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4; - *cmd++ = lower_32_bits(dst_offset); - *cmd++ = upper_32_bits(dst_offset); - *cmd++ = 0; - *cmd++ = PAGE_SIZE; - *cmd++ = lower_32_bits(src_offset); - *cmd++ = upper_32_bits(src_offset); - } else { - *cmd++ = SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (6 - 2); - *cmd++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | PAGE_SIZE; - *cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE; - *cmd++ = dst_offset; - *cmd++ = PAGE_SIZE; - *cmd++ = src_offset; - } - - /* Allow ourselves to be preempted in between blocks. 
*/ - *cmd++ = MI_ARB_CHECK; - - src_offset += size; - dst_offset += size; - rem -= size; - } while (rem); - - *cmd = MI_BATCH_BUFFER_END; - - i915_gem_object_flush_map(pool->obj); - i915_gem_object_unpin_map(pool->obj); - - intel_gt_chipset_flush(ce->vm->gt); - batch->private = pool; - return batch; - -out_unpin: - i915_vma_unpin(batch); -out_put: - intel_gt_buffer_pool_put(pool); -out_pm: - intel_engine_pm_put(ce->engine); - return ERR_PTR(err); -} - -int i915_gem_object_copy_blt(struct drm_i915_gem_object *src, - struct drm_i915_gem_object *dst, - struct intel_context *ce) -{ - struct i915_address_space *vm = ce->vm; - struct i915_vma *vma[2], *batch; - struct i915_gem_ww_ctx ww; - struct i915_request *rq; - int err, i; - - vma[0] = i915_vma_instance(src, vm, NULL); - if (IS_ERR(vma[0])) - return PTR_ERR(vma[0]); - - vma[1] = i915_vma_instance(dst, vm, NULL); - if (IS_ERR(vma[1])) - return PTR_ERR(vma[1]); - - i915_gem_ww_ctx_init(&ww, true); - intel_engine_pm_get(ce->engine); -retry: - err = i915_gem_object_lock(src, &ww); - if (!err) - err = i915_gem_object_lock(dst, &ww); - if (!err) - err = intel_context_pin_ww(ce, &ww); - if (err) - goto out; - - err = i915_vma_pin_ww(vma[0], &ww, 0, 0, PIN_USER); - if (err) - goto out_ctx; - - err = i915_vma_pin_ww(vma[1], &ww, 0, 0, PIN_USER); - if (unlikely(err)) - goto out_unpin_src; - - batch = intel_emit_vma_copy_blt(ce, &ww, vma[0], vma[1]); - if (IS_ERR(batch)) { - err = PTR_ERR(batch); - goto out_unpin_dst; - } - - rq = i915_request_create(ce); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto out_batch; - } - - err = intel_emit_vma_mark_active(batch, rq); - if (unlikely(err)) - goto out_request; - - for (i = 0; i < ARRAY_SIZE(vma); i++) { - err = move_obj_to_gpu(vma[i]->obj, rq, i); - if (unlikely(err)) - goto out_request; - } - - for (i = 0; i < ARRAY_SIZE(vma); i++) { - unsigned int flags = i ? 
EXEC_OBJECT_WRITE : 0; - - err = i915_vma_move_to_active(vma[i], rq, flags); - if (unlikely(err)) - goto out_request; - } - - if (rq->engine->emit_init_breadcrumb) { - err = rq->engine->emit_init_breadcrumb(rq); - if (unlikely(err)) - goto out_request; - } - - err = rq->engine->emit_bb_start(rq, - batch->node.start, batch->node.size, - 0); - -out_request: - if (unlikely(err)) - i915_request_set_error_once(rq, err); - - i915_request_add(rq); -out_batch: - intel_emit_vma_release(ce, batch); -out_unpin_dst: - i915_vma_unpin(vma[1]); -out_unpin_src: - i915_vma_unpin(vma[0]); -out_ctx: - intel_context_unpin(ce); -out: - if (err == -EDEADLK) { - err = i915_gem_ww_ctx_backoff(&ww); - if (!err) - goto retry; - } - i915_gem_ww_ctx_fini(&ww); - intel_engine_pm_put(ce->engine); - return err; -} - -#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) -#include "selftests/i915_gem_object_blt.c" -#endif diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h deleted file mode 100644 index 2409fdcccf0e..000000000000 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h +++ /dev/null @@ -1,39 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright © 2019 Intel Corporation - */ - -#ifndef __I915_GEM_OBJECT_BLT_H__ -#define __I915_GEM_OBJECT_BLT_H__ - -#include <linux/types.h> - -#include "gt/intel_context.h" -#include "gt/intel_engine_pm.h" -#include "i915_vma.h" - -struct drm_i915_gem_object; -struct i915_gem_ww_ctx; - -struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce, - struct i915_vma *vma, - struct i915_gem_ww_ctx *ww, - u32 value); - -struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce, - struct i915_gem_ww_ctx *ww, - struct i915_vma *src, - struct i915_vma *dst); - -int intel_emit_vma_mark_active(struct i915_vma *vma, struct i915_request *rq); -void intel_emit_vma_release(struct intel_context *ce, struct i915_vma *vma); - -int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj, - struct intel_context *ce, - u32 value); - -int i915_gem_object_copy_blt(struct drm_i915_gem_object *src, - struct drm_i915_gem_object *dst, - struct intel_context *ce); - -#endif diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index d047ea126029..2471f36aaff3 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -18,6 +18,7 @@ struct drm_i915_gem_object; struct intel_fronbuffer; +struct intel_memory_region; /* * struct i915_lut_handle tracks the fast lookups from handle to vma used @@ -33,10 +34,9 @@ struct i915_lut_handle { struct drm_i915_gem_object_ops { unsigned int flags; -#define I915_GEM_OBJECT_HAS_IOMEM BIT(1) -#define I915_GEM_OBJECT_IS_SHRINKABLE BIT(2) -#define I915_GEM_OBJECT_IS_PROXY BIT(3) -#define I915_GEM_OBJECT_NO_MMAP BIT(4) +#define I915_GEM_OBJECT_IS_SHRINKABLE BIT(1) +#define I915_GEM_OBJECT_IS_PROXY BIT(2) +#define I915_GEM_OBJECT_NO_MMAP BIT(3) /* Interface between the GEM object and its backing storage. 
* get_pages() is called once prior to the use of the associated set @@ -61,13 +61,117 @@ struct drm_i915_gem_object_ops { const struct drm_i915_gem_pread *arg); int (*pwrite)(struct drm_i915_gem_object *obj, const struct drm_i915_gem_pwrite *arg); + u64 (*mmap_offset)(struct drm_i915_gem_object *obj); int (*dmabuf_export)(struct drm_i915_gem_object *obj); + + /** + * adjust_lru - notify that the madvise value was updated + * @obj: The gem object + * + * The madvise value may have been updated, or object was recently + * referenced so act accordingly (Perhaps changing an LRU list etc). + */ + void (*adjust_lru)(struct drm_i915_gem_object *obj); + + /** + * delayed_free - Override the default delayed free implementation + */ + void (*delayed_free)(struct drm_i915_gem_object *obj); + + /** + * migrate - Migrate object to a different region either for + * pinning or for as long as the object lock is held. + */ + int (*migrate)(struct drm_i915_gem_object *obj, + struct intel_memory_region *mr); + void (*release)(struct drm_i915_gem_object *obj); + const struct vm_operations_struct *mmap_ops; const char *name; /* friendly name for debug, e.g. lockdep classes */ }; +/** + * enum i915_cache_level - The supported GTT caching values for system memory + * pages. + * + * These translate to some special GTT PTE bits when binding pages into some + * address space. It also determines whether an object, or rather its pages are + * coherent with the GPU, when also reading or writing through the CPU cache + * with those pages. + * + * Userspace can also control this through struct drm_i915_gem_caching. + */ +enum i915_cache_level { + /** + * @I915_CACHE_NONE: + * + * GPU access is not coherent with the CPU cache. If the cache is dirty + * and we need the underlying pages to be coherent with some later GPU + * access then we need to manually flush the pages. + * + * On shared LLC platforms reads and writes through the CPU cache are + * still coherent even with this setting. See also + * &drm_i915_gem_object.cache_coherent for more details. Due to this we + * should only ever use uncached for scanout surfaces, otherwise we end + * up over-flushing in some places. + * + * This is the default on non-LLC platforms. + */ + I915_CACHE_NONE = 0, + /** + * @I915_CACHE_LLC: + * + * GPU access is coherent with the CPU cache. If the cache is dirty, + * then the GPU will ensure that access remains coherent, when both + * reading and writing through the CPU cache. GPU writes can dirty the + * CPU cache. + * + * Not used for scanout surfaces. + * + * Applies to both platforms with shared LLC(HAS_LLC), and snooping + * based platforms(HAS_SNOOP). + * + * This is the default on shared LLC platforms. The only exception is + * scanout objects, where the display engine is not coherent with the + * CPU cache. For such objects I915_CACHE_NONE or I915_CACHE_WT is + * automatically applied by the kernel in pin_for_display, if userspace + * has not done so already. + */ + I915_CACHE_LLC, + /** + * @I915_CACHE_L3_LLC: + * + * Explicitly enable the Gfx L3 cache, with coherent LLC. + * + * The Gfx L3 sits between the domain specific caches, e.g + * sampler/render caches, and the larger LLC. LLC is coherent with the + * GPU, but L3 is only visible to the GPU, so likely needs to be flushed + * when the workload completes. + * + * Not used for scanout surfaces. + * + * Only exposed on some gen7 + GGTT. More recent hardware has dropped + * this explicit setting, where it should now be enabled by default. 
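As a minimal illustration of the defaults described above (shared-LLC platforms default to I915_CACHE_LLC, non-LLC platforms to I915_CACHE_NONE); the helper name is hypothetical and this is not the driver's actual selection path, which also accounts for placement and mapping caching (see i915_ttm_cache_level() later in this patch):

/* Illustrative only: the default caching level sketched from the text above. */
static enum i915_cache_level
example_default_cache_level(struct drm_i915_private *i915)
{
        return HAS_LLC(i915) ? I915_CACHE_LLC : I915_CACHE_NONE;
}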
+ */ + I915_CACHE_L3_LLC, + /** + * @I915_CACHE_WT: + * + * Write-through. Used for scanout surfaces. + * + * The GPU can utilise the caches, while still having the display engine + * be coherent with GPU writes, as a result we don't need to flush the + * CPU caches when moving out of the render domain. This is the default + * setting chosen by the kernel, if supported by the HW, otherwise we + * fallback to I915_CACHE_NONE. On the CPU side writes through the CPU + * cache still need to be flushed, to remain coherent with the display + * engine. + */ + I915_CACHE_WT, +}; + enum i915_map_type { I915_MAP_WB = 0, I915_MAP_WC, @@ -81,6 +185,7 @@ enum i915_mmap_type { I915_MMAP_TYPE_WC, I915_MMAP_TYPE_WB, I915_MMAP_TYPE_UC, + I915_MMAP_TYPE_FIXED, }; struct i915_mmap_offset { @@ -185,23 +290,138 @@ struct drm_i915_gem_object { unsigned long flags; #define I915_BO_ALLOC_CONTIGUOUS BIT(0) #define I915_BO_ALLOC_VOLATILE BIT(1) -#define I915_BO_ALLOC_STRUCT_PAGE BIT(2) -#define I915_BO_ALLOC_CPU_CLEAR BIT(3) +#define I915_BO_ALLOC_CPU_CLEAR BIT(2) +#define I915_BO_ALLOC_USER BIT(3) #define I915_BO_ALLOC_FLAGS (I915_BO_ALLOC_CONTIGUOUS | \ I915_BO_ALLOC_VOLATILE | \ - I915_BO_ALLOC_STRUCT_PAGE | \ - I915_BO_ALLOC_CPU_CLEAR) + I915_BO_ALLOC_CPU_CLEAR | \ + I915_BO_ALLOC_USER) #define I915_BO_READONLY BIT(4) #define I915_TILING_QUIRK_BIT 5 /* unknown swizzling; do not release! */ - /* - * Is the object to be mapped as read-only to the GPU - * Only honoured if hardware has relevant pte bit + /** + * @mem_flags - Mutable placement-related flags + * + * These are flags that indicate specifics of the memory region + * the object is currently in. As such they are only stable + * either under the object lock or if the object is pinned. + */ + unsigned int mem_flags; +#define I915_BO_FLAG_STRUCT_PAGE BIT(0) /* Object backed by struct pages */ +#define I915_BO_FLAG_IOMEM BIT(1) /* Object backed by IO memory */ + /** + * @cache_level: The desired GTT caching level. + * + * See enum i915_cache_level for possible values, along with what + * each does. */ unsigned int cache_level:3; - unsigned int cache_coherent:2; + /** + * @cache_coherent: + * + * Track whether the pages are coherent with the GPU if reading or + * writing through the CPU caches. The largely depends on the + * @cache_level setting. + * + * On platforms which don't have the shared LLC(HAS_SNOOP), like on Atom + * platforms, coherency must be explicitly requested with some special + * GTT caching bits(see enum i915_cache_level). When enabling coherency + * it does come at a performance and power cost on such platforms. On + * the flip side the kernel does not need to manually flush any buffers + * which need to be coherent with the GPU, if the object is not coherent + * i.e @cache_coherent is zero. + * + * On platforms that share the LLC with the CPU(HAS_LLC), all GT memory + * access will automatically snoop the CPU caches(even with CACHE_NONE). + * The one exception is when dealing with the display engine, like with + * scanout surfaces. To handle this the kernel will always flush the + * surface out of the CPU caches when preparing it for scanout. Also + * note that since scanout surfaces are only ever read by the display + * engine we only need to care about flushing any writes through the CPU + * cache, reads on the other hand will always be coherent. + * + * Something strange here is why @cache_coherent is not a simple + * boolean, i.e coherent vs non-coherent. 
The reasoning for this is back + * to the display engine not being fully coherent. As a result scanout + * surfaces will either be marked as I915_CACHE_NONE or I915_CACHE_WT. + * In the case of seeing I915_CACHE_NONE the kernel makes the assumption + * that this is likely a scanout surface, and will set @cache_coherent + * as only I915_BO_CACHE_COHERENT_FOR_READ, on platforms with the shared + * LLC. The kernel uses this to always flush writes through the CPU + * cache as early as possible, where it can, in effect keeping + * @cache_dirty clean, so we can potentially avoid stalling when + * flushing the surface just before doing the scanout. This does mean + * we might unnecessarily flush non-scanout objects in some places, but + * the default assumption is that all normal objects should be using + * I915_CACHE_LLC, at least on platforms with the shared LLC. + * + * Supported values: + * + * I915_BO_CACHE_COHERENT_FOR_READ: + * + * On shared LLC platforms, we use this for special scanout surfaces, + * where the display engine is not coherent with the CPU cache. As such + * we need to ensure we flush any writes before doing the scanout. As an + * optimisation we try to flush any writes as early as possible to avoid + * stalling later. + * + * Thus for scanout surfaces using I915_CACHE_NONE, on shared LLC + * platforms, we use: + * + * cache_coherent = I915_BO_CACHE_COHERENT_FOR_READ + * + * While for normal objects that are fully coherent, including special + * scanout surfaces marked as I915_CACHE_WT, we use: + * + * cache_coherent = I915_BO_CACHE_COHERENT_FOR_READ | + * I915_BO_CACHE_COHERENT_FOR_WRITE + * + * And then for objects that are not coherent at all we use: + * + * cache_coherent = 0 + * + * I915_BO_CACHE_COHERENT_FOR_WRITE: + * + * When writing through the CPU cache, the GPU is still coherent. Note + * that this also implies I915_BO_CACHE_COHERENT_FOR_READ. + */ #define I915_BO_CACHE_COHERENT_FOR_READ BIT(0) #define I915_BO_CACHE_COHERENT_FOR_WRITE BIT(1) + unsigned int cache_coherent:2; + + /** + * @cache_dirty: + * + * Track if we are we dirty with writes through the CPU cache for this + * object. As a result reading directly from main memory might yield + * stale data. + * + * This also ties into whether the kernel is tracking the object as + * coherent with the GPU, as per @cache_coherent, as it determines if + * flushing might be needed at various points. + * + * Another part of @cache_dirty is managing flushing when first + * acquiring the pages for system memory, at this point the pages are + * considered foreign, so the default assumption is that the cache is + * dirty, for example the page zeroing done by the kernel might leave + * writes though the CPU cache, or swapping-in, while the actual data in + * main memory is potentially stale. Note that this is a potential + * security issue when dealing with userspace objects and zeroing. Now, + * whether we actually need apply the big sledgehammer of flushing all + * the pages on acquire depends on if @cache_coherent is marked as + * I915_BO_CACHE_COHERENT_FOR_WRITE, i.e that the GPU will be coherent + * for both reads and writes though the CPU cache. + * + * Note that on shared LLC platforms we still apply the heavy flush for + * I915_CACHE_NONE objects, under the assumption that this is going to + * be used for scanout. 
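Read as code, the rule spelled out above for when a manual CPU-cache flush is needed before the GPU (or display engine) consumes the pages boils down to roughly the following; this is an illustration of the flag semantics only, not the driver's actual flush path:

/* Illustration of the @cache_coherent / @cache_dirty semantics above. */
static bool
example_needs_flush_before_gpu_use(const struct drm_i915_gem_object *obj)
{
        /* GPU snoops CPU writes; nothing to flush manually. */
        if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)
                return false;

        /* Otherwise only flush when there may be dirty CPU-cache lines. */
        return obj->cache_dirty;
}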
+ * + * Update: On some hardware there is now also the 'Bypass LLC' MOCS + * entry, which defeats our @cache_coherent tracking, since userspace + * can freely bypass the CPU cache when touching the pages with the GPU, + * where the kernel is completely unaware. On such platform we need + * apply the sledgehammer-on-acquire regardless of the @cache_coherent. + */ unsigned int cache_dirty:1; /** @@ -247,9 +467,10 @@ struct drm_i915_gem_object { struct intel_memory_region *region; /** - * Memory manager node allocated for this object. + * Memory manager resource allocated for this object. Only + * needed for the mock region. */ - void *st_mm_node; + struct ttm_resource *res; /** * Element within memory_region->objects or region->purgeable @@ -310,6 +531,12 @@ struct drm_i915_gem_object { bool dirty:1; } mm; + struct { + struct sg_table *cached_io_st; + struct i915_gem_object_page_iter get_io_page; + bool created:1; + } ttm; + /** Record of address bit 17 of each page at last unbind. */ unsigned long *bit_17; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c index 6444e097016d..8eb1c3a6fc9c 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c @@ -321,8 +321,7 @@ static void *i915_gem_object_map_pfn(struct drm_i915_gem_object *obj, dma_addr_t addr; void *vaddr; - if (type != I915_MAP_WC) - return ERR_PTR(-ENODEV); + GEM_BUG_ON(type != I915_MAP_WC); if (n_pfn > ARRAY_SIZE(stack)) { /* Too big for stack -- allocate temporary array instead */ @@ -351,7 +350,7 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, int err; if (!i915_gem_object_has_struct_page(obj) && - !i915_gem_object_type_has(obj, I915_GEM_OBJECT_HAS_IOMEM)) + !i915_gem_object_has_iomem(obj)) return ERR_PTR(-ENXIO); assert_object_held(obj); @@ -374,6 +373,34 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, } GEM_BUG_ON(!i915_gem_object_has_pages(obj)); + /* + * For discrete our CPU mappings needs to be consistent in order to + * function correctly on !x86. When mapping things through TTM, we use + * the same rules to determine the caching type. + * + * The caching rules, starting from DG1: + * + * - If the object can be placed in device local-memory, then the + * pages should be allocated and mapped as write-combined only. + * + * - Everything else is always allocated and mapped as write-back, + * with the guarantee that everything is also coherent with the + * GPU. + * + * Internal users of lmem are already expected to get this right, so no + * fudging needed there. 
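From the caller's side, the mapping rule above means an object that can be placed in local memory may only be mapped write-combined; a short sketch, assuming the object lock is already held and using a hypothetical helper name:

/* Illustrative only: choose a CPU mapping type following the rules above. */
static void *example_pin_map(struct drm_i915_gem_object *obj)
{
        enum i915_map_type type = I915_MAP_WB;

        /* lmem-capable objects must be mapped write-combined. */
        if (i915_gem_object_placement_possible(obj, INTEL_MEMORY_LOCAL))
                type = I915_MAP_WC;

        return i915_gem_object_pin_map(obj, type);
}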
+ */ + if (i915_gem_object_placement_possible(obj, INTEL_MEMORY_LOCAL)) { + if (type != I915_MAP_WC && !obj->mm.n_placements) { + ptr = ERR_PTR(-ENODEV); + goto err_unpin; + } + + type = I915_MAP_WC; + } else if (IS_DGFX(to_i915(obj->base.dev))) { + type = I915_MAP_WB; + } + ptr = page_unpack_bits(obj->mm.mapping, &has_type); if (ptr && has_type != type) { if (pinned) { @@ -467,9 +494,8 @@ __i915_gem_object_get_sg(struct drm_i915_gem_object *obj, struct i915_gem_object_page_iter *iter, unsigned int n, unsigned int *offset, - bool allow_alloc) + bool dma) { - const bool dma = iter == &obj->mm.get_dma_page; struct scatterlist *sg; unsigned int idx, count; @@ -490,9 +516,6 @@ __i915_gem_object_get_sg(struct drm_i915_gem_object *obj, if (n < READ_ONCE(iter->sg_idx)) goto lookup; - if (!allow_alloc) - goto manual_lookup; - mutex_lock(&iter->lock); /* We prefer to reuse the last sg so that repeated lookup of this @@ -542,16 +565,7 @@ scan: if (unlikely(n < idx)) /* insertion completed by another thread */ goto lookup; - goto manual_walk; - -manual_lookup: - idx = 0; - sg = obj->mm.pages->sgl; - count = __sg_page_count(sg); - -manual_walk: - /* - * In case we failed to insert the entry into the radixtree, we need + /* In case we failed to insert the entry into the radixtree, we need * to look beyond the current sg. */ while (idx + count <= n) { @@ -598,7 +612,7 @@ i915_gem_object_get_page(struct drm_i915_gem_object *obj, unsigned int n) GEM_BUG_ON(!i915_gem_object_has_struct_page(obj)); - sg = i915_gem_object_get_sg(obj, n, &offset, true); + sg = i915_gem_object_get_sg(obj, n, &offset); return nth_page(sg_page(sg), offset); } @@ -624,7 +638,7 @@ i915_gem_object_get_dma_address_len(struct drm_i915_gem_object *obj, struct scatterlist *sg; unsigned int offset; - sg = i915_gem_object_get_sg_dma(obj, n, &offset, true); + sg = i915_gem_object_get_sg_dma(obj, n, &offset); if (len) *len = sg_dma_len(sg) - (offset << PAGE_SHIFT); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/i915_gem_phys.c index be72ad0634ba..7986612f48fa 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_phys.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_phys.c @@ -76,7 +76,7 @@ static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) intel_gt_chipset_flush(&to_i915(obj->base.dev)->gt); /* We're no longer struct page backed */ - obj->flags &= ~I915_BO_ALLOC_STRUCT_PAGE; + obj->mem_flags &= ~I915_BO_FLAG_STRUCT_PAGE; __i915_gem_object_set_pages(obj, st, sg->length); return 0; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_region.c b/drivers/gpu/drm/i915/gem/i915_gem_region.c index f25e6646c5b7..1f557b2178ed 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_region.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_region.c @@ -13,16 +13,8 @@ void i915_gem_object_init_memory_region(struct drm_i915_gem_object *obj, { obj->mm.region = intel_memory_region_get(mem); - if (obj->base.size <= mem->min_page_size) - obj->flags |= I915_BO_ALLOC_CONTIGUOUS; - mutex_lock(&mem->objects.lock); - - if (obj->flags & I915_BO_ALLOC_VOLATILE) - list_add(&obj->mm.region_link, &mem->objects.purgeable); - else - list_add(&obj->mm.region_link, &mem->objects.list); - + list_add(&obj->mm.region_link, &mem->objects.list); mutex_unlock(&mem->objects.lock); } @@ -40,9 +32,11 @@ void i915_gem_object_release_memory_region(struct drm_i915_gem_object *obj) struct drm_i915_gem_object * i915_gem_object_create_region(struct intel_memory_region *mem, resource_size_t size, + resource_size_t page_size, unsigned int flags) { struct 
drm_i915_gem_object *obj; + resource_size_t default_page_size; int err; /* @@ -56,7 +50,14 @@ i915_gem_object_create_region(struct intel_memory_region *mem, if (!mem) return ERR_PTR(-ENODEV); - size = round_up(size, mem->min_page_size); + default_page_size = mem->min_page_size; + if (page_size) + default_page_size = page_size; + + GEM_BUG_ON(!is_power_of_2_u64(default_page_size)); + GEM_BUG_ON(default_page_size < PAGE_SIZE); + + size = round_up(size, default_page_size); GEM_BUG_ON(!size); GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_MIN_ALIGNMENT)); @@ -68,7 +69,7 @@ i915_gem_object_create_region(struct intel_memory_region *mem, if (!obj) return ERR_PTR(-ENOMEM); - err = mem->ops->init_object(mem, obj, size, flags); + err = mem->ops->init_object(mem, obj, size, page_size, flags); if (err) goto err_object_free; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_region.h b/drivers/gpu/drm/i915/gem/i915_gem_region.h index 84fcb3297400..1008e580a89a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_region.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_region.h @@ -19,6 +19,7 @@ void i915_gem_object_release_memory_region(struct drm_i915_gem_object *obj); struct drm_i915_gem_object * i915_gem_object_create_region(struct intel_memory_region *mem, resource_size_t size, + resource_size_t page_size, unsigned int flags); #endif diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index 5d16c4462fda..11f072193f3b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -182,6 +182,24 @@ rebuild_st: if (i915_gem_object_needs_bit17_swizzle(obj)) i915_gem_object_do_bit_17_swizzle(obj, st); + /* + * EHL and JSL add the 'Bypass LLC' MOCS entry, which should make it + * possible for userspace to bypass the GTT caching bits set by the + * kernel, as per the given object cache_level. This is troublesome + * since the heavy flush we apply when first gathering the pages is + * skipped if the kernel thinks the object is coherent with the GPU. As + * a result it might be possible to bypass the cache and read the + * contents of the page directly, which could be stale data. If it's + * just a case of userspace shooting themselves in the foot then so be + * it, but since i915 takes the stance of always zeroing memory before + * handing it to userspace, we need to prevent this. + * + * By setting cache_dirty here we make the clflush in set_pages + * unconditional on such platforms. 
+ */ + if (IS_JSL_EHL(i915) && obj->flags & I915_BO_ALLOC_USER) + obj->cache_dirty = true; + __i915_gem_object_set_pages(obj, st, sg_page_sizes); return 0; @@ -302,6 +320,7 @@ void i915_gem_object_put_pages_shmem(struct drm_i915_gem_object *obj, struct sg_ struct pagevec pvec; struct page *page; + GEM_WARN_ON(IS_DGFX(to_i915(obj->base.dev))); __i915_gem_object_release_shmem(obj, pages, true); i915_gem_gtt_finish_pages(obj, pages); @@ -444,7 +463,7 @@ shmem_pread(struct drm_i915_gem_object *obj, static void shmem_release(struct drm_i915_gem_object *obj) { - if (obj->flags & I915_BO_ALLOC_STRUCT_PAGE) + if (i915_gem_object_has_struct_page(obj)) i915_gem_object_release_memory_region(obj); fput(obj->base.filp); @@ -489,6 +508,7 @@ static int __create_shmem(struct drm_i915_private *i915, static int shmem_object_init(struct intel_memory_region *mem, struct drm_i915_gem_object *obj, resource_size_t size, + resource_size_t page_size, unsigned int flags) { static struct lock_class_key lock_class; @@ -513,9 +533,8 @@ static int shmem_object_init(struct intel_memory_region *mem, mapping_set_gfp_mask(mapping, mask); GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM)); - i915_gem_object_init(obj, &i915_gem_shmem_ops, &lock_class, - I915_BO_ALLOC_STRUCT_PAGE); - + i915_gem_object_init(obj, &i915_gem_shmem_ops, &lock_class, 0); + obj->mem_flags |= I915_BO_FLAG_STRUCT_PAGE; obj->write_domain = I915_GEM_DOMAIN_CPU; obj->read_domains = I915_GEM_DOMAIN_CPU; @@ -548,7 +567,7 @@ i915_gem_object_create_shmem(struct drm_i915_private *i915, resource_size_t size) { return i915_gem_object_create_region(i915->mm.regions[INTEL_REGION_SMEM], - size, 0); + size, 0, 0); } /* Allocate a new GEM object and fill it with the supplied data */ @@ -561,6 +580,7 @@ i915_gem_object_create_shmem_from_data(struct drm_i915_private *dev_priv, resource_size_t offset; int err; + GEM_WARN_ON(IS_DGFX(dev_priv)); obj = i915_gem_object_create_shmem(dev_priv, round_up(size, PAGE_SIZE)); if (IS_ERR(obj)) return obj; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c index b0c3a7dc60d1..ddd37ccb1362 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c @@ -447,7 +447,6 @@ static int i915_gem_init_stolen(struct intel_memory_region *mem) break; case 8: case 9: - case 10: if (IS_LP(i915)) chv_get_stolen_reserved(i915, uncore, &reserved_base, &reserved_size); @@ -670,6 +669,7 @@ static int __i915_gem_object_create_stolen(struct intel_memory_region *mem, static int _i915_gem_object_stolen_init(struct intel_memory_region *mem, struct drm_i915_gem_object *obj, resource_size_t size, + resource_size_t page_size, unsigned int flags) { struct drm_i915_private *i915 = mem->i915; @@ -708,7 +708,7 @@ struct drm_i915_gem_object * i915_gem_object_create_stolen(struct drm_i915_private *i915, resource_size_t size) { - return i915_gem_object_create_region(i915->mm.stolen_region, size, 0); + return i915_gem_object_create_region(i915->mm.stolen_region, size, 0, 0); } static int init_stolen_smem(struct intel_memory_region *mem) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c new file mode 100644 index 000000000000..771eb2963123 --- /dev/null +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -0,0 +1,965 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +#include <drm/ttm/ttm_bo_driver.h> +#include <drm/ttm/ttm_placement.h> + +#include "i915_drv.h" +#include 
"intel_memory_region.h" +#include "intel_region_ttm.h" + +#include "gem/i915_gem_object.h" +#include "gem/i915_gem_region.h" +#include "gem/i915_gem_ttm.h" +#include "gem/i915_gem_mman.h" + +#include "gt/intel_migrate.h" +#include "gt/intel_engine_pm.h" + +#define I915_PL_LMEM0 TTM_PL_PRIV +#define I915_PL_SYSTEM TTM_PL_SYSTEM +#define I915_PL_STOLEN TTM_PL_VRAM +#define I915_PL_GGTT TTM_PL_TT + +#define I915_TTM_PRIO_PURGE 0 +#define I915_TTM_PRIO_NO_PAGES 1 +#define I915_TTM_PRIO_HAS_PAGES 2 + +/* + * Size of struct ttm_place vector in on-stack struct ttm_placement allocs + */ +#define I915_TTM_MAX_PLACEMENTS INTEL_REGION_UNKNOWN + +/** + * struct i915_ttm_tt - TTM page vector with additional private information + * @ttm: The base TTM page vector. + * @dev: The struct device used for dma mapping and unmapping. + * @cached_st: The cached scatter-gather table. + * + * Note that DMA may be going on right up to the point where the page- + * vector is unpopulated in delayed destroy. Hence keep the + * scatter-gather table mapped and cached up to that point. This is + * different from the cached gem object io scatter-gather table which + * doesn't have an associated dma mapping. + */ +struct i915_ttm_tt { + struct ttm_tt ttm; + struct device *dev; + struct sg_table *cached_st; +}; + +static const struct ttm_place sys_placement_flags = { + .fpfn = 0, + .lpfn = 0, + .mem_type = I915_PL_SYSTEM, + .flags = 0, +}; + +static struct ttm_placement i915_sys_placement = { + .num_placement = 1, + .placement = &sys_placement_flags, + .num_busy_placement = 1, + .busy_placement = &sys_placement_flags, +}; + +static int i915_ttm_err_to_gem(int err) +{ + /* Fastpath */ + if (likely(!err)) + return 0; + + switch (err) { + case -EBUSY: + /* + * TTM likes to convert -EDEADLK to -EBUSY, and wants us to + * restart the operation, since we don't record the contending + * lock. We use -EAGAIN to restart. + */ + return -EAGAIN; + case -ENOSPC: + /* + * Memory type / region is full, and we can't evict. + * Except possibly system, that returns -ENOMEM; + */ + return -ENXIO; + default: + break; + } + + return err; +} + +static bool gpu_binds_iomem(struct ttm_resource *mem) +{ + return mem->mem_type != TTM_PL_SYSTEM; +} + +static bool cpu_maps_iomem(struct ttm_resource *mem) +{ + /* Once / if we support GGTT, this is also false for cached ttm_tts */ + return mem->mem_type != TTM_PL_SYSTEM; +} + +static enum i915_cache_level +i915_ttm_cache_level(struct drm_i915_private *i915, struct ttm_resource *res, + struct ttm_tt *ttm) +{ + return ((HAS_LLC(i915) || HAS_SNOOP(i915)) && !gpu_binds_iomem(res) && + ttm->caching == ttm_cached) ? I915_CACHE_LLC : + I915_CACHE_NONE; +} + +static void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj); + +static enum ttm_caching +i915_ttm_select_tt_caching(const struct drm_i915_gem_object *obj) +{ + /* + * Objects only allowed in system get cached cpu-mappings. + * Other objects get WC mapping for now. Even if in system. 
+ */ + if (obj->mm.region->type == INTEL_MEMORY_SYSTEM && + obj->mm.n_placements <= 1) + return ttm_cached; + + return ttm_write_combined; +} + +static void +i915_ttm_place_from_region(const struct intel_memory_region *mr, + struct ttm_place *place, + unsigned int flags) +{ + memset(place, 0, sizeof(*place)); + place->mem_type = intel_region_to_ttm_type(mr); + + if (flags & I915_BO_ALLOC_CONTIGUOUS) + place->flags = TTM_PL_FLAG_CONTIGUOUS; +} + +static void +i915_ttm_placement_from_obj(const struct drm_i915_gem_object *obj, + struct ttm_place *requested, + struct ttm_place *busy, + struct ttm_placement *placement) +{ + unsigned int num_allowed = obj->mm.n_placements; + unsigned int flags = obj->flags; + unsigned int i; + + placement->num_placement = 1; + i915_ttm_place_from_region(num_allowed ? obj->mm.placements[0] : + obj->mm.region, requested, flags); + + /* Cache this on object? */ + placement->num_busy_placement = num_allowed; + for (i = 0; i < placement->num_busy_placement; ++i) + i915_ttm_place_from_region(obj->mm.placements[i], busy + i, flags); + + if (num_allowed == 0) { + *busy = *requested; + placement->num_busy_placement = 1; + } + + placement->placement = requested; + placement->busy_placement = busy; +} + +static struct ttm_tt *i915_ttm_tt_create(struct ttm_buffer_object *bo, + uint32_t page_flags) +{ + struct ttm_resource_manager *man = + ttm_manager_type(bo->bdev, bo->resource->mem_type); + struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); + struct i915_ttm_tt *i915_tt; + int ret; + + i915_tt = kzalloc(sizeof(*i915_tt), GFP_KERNEL); + if (!i915_tt) + return NULL; + + if (obj->flags & I915_BO_ALLOC_CPU_CLEAR && + man->use_tt) + page_flags |= TTM_PAGE_FLAG_ZERO_ALLOC; + + ret = ttm_tt_init(&i915_tt->ttm, bo, page_flags, + i915_ttm_select_tt_caching(obj)); + if (ret) { + kfree(i915_tt); + return NULL; + } + + i915_tt->dev = obj->base.dev->dev; + + return &i915_tt->ttm; +} + +static void i915_ttm_tt_unpopulate(struct ttm_device *bdev, struct ttm_tt *ttm) +{ + struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm); + + if (i915_tt->cached_st) { + dma_unmap_sgtable(i915_tt->dev, i915_tt->cached_st, + DMA_BIDIRECTIONAL, 0); + sg_free_table(i915_tt->cached_st); + kfree(i915_tt->cached_st); + i915_tt->cached_st = NULL; + } + ttm_pool_free(&bdev->pool, ttm); +} + +static void i915_ttm_tt_destroy(struct ttm_device *bdev, struct ttm_tt *ttm) +{ + struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm); + + ttm_tt_destroy_common(bdev, ttm); + ttm_tt_fini(ttm); + kfree(i915_tt); +} + +static bool i915_ttm_eviction_valuable(struct ttm_buffer_object *bo, + const struct ttm_place *place) +{ + struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); + + /* Will do for now. 
Our pinned objects are still on TTM's LRU lists */ + return i915_gem_object_evictable(obj); +} + +static void i915_ttm_evict_flags(struct ttm_buffer_object *bo, + struct ttm_placement *placement) +{ + *placement = i915_sys_placement; +} + +static int i915_ttm_move_notify(struct ttm_buffer_object *bo) +{ + struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); + int ret; + + ret = i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_ACTIVE); + if (ret) + return ret; + + ret = __i915_gem_object_put_pages(obj); + if (ret) + return ret; + + return 0; +} + +static void i915_ttm_free_cached_io_st(struct drm_i915_gem_object *obj) +{ + struct radix_tree_iter iter; + void __rcu **slot; + + if (!obj->ttm.cached_io_st) + return; + + rcu_read_lock(); + radix_tree_for_each_slot(slot, &obj->ttm.get_io_page.radix, &iter, 0) + radix_tree_delete(&obj->ttm.get_io_page.radix, iter.index); + rcu_read_unlock(); + + sg_free_table(obj->ttm.cached_io_st); + kfree(obj->ttm.cached_io_st); + obj->ttm.cached_io_st = NULL; +} + +static void +i915_ttm_adjust_domains_after_move(struct drm_i915_gem_object *obj) +{ + struct ttm_buffer_object *bo = i915_gem_to_ttm(obj); + + if (cpu_maps_iomem(bo->resource) || bo->ttm->caching != ttm_cached) { + obj->write_domain = I915_GEM_DOMAIN_WC; + obj->read_domains = I915_GEM_DOMAIN_WC; + } else { + obj->write_domain = I915_GEM_DOMAIN_CPU; + obj->read_domains = I915_GEM_DOMAIN_CPU; + } +} + +static void i915_ttm_adjust_gem_after_move(struct drm_i915_gem_object *obj) +{ + struct ttm_buffer_object *bo = i915_gem_to_ttm(obj); + unsigned int cache_level; + unsigned int i; + + /* + * If object was moved to an allowable region, update the object + * region to consider it migrated. Note that if it's currently not + * in an allowable region, it's evicted and we don't update the + * object region. + */ + if (intel_region_to_ttm_type(obj->mm.region) != bo->resource->mem_type) { + for (i = 0; i < obj->mm.n_placements; ++i) { + struct intel_memory_region *mr = obj->mm.placements[i]; + + if (intel_region_to_ttm_type(mr) == bo->resource->mem_type && + mr != obj->mm.region) { + i915_gem_object_release_memory_region(obj); + i915_gem_object_init_memory_region(obj, mr); + break; + } + } + } + + obj->mem_flags &= ~(I915_BO_FLAG_STRUCT_PAGE | I915_BO_FLAG_IOMEM); + + obj->mem_flags |= cpu_maps_iomem(bo->resource) ? I915_BO_FLAG_IOMEM : + I915_BO_FLAG_STRUCT_PAGE; + + cache_level = i915_ttm_cache_level(to_i915(bo->base.dev), bo->resource, + bo->ttm); + i915_gem_object_set_cache_coherency(obj, cache_level); +} + +static void i915_ttm_purge(struct drm_i915_gem_object *obj) +{ + struct ttm_buffer_object *bo = i915_gem_to_ttm(obj); + struct ttm_operation_ctx ctx = { + .interruptible = true, + .no_wait_gpu = false, + }; + struct ttm_placement place = {}; + int ret; + + if (obj->mm.madv == __I915_MADV_PURGED) + return; + + /* TTM's purge interface. Note that we might be reentering. 
*/ + ret = ttm_bo_validate(bo, &place, &ctx); + if (!ret) { + obj->write_domain = 0; + obj->read_domains = 0; + i915_ttm_adjust_gem_after_move(obj); + i915_ttm_free_cached_io_st(obj); + obj->mm.madv = __I915_MADV_PURGED; + } +} + +static void i915_ttm_swap_notify(struct ttm_buffer_object *bo) +{ + struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); + int ret = i915_ttm_move_notify(bo); + + GEM_WARN_ON(ret); + GEM_WARN_ON(obj->ttm.cached_io_st); + if (!ret && obj->mm.madv != I915_MADV_WILLNEED) + i915_ttm_purge(obj); +} + +static void i915_ttm_delete_mem_notify(struct ttm_buffer_object *bo) +{ + struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); + + if (likely(obj)) { + /* This releases all gem object bindings to the backend. */ + i915_ttm_free_cached_io_st(obj); + __i915_gem_free_object(obj); + } +} + +static struct intel_memory_region * +i915_ttm_region(struct ttm_device *bdev, int ttm_mem_type) +{ + struct drm_i915_private *i915 = container_of(bdev, typeof(*i915), bdev); + + /* There's some room for optimization here... */ + GEM_BUG_ON(ttm_mem_type != I915_PL_SYSTEM && + ttm_mem_type < I915_PL_LMEM0); + if (ttm_mem_type == I915_PL_SYSTEM) + return intel_memory_region_lookup(i915, INTEL_MEMORY_SYSTEM, + 0); + + return intel_memory_region_lookup(i915, INTEL_MEMORY_LOCAL, + ttm_mem_type - I915_PL_LMEM0); +} + +static struct sg_table *i915_ttm_tt_get_st(struct ttm_tt *ttm) +{ + struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm); + struct scatterlist *sg; + struct sg_table *st; + int ret; + + if (i915_tt->cached_st) + return i915_tt->cached_st; + + st = kzalloc(sizeof(*st), GFP_KERNEL); + if (!st) + return ERR_PTR(-ENOMEM); + + sg = __sg_alloc_table_from_pages + (st, ttm->pages, ttm->num_pages, 0, + (unsigned long)ttm->num_pages << PAGE_SHIFT, + i915_sg_segment_size(), NULL, 0, GFP_KERNEL); + if (IS_ERR(sg)) { + kfree(st); + return ERR_CAST(sg); + } + + ret = dma_map_sgtable(i915_tt->dev, st, DMA_BIDIRECTIONAL, 0); + if (ret) { + sg_free_table(st); + kfree(st); + return ERR_PTR(ret); + } + + i915_tt->cached_st = st; + return st; +} + +static struct sg_table * +i915_ttm_resource_get_st(struct drm_i915_gem_object *obj, + struct ttm_resource *res) +{ + struct ttm_buffer_object *bo = i915_gem_to_ttm(obj); + + if (!gpu_binds_iomem(res)) + return i915_ttm_tt_get_st(bo->ttm); + + /* + * If CPU mapping differs, we need to add the ttm_tt pages to + * the resulting st. Might make sense for GGTT. 
+ */ + GEM_WARN_ON(!cpu_maps_iomem(res)); + return intel_region_ttm_resource_to_st(obj->mm.region, res); +} + +static int i915_ttm_accel_move(struct ttm_buffer_object *bo, + struct ttm_resource *dst_mem, + struct sg_table *dst_st) +{ + struct drm_i915_private *i915 = container_of(bo->bdev, typeof(*i915), + bdev); + struct ttm_resource_manager *src_man = + ttm_manager_type(bo->bdev, bo->resource->mem_type); + struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); + struct sg_table *src_st; + struct i915_request *rq; + struct ttm_tt *ttm = bo->ttm; + enum i915_cache_level src_level, dst_level; + int ret; + + if (!i915->gt.migrate.context) + return -EINVAL; + + dst_level = i915_ttm_cache_level(i915, dst_mem, ttm); + if (!ttm || !ttm_tt_is_populated(ttm)) { + if (bo->type == ttm_bo_type_kernel) + return -EINVAL; + + if (ttm && !(ttm->page_flags & TTM_PAGE_FLAG_ZERO_ALLOC)) + return 0; + + intel_engine_pm_get(i915->gt.migrate.context->engine); + ret = intel_context_migrate_clear(i915->gt.migrate.context, NULL, + dst_st->sgl, dst_level, + gpu_binds_iomem(dst_mem), + 0, &rq); + + if (!ret && rq) { + i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT); + i915_request_put(rq); + } + intel_engine_pm_put(i915->gt.migrate.context->engine); + } else { + src_st = src_man->use_tt ? i915_ttm_tt_get_st(ttm) : + obj->ttm.cached_io_st; + + src_level = i915_ttm_cache_level(i915, bo->resource, ttm); + intel_engine_pm_get(i915->gt.migrate.context->engine); + ret = intel_context_migrate_copy(i915->gt.migrate.context, + NULL, src_st->sgl, src_level, + gpu_binds_iomem(bo->resource), + dst_st->sgl, dst_level, + gpu_binds_iomem(dst_mem), + &rq); + if (!ret && rq) { + i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT); + i915_request_put(rq); + } + intel_engine_pm_put(i915->gt.migrate.context->engine); + } + + return ret; +} + +static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict, + struct ttm_operation_ctx *ctx, + struct ttm_resource *dst_mem, + struct ttm_place *hop) +{ + struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); + struct ttm_resource_manager *dst_man = + ttm_manager_type(bo->bdev, dst_mem->mem_type); + struct intel_memory_region *dst_reg, *src_reg; + union { + struct ttm_kmap_iter_tt tt; + struct ttm_kmap_iter_iomap io; + } _dst_iter, _src_iter; + struct ttm_kmap_iter *dst_iter, *src_iter; + struct sg_table *dst_st; + int ret; + + dst_reg = i915_ttm_region(bo->bdev, dst_mem->mem_type); + src_reg = i915_ttm_region(bo->bdev, bo->resource->mem_type); + GEM_BUG_ON(!dst_reg || !src_reg); + + /* Sync for now. We could do the actual copy async. */ + ret = ttm_bo_wait_ctx(bo, ctx); + if (ret) + return ret; + + ret = i915_ttm_move_notify(bo); + if (ret) + return ret; + + if (obj->mm.madv != I915_MADV_WILLNEED) { + i915_ttm_purge(obj); + ttm_resource_free(bo, &dst_mem); + return 0; + } + + /* Populate ttm with pages if needed. Typically system memory. */ + if (bo->ttm && (dst_man->use_tt || + (bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED))) { + ret = ttm_tt_populate(bo->bdev, bo->ttm, ctx); + if (ret) + return ret; + } + + dst_st = i915_ttm_resource_get_st(obj, dst_mem); + if (IS_ERR(dst_st)) + return PTR_ERR(dst_st); + + ret = i915_ttm_accel_move(bo, dst_mem, dst_st); + if (ret) { + /* If we start mapping GGTT, we can no longer use man::use_tt here. */ + dst_iter = !cpu_maps_iomem(dst_mem) ? + ttm_kmap_iter_tt_init(&_dst_iter.tt, bo->ttm) : + ttm_kmap_iter_iomap_init(&_dst_iter.io, &dst_reg->iomap, + dst_st, dst_reg->region.start); + + src_iter = !cpu_maps_iomem(bo->resource) ? 
+ ttm_kmap_iter_tt_init(&_src_iter.tt, bo->ttm) : + ttm_kmap_iter_iomap_init(&_src_iter.io, &src_reg->iomap, + obj->ttm.cached_io_st, + src_reg->region.start); + + ttm_move_memcpy(bo, dst_mem->num_pages, dst_iter, src_iter); + } + /* Below dst_mem becomes bo->resource. */ + ttm_bo_move_sync_cleanup(bo, dst_mem); + i915_ttm_adjust_domains_after_move(obj); + i915_ttm_free_cached_io_st(obj); + + if (gpu_binds_iomem(dst_mem) || cpu_maps_iomem(dst_mem)) { + obj->ttm.cached_io_st = dst_st; + obj->ttm.get_io_page.sg_pos = dst_st->sgl; + obj->ttm.get_io_page.sg_idx = 0; + } + + i915_ttm_adjust_gem_after_move(obj); + return 0; +} + +static int i915_ttm_io_mem_reserve(struct ttm_device *bdev, struct ttm_resource *mem) +{ + if (!cpu_maps_iomem(mem)) + return 0; + + mem->bus.caching = ttm_write_combined; + mem->bus.is_iomem = true; + + return 0; +} + +static unsigned long i915_ttm_io_mem_pfn(struct ttm_buffer_object *bo, + unsigned long page_offset) +{ + struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); + unsigned long base = obj->mm.region->iomap.base - obj->mm.region->region.start; + struct scatterlist *sg; + unsigned int ofs; + + GEM_WARN_ON(bo->ttm); + + sg = __i915_gem_object_get_sg(obj, &obj->ttm.get_io_page, page_offset, &ofs, true); + + return ((base + sg_dma_address(sg)) >> PAGE_SHIFT) + ofs; +} + +static struct ttm_device_funcs i915_ttm_bo_driver = { + .ttm_tt_create = i915_ttm_tt_create, + .ttm_tt_unpopulate = i915_ttm_tt_unpopulate, + .ttm_tt_destroy = i915_ttm_tt_destroy, + .eviction_valuable = i915_ttm_eviction_valuable, + .evict_flags = i915_ttm_evict_flags, + .move = i915_ttm_move, + .swap_notify = i915_ttm_swap_notify, + .delete_mem_notify = i915_ttm_delete_mem_notify, + .io_mem_reserve = i915_ttm_io_mem_reserve, + .io_mem_pfn = i915_ttm_io_mem_pfn, +}; + +/** + * i915_ttm_driver - Return a pointer to the TTM device funcs + * + * Return: Pointer to statically allocated TTM device funcs. + */ +struct ttm_device_funcs *i915_ttm_driver(void) +{ + return &i915_ttm_bo_driver; +} + +static int __i915_ttm_get_pages(struct drm_i915_gem_object *obj, + struct ttm_placement *placement) +{ + struct ttm_buffer_object *bo = i915_gem_to_ttm(obj); + struct ttm_operation_ctx ctx = { + .interruptible = true, + .no_wait_gpu = false, + }; + struct sg_table *st; + int real_num_busy; + int ret; + + /* First try only the requested placement. No eviction. */ + real_num_busy = fetch_and_zero(&placement->num_busy_placement); + ret = ttm_bo_validate(bo, placement, &ctx); + if (ret) { + ret = i915_ttm_err_to_gem(ret); + /* + * Anything that wants to restart the operation gets to + * do that. + */ + if (ret == -EDEADLK || ret == -EINTR || ret == -ERESTARTSYS || + ret == -EAGAIN) + return ret; + + /* + * If the initial attempt fails, allow all accepted placements, + * evicting if necessary. + */ + placement->num_busy_placement = real_num_busy; + ret = ttm_bo_validate(bo, placement, &ctx); + if (ret) + return i915_ttm_err_to_gem(ret); + } + + i915_ttm_adjust_lru(obj); + if (bo->ttm && !ttm_tt_is_populated(bo->ttm)) { + ret = ttm_tt_populate(bo->bdev, bo->ttm, &ctx); + if (ret) + return ret; + + i915_ttm_adjust_domains_after_move(obj); + i915_ttm_adjust_gem_after_move(obj); + } + + if (!i915_gem_object_has_pages(obj)) { + /* Object either has a page vector or is an iomem object */ + st = bo->ttm ? 
i915_ttm_tt_get_st(bo->ttm) : obj->ttm.cached_io_st; + if (IS_ERR(st)) + return PTR_ERR(st); + + __i915_gem_object_set_pages(obj, st, i915_sg_dma_sizes(st->sgl)); + } + + return ret; +} + +static int i915_ttm_get_pages(struct drm_i915_gem_object *obj) +{ + struct ttm_place requested, busy[I915_TTM_MAX_PLACEMENTS]; + struct ttm_placement placement; + + GEM_BUG_ON(obj->mm.n_placements > I915_TTM_MAX_PLACEMENTS); + + /* Move to the requested placement. */ + i915_ttm_placement_from_obj(obj, &requested, busy, &placement); + + return __i915_ttm_get_pages(obj, &placement); +} + +/** + * DOC: Migration vs eviction + * + * GEM migration may not be the same as TTM migration / eviction. If + * the TTM core decides to evict an object it may be evicted to a + * TTM memory type that is not in the object's allowable GEM regions, or + * in fact theoretically to a TTM memory type that doesn't correspond to + * a GEM memory region. In that case the object's GEM region is not + * updated, and the data is migrated back to the GEM region at + * get_pages time. TTM may however set up CPU ptes to the object even + * when it is evicted. + * Gem forced migration using the i915_ttm_migrate() op, is allowed even + * to regions that are not in the object's list of allowable placements. + */ +static int i915_ttm_migrate(struct drm_i915_gem_object *obj, + struct intel_memory_region *mr) +{ + struct ttm_place requested; + struct ttm_placement placement; + int ret; + + i915_ttm_place_from_region(mr, &requested, obj->flags); + placement.num_placement = 1; + placement.num_busy_placement = 1; + placement.placement = &requested; + placement.busy_placement = &requested; + + ret = __i915_ttm_get_pages(obj, &placement); + if (ret) + return ret; + + /* + * Reinitialize the region bindings. This is primarily + * required for objects where the new region is not in + * its allowable placements. + */ + if (obj->mm.region != mr) { + i915_gem_object_release_memory_region(obj); + i915_gem_object_init_memory_region(obj, mr); + } + + return 0; +} + +static void i915_ttm_put_pages(struct drm_i915_gem_object *obj, + struct sg_table *st) +{ + /* + * We're currently not called from a shrinker, so put_pages() + * typically means the object is about to destroyed, or called + * from move_notify(). So just avoid doing much for now. + * If the object is not destroyed next, The TTM eviction logic + * and shrinkers will move it out if needed. + */ + + i915_ttm_adjust_lru(obj); +} + +static void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj) +{ + struct ttm_buffer_object *bo = i915_gem_to_ttm(obj); + + /* + * Don't manipulate the TTM LRUs while in TTM bo destruction. + * We're called through i915_ttm_delete_mem_notify(). + */ + if (!kref_read(&bo->kref)) + return; + + /* + * Put on the correct LRU list depending on the MADV status + */ + spin_lock(&bo->bdev->lru_lock); + if (obj->mm.madv != I915_MADV_WILLNEED) { + bo->priority = I915_TTM_PRIO_PURGE; + } else if (!i915_gem_object_has_pages(obj)) { + if (bo->priority < I915_TTM_PRIO_HAS_PAGES) + bo->priority = I915_TTM_PRIO_HAS_PAGES; + } else { + if (bo->priority > I915_TTM_PRIO_NO_PAGES) + bo->priority = I915_TTM_PRIO_NO_PAGES; + } + + ttm_bo_move_to_lru_tail(bo, bo->resource, NULL); + spin_unlock(&bo->bdev->lru_lock); +} + +/* + * TTM-backed gem object destruction requires some clarification. + * Basically we have two possibilities here. We can either rely on the + * i915 delayed destruction and put the TTM object when the object + * is idle. 
This would be detected by TTM which would bypass the + * TTM delayed destroy handling. The other approach is to put the TTM + * object early and rely on the TTM destroyed handling, and then free + * the leftover parts of the GEM object once TTM's destroyed list handling is + * complete. For now, we rely on the latter for two reasons: + * a) TTM can evict an object even when it's on the delayed destroy list, + * which in theory allows for complete eviction. + * b) There is work going on in TTM to allow freeing an object even when + * it's not idle, and using the TTM destroyed list handling could help us + * benefit from that. + */ +static void i915_ttm_delayed_free(struct drm_i915_gem_object *obj) +{ + if (obj->ttm.created) { + ttm_bo_put(i915_gem_to_ttm(obj)); + } else { + __i915_gem_free_object(obj); + call_rcu(&obj->rcu, __i915_gem_free_object_rcu); + } +} + +static vm_fault_t vm_fault_ttm(struct vm_fault *vmf) +{ + struct vm_area_struct *area = vmf->vma; + struct drm_i915_gem_object *obj = + i915_ttm_to_gem(area->vm_private_data); + + /* Sanity check that we allow writing into this object */ + if (unlikely(i915_gem_object_is_readonly(obj) && + area->vm_flags & VM_WRITE)) + return VM_FAULT_SIGBUS; + + return ttm_bo_vm_fault(vmf); +} + +static int +vm_access_ttm(struct vm_area_struct *area, unsigned long addr, + void *buf, int len, int write) +{ + struct drm_i915_gem_object *obj = + i915_ttm_to_gem(area->vm_private_data); + + if (i915_gem_object_is_readonly(obj) && write) + return -EACCES; + + return ttm_bo_vm_access(area, addr, buf, len, write); +} + +static void ttm_vm_open(struct vm_area_struct *vma) +{ + struct drm_i915_gem_object *obj = + i915_ttm_to_gem(vma->vm_private_data); + + GEM_BUG_ON(!obj); + i915_gem_object_get(obj); +} + +static void ttm_vm_close(struct vm_area_struct *vma) +{ + struct drm_i915_gem_object *obj = + i915_ttm_to_gem(vma->vm_private_data); + + GEM_BUG_ON(!obj); + i915_gem_object_put(obj); +} + +static const struct vm_operations_struct vm_ops_ttm = { + .fault = vm_fault_ttm, + .access = vm_access_ttm, + .open = ttm_vm_open, + .close = ttm_vm_close, +}; + +static u64 i915_ttm_mmap_offset(struct drm_i915_gem_object *obj) +{ + /* The ttm_bo must be allocated with I915_BO_ALLOC_USER */ + GEM_BUG_ON(!drm_mm_node_allocated(&obj->base.vma_node.vm_node)); + + return drm_vma_node_offset_addr(&obj->base.vma_node); +} + +static const struct drm_i915_gem_object_ops i915_gem_ttm_obj_ops = { + .name = "i915_gem_object_ttm", + + .get_pages = i915_ttm_get_pages, + .put_pages = i915_ttm_put_pages, + .truncate = i915_ttm_purge, + .adjust_lru = i915_ttm_adjust_lru, + .delayed_free = i915_ttm_delayed_free, + .migrate = i915_ttm_migrate, + .mmap_offset = i915_ttm_mmap_offset, + .mmap_ops = &vm_ops_ttm, +}; + +void i915_ttm_bo_destroy(struct ttm_buffer_object *bo) +{ + struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); + + i915_gem_object_release_memory_region(obj); + mutex_destroy(&obj->ttm.get_io_page.lock); + if (obj->ttm.created) + call_rcu(&obj->rcu, __i915_gem_free_object_rcu); +} + +/** + * __i915_gem_ttm_object_init - Initialize a ttm-backed i915 gem object + * @mem: The initial memory region for the object. + * @obj: The gem object. + * @size: Object size in bytes. + * @flags: gem object flags. + * + * Return: 0 on success, negative error code on failure. 
+ */ +int __i915_gem_ttm_object_init(struct intel_memory_region *mem, + struct drm_i915_gem_object *obj, + resource_size_t size, + resource_size_t page_size, + unsigned int flags) +{ + static struct lock_class_key lock_class; + struct drm_i915_private *i915 = mem->i915; + struct ttm_operation_ctx ctx = { + .interruptible = true, + .no_wait_gpu = false, + }; + enum ttm_bo_type bo_type; + int ret; + + drm_gem_private_object_init(&i915->drm, &obj->base, size); + i915_gem_object_init(obj, &i915_gem_ttm_obj_ops, &lock_class, flags); + i915_gem_object_init_memory_region(obj, mem); + i915_gem_object_make_unshrinkable(obj); + INIT_RADIX_TREE(&obj->ttm.get_io_page.radix, GFP_KERNEL | __GFP_NOWARN); + mutex_init(&obj->ttm.get_io_page.lock); + bo_type = (obj->flags & I915_BO_ALLOC_USER) ? ttm_bo_type_device : + ttm_bo_type_kernel; + + obj->base.vma_node.driver_private = i915_gem_to_ttm(obj); + + /* Forcing the page size is kernel internal only */ + GEM_BUG_ON(page_size && obj->mm.n_placements); + + /* + * If this function fails, it will call the destructor, but + * our caller still owns the object. So no freeing in the + * destructor until obj->ttm.created is true. + * Similarly, in delayed_destroy, we can't call ttm_bo_put() + * until successful initialization. + */ + ret = ttm_bo_init_reserved(&i915->bdev, i915_gem_to_ttm(obj), size, + bo_type, &i915_sys_placement, + page_size >> PAGE_SHIFT, + &ctx, NULL, NULL, i915_ttm_bo_destroy); + if (ret) + return i915_ttm_err_to_gem(ret); + + obj->ttm.created = true; + i915_ttm_adjust_domains_after_move(obj); + i915_ttm_adjust_gem_after_move(obj); + i915_gem_object_unlock(obj); + + return 0; +} + +static const struct intel_memory_region_ops ttm_system_region_ops = { + .init_object = __i915_gem_ttm_object_init, +}; + +struct intel_memory_region * +i915_gem_ttm_system_setup(struct drm_i915_private *i915, + u16 type, u16 instance) +{ + struct intel_memory_region *mr; + + mr = intel_memory_region_create(i915, 0, + totalram_pages() << PAGE_SHIFT, + PAGE_SIZE, 0, + type, instance, + &ttm_system_region_ops); + if (IS_ERR(mr)) + return mr; + + intel_memory_region_set_name(mr, "system-ttm"); + return mr; +} diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.h b/drivers/gpu/drm/i915/gem/i915_gem_ttm.h new file mode 100644 index 000000000000..40927f67b6d9 --- /dev/null +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ +#ifndef _I915_GEM_TTM_H_ +#define _I915_GEM_TTM_H_ + +#include "gem/i915_gem_object_types.h" + +/** + * i915_gem_to_ttm - Convert a struct drm_i915_gem_object to a + * struct ttm_buffer_object. + * @obj: Pointer to the gem object. + * + * Return: Pointer to the embedded struct ttm_buffer_object. + */ +static inline struct ttm_buffer_object * +i915_gem_to_ttm(struct drm_i915_gem_object *obj) +{ + return &obj->__do_not_access; +} + +/* + * i915 ttm gem object destructor. Internal use only. + */ +void i915_ttm_bo_destroy(struct ttm_buffer_object *bo); + +/** + * i915_ttm_to_gem - Convert a struct ttm_buffer_object to an embedding + * struct drm_i915_gem_object. + * + * Return: Pointer to the embedding struct drm_i915_gem_object, or NULL + * if the object was not an i915 ttm object.
+ */ +static inline struct drm_i915_gem_object * +i915_ttm_to_gem(struct ttm_buffer_object *bo) +{ + if (GEM_WARN_ON(bo->destroy != i915_ttm_bo_destroy)) + return NULL; + + return container_of(bo, struct drm_i915_gem_object, __do_not_access); +} + +int __i915_gem_ttm_object_init(struct intel_memory_region *mem, + struct drm_i915_gem_object *obj, + resource_size_t size, + resource_size_t page_size, + unsigned int flags); +#endif diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c index 7487bab11f0b..468a7a617fbf 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c @@ -67,11 +67,11 @@ static bool i915_gem_userptr_invalidate(struct mmu_interval_notifier *mni, if (!mmu_notifier_range_blockable(range)) return false; - spin_lock(&i915->mm.notifier_lock); + write_lock(&i915->mm.notifier_lock); mmu_interval_set_seq(mni, cur_seq); - spin_unlock(&i915->mm.notifier_lock); + write_unlock(&i915->mm.notifier_lock); /* * We don't wait when the process is exiting. This is valid @@ -107,16 +107,15 @@ i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj) static void i915_gem_object_userptr_drop_ref(struct drm_i915_gem_object *obj) { - struct drm_i915_private *i915 = to_i915(obj->base.dev); struct page **pvec = NULL; - spin_lock(&i915->mm.notifier_lock); + assert_object_held_shared(obj); + if (!--obj->userptr.page_ref) { pvec = obj->userptr.pvec; obj->userptr.pvec = NULL; } GEM_BUG_ON(obj->userptr.page_ref < 0); - spin_unlock(&i915->mm.notifier_lock); if (pvec) { const unsigned long num_pages = obj->base.size >> PAGE_SHIFT; @@ -128,7 +127,6 @@ static void i915_gem_object_userptr_drop_ref(struct drm_i915_gem_object *obj) static int i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj) { - struct drm_i915_private *i915 = to_i915(obj->base.dev); const unsigned long num_pages = obj->base.size >> PAGE_SHIFT; unsigned int max_segment = i915_sg_segment_size(); struct sg_table *st; @@ -141,16 +139,13 @@ static int i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj) if (!st) return -ENOMEM; - spin_lock(&i915->mm.notifier_lock); - if (GEM_WARN_ON(!obj->userptr.page_ref)) { - spin_unlock(&i915->mm.notifier_lock); - ret = -EFAULT; + if (!obj->userptr.page_ref) { + ret = -EAGAIN; goto err_free; } obj->userptr.page_ref++; pvec = obj->userptr.pvec; - spin_unlock(&i915->mm.notifier_lock); alloc_table: sg = __sg_alloc_table_from_pages(st, pvec, num_pages, 0, @@ -241,7 +236,7 @@ i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj, i915_gem_object_userptr_drop_ref(obj); } -static int i915_gem_object_userptr_unbind(struct drm_i915_gem_object *obj, bool get_pages) +static int i915_gem_object_userptr_unbind(struct drm_i915_gem_object *obj) { struct sg_table *pages; int err; @@ -259,15 +254,11 @@ static int i915_gem_object_userptr_unbind(struct drm_i915_gem_object *obj, bool if (!IS_ERR_OR_NULL(pages)) i915_gem_userptr_put_pages(obj, pages); - if (get_pages) - err = ____i915_gem_object_get_pages(obj); - return err; } int i915_gem_object_userptr_submit_init(struct drm_i915_gem_object *obj) { - struct drm_i915_private *i915 = to_i915(obj->base.dev); const unsigned long num_pages = obj->base.size >> PAGE_SHIFT; struct page **pvec; unsigned int gup_flags = 0; @@ -277,39 +268,22 @@ int i915_gem_object_userptr_submit_init(struct drm_i915_gem_object *obj) if (obj->userptr.notifier.mm != current->mm) return -EFAULT; + notifier_seq = mmu_interval_read_begin(&obj->userptr.notifier); + ret = 
i915_gem_object_lock_interruptible(obj, NULL); if (ret) return ret; - /* optimistically try to preserve current pages while unlocked */ - if (i915_gem_object_has_pages(obj) && - !mmu_interval_check_retry(&obj->userptr.notifier, - obj->userptr.notifier_seq)) { - spin_lock(&i915->mm.notifier_lock); - if (obj->userptr.pvec && - !mmu_interval_read_retry(&obj->userptr.notifier, - obj->userptr.notifier_seq)) { - obj->userptr.page_ref++; - - /* We can keep using the current binding, this is the fastpath */ - ret = 1; - } - spin_unlock(&i915->mm.notifier_lock); + if (notifier_seq == obj->userptr.notifier_seq && obj->userptr.pvec) { + i915_gem_object_unlock(obj); + return 0; } - if (!ret) { - /* Make sure userptr is unbound for next attempt, so we don't use stale pages. */ - ret = i915_gem_object_userptr_unbind(obj, false); - } + ret = i915_gem_object_userptr_unbind(obj); i915_gem_object_unlock(obj); - if (ret < 0) + if (ret) return ret; - if (ret > 0) - return 0; - - notifier_seq = mmu_interval_read_begin(&obj->userptr.notifier); - pvec = kvmalloc_array(num_pages, sizeof(struct page *), GFP_KERNEL); if (!pvec) return -ENOMEM; @@ -329,7 +303,9 @@ int i915_gem_object_userptr_submit_init(struct drm_i915_gem_object *obj) } ret = 0; - spin_lock(&i915->mm.notifier_lock); + ret = i915_gem_object_lock_interruptible(obj, NULL); + if (ret) + goto out; if (mmu_interval_read_retry(&obj->userptr.notifier, !obj->userptr.page_ref ? notifier_seq : @@ -341,12 +317,14 @@ int i915_gem_object_userptr_submit_init(struct drm_i915_gem_object *obj) if (!obj->userptr.page_ref++) { obj->userptr.pvec = pvec; obj->userptr.notifier_seq = notifier_seq; - pvec = NULL; + ret = ____i915_gem_object_get_pages(obj); } + obj->userptr.page_ref--; + out_unlock: - spin_unlock(&i915->mm.notifier_lock); + i915_gem_object_unlock(obj); out: if (pvec) { @@ -369,11 +347,6 @@ int i915_gem_object_userptr_submit_done(struct drm_i915_gem_object *obj) return 0; } -void i915_gem_object_userptr_submit_fini(struct drm_i915_gem_object *obj) -{ - i915_gem_object_userptr_drop_ref(obj); -} - int i915_gem_object_userptr_validate(struct drm_i915_gem_object *obj) { int err; @@ -396,7 +369,6 @@ int i915_gem_object_userptr_validate(struct drm_i915_gem_object *obj) i915_gem_object_unlock(obj); } - i915_gem_object_userptr_submit_fini(obj); return err; } @@ -450,6 +422,34 @@ static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = { #endif +static int +probe_range(struct mm_struct *mm, unsigned long addr, unsigned long len) +{ + const unsigned long end = addr + len; + struct vm_area_struct *vma; + int ret = -EFAULT; + + mmap_read_lock(mm); + for (vma = find_vma(mm, addr); vma; vma = vma->vm_next) { + /* Check for holes, note that we also update the addr below */ + if (vma->vm_start > addr) + break; + + if (vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP)) + break; + + if (vma->vm_end >= end) { + ret = 0; + break; + } + + addr = vma->vm_end; + } + mmap_read_unlock(mm); + + return ret; +} + /* * Creates a new mm object that wraps some normal memory from the process * context - user memory. 
@@ -505,7 +505,8 @@ i915_gem_userptr_ioctl(struct drm_device *dev, } if (args->flags & ~(I915_USERPTR_READ_ONLY | - I915_USERPTR_UNSYNCHRONIZED)) + I915_USERPTR_UNSYNCHRONIZED | + I915_USERPTR_PROBE)) return -EINVAL; if (i915_gem_object_size_2big(args->user_size)) @@ -532,14 +533,24 @@ i915_gem_userptr_ioctl(struct drm_device *dev, return -ENODEV; } + if (args->flags & I915_USERPTR_PROBE) { + /* + * Check that the range pointed to represents real struct + * pages and not iomappings (at this moment in time!) + */ + ret = probe_range(current->mm, args->user_ptr, args->user_size); + if (ret) + return ret; + } + #ifdef CONFIG_MMU_NOTIFIER obj = i915_gem_object_alloc(); if (obj == NULL) return -ENOMEM; drm_gem_private_object_init(dev, &obj->base, args->user_size); - i915_gem_object_init(obj, &i915_gem_userptr_ops, &lock_class, - I915_BO_ALLOC_STRUCT_PAGE); + i915_gem_object_init(obj, &i915_gem_userptr_ops, &lock_class, 0); + obj->mem_flags = I915_BO_FLAG_STRUCT_PAGE; obj->read_domains = I915_GEM_DOMAIN_CPU; obj->write_domain = I915_GEM_DOMAIN_CPU; i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC); @@ -572,7 +583,7 @@ i915_gem_userptr_ioctl(struct drm_device *dev, int i915_gem_init_userptr(struct drm_i915_private *dev_priv) { #ifdef CONFIG_MMU_NOTIFIER - spin_lock_init(&dev_priv->mm.notifier_lock); + rwlock_init(&dev_priv->mm.notifier_lock); #endif return 0; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_wait.c b/drivers/gpu/drm/i915/gem/i915_gem_wait.c index 1e97520c62b2..f909aaa09d9c 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_wait.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_wait.c @@ -104,8 +104,8 @@ static void fence_set_priority(struct dma_fence *fence, engine = rq->engine; rcu_read_lock(); /* RCU serialisation for set-wedged protection */ - if (engine->schedule) - engine->schedule(rq, attr); + if (engine->sched_engine->schedule) + engine->sched_engine->schedule(rq, attr); rcu_read_unlock(); } @@ -290,3 +290,22 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) i915_gem_object_put(obj); return ret; } + +/** + * i915_gem_object_wait_migration - Sync an accelerated migration operation + * @obj: The migrating object. + * @flags: waiting flags. Currently supports only I915_WAIT_INTERRUPTIBLE. + * + * Wait for any pending async migration operation on the object, + * whether it's explicitly (i915_gem_object_migrate()) or implicitly + * (swapin, initial clearing) initiated. + * + * Return: 0 if successful, -ERESTARTSYS if a signal was hit during waiting. + */ +int i915_gem_object_wait_migration(struct drm_i915_gem_object *obj, + unsigned int flags) +{ + might_sleep(); + /* NOP for now. 
*/ + return 0; +} diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c index 0c8ecfdf5405..f963b8e1e37b 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c @@ -114,8 +114,8 @@ huge_gem_object(struct drm_i915_private *i915, return ERR_PTR(-ENOMEM); drm_gem_private_object_init(&i915->drm, &obj->base, dma_size); - i915_gem_object_init(obj, &huge_ops, &lock_class, - I915_BO_ALLOC_STRUCT_PAGE); + i915_gem_object_init(obj, &huge_ops, &lock_class, 0); + obj->mem_flags |= I915_BO_FLAG_STRUCT_PAGE; obj->read_domains = I915_GEM_DOMAIN_CPU; obj->write_domain = I915_GEM_DOMAIN_CPU; diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c index dadd485bc52f..a094f3ce1a90 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -167,9 +167,8 @@ huge_pages_object(struct drm_i915_private *i915, return ERR_PTR(-ENOMEM); drm_gem_private_object_init(&i915->drm, &obj->base, size); - i915_gem_object_init(obj, &huge_page_ops, &lock_class, - I915_BO_ALLOC_STRUCT_PAGE); - + i915_gem_object_init(obj, &huge_page_ops, &lock_class, 0); + obj->mem_flags |= I915_BO_FLAG_STRUCT_PAGE; i915_gem_object_set_volatile(obj); obj->write_domain = I915_GEM_DOMAIN_CPU; @@ -497,7 +496,8 @@ static int igt_mock_memory_region_huge_pages(void *arg) int i; for (i = 0; i < ARRAY_SIZE(flags); ++i) { - obj = i915_gem_object_create_region(mem, page_size, + obj = i915_gem_object_create_region(mem, + page_size, page_size, flags[i]); if (IS_ERR(obj)) { err = PTR_ERR(obj); diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c index 176e6b22f87f..ecbcbb86ae1e 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c @@ -5,6 +5,7 @@ #include "i915_selftest.h" +#include "gt/intel_context.h" #include "gt/intel_engine_user.h" #include "gt/intel_gt.h" #include "gt/intel_gpu_commands.h" @@ -16,118 +17,6 @@ #include "huge_gem_object.h" #include "mock_context.h" -static int __igt_client_fill(struct intel_engine_cs *engine) -{ - struct intel_context *ce = engine->kernel_context; - struct drm_i915_gem_object *obj; - I915_RND_STATE(prng); - IGT_TIMEOUT(end); - u32 *vaddr; - int err = 0; - - intel_engine_pm_get(engine); - do { - const u32 max_block_size = S16_MAX * PAGE_SIZE; - u32 sz = min_t(u64, ce->vm->total >> 4, prandom_u32_state(&prng)); - u32 phys_sz = sz % (max_block_size + 1); - u32 val = prandom_u32_state(&prng); - u32 i; - - sz = round_up(sz, PAGE_SIZE); - phys_sz = round_up(phys_sz, PAGE_SIZE); - - pr_debug("%s with phys_sz= %x, sz=%x, val=%x\n", __func__, - phys_sz, sz, val); - - obj = huge_gem_object(engine->i915, phys_sz, sz); - if (IS_ERR(obj)) { - err = PTR_ERR(obj); - goto err_flush; - } - - vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB); - if (IS_ERR(vaddr)) { - err = PTR_ERR(vaddr); - goto err_put; - } - - /* - * XXX: The goal is move this to get_pages, so try to dirty the - * CPU cache first to check that we do the required clflush - * before scheduling the blt for !llc platforms. This matches - * some version of reality where at get_pages the pages - * themselves may not yet be coherent with the GPU(swap-in). 
If - * we are missing the flush then we should see the stale cache - * values after we do the set_to_cpu_domain and pick it up as a - * test failure. - */ - memset32(vaddr, val ^ 0xdeadbeaf, - huge_gem_object_phys_size(obj) / sizeof(u32)); - - if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)) - obj->cache_dirty = true; - - err = i915_gem_schedule_fill_pages_blt(obj, ce, obj->mm.pages, - &obj->mm.page_sizes, - val); - if (err) - goto err_unpin; - - i915_gem_object_lock(obj, NULL); - err = i915_gem_object_set_to_cpu_domain(obj, false); - i915_gem_object_unlock(obj); - if (err) - goto err_unpin; - - for (i = 0; i < huge_gem_object_phys_size(obj) / sizeof(u32); ++i) { - if (vaddr[i] != val) { - pr_err("vaddr[%u]=%x, expected=%x\n", i, - vaddr[i], val); - err = -EINVAL; - goto err_unpin; - } - } - - i915_gem_object_unpin_map(obj); - i915_gem_object_put(obj); - } while (!time_after(jiffies, end)); - - goto err_flush; - -err_unpin: - i915_gem_object_unpin_map(obj); -err_put: - i915_gem_object_put(obj); -err_flush: - if (err == -ENOMEM) - err = 0; - intel_engine_pm_put(engine); - - return err; -} - -static int igt_client_fill(void *arg) -{ - int inst = 0; - - do { - struct intel_engine_cs *engine; - int err; - - engine = intel_engine_lookup_user(arg, - I915_ENGINE_CLASS_COPY, - inst++); - if (!engine) - return 0; - - err = __igt_client_fill(engine); - if (err == -ENOMEM) - err = 0; - if (err) - return err; - } while (1); -} - #define WIDTH 512 #define HEIGHT 32 @@ -693,7 +582,6 @@ static int igt_client_tiled_blits(void *arg) int i915_gem_client_blt_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { - SUBTEST(igt_client_fill), SUBTEST(igt_client_tiled_blits), }; diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index dbcfa28a9d91..8eb5050f8cb3 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -680,7 +680,7 @@ static int igt_ctx_exec(void *arg) struct i915_gem_context *ctx; struct intel_context *ce; - ctx = kernel_context(i915); + ctx = kernel_context(i915, NULL); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); goto out_file; @@ -813,16 +813,12 @@ static int igt_shared_ctx_exec(void *arg) struct i915_gem_context *ctx; struct intel_context *ce; - ctx = kernel_context(i915); + ctx = kernel_context(i915, ctx_vm(parent)); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); goto out_test; } - mutex_lock(&ctx->mutex); - __assign_ppgtt(ctx, ctx_vm(parent)); - mutex_unlock(&ctx->mutex); - ce = i915_gem_context_get_engine(ctx, engine->legacy_idx); GEM_BUG_ON(IS_ERR(ce)); @@ -1875,125 +1871,6 @@ out_file: return err; } -static bool skip_unused_engines(struct intel_context *ce, void *data) -{ - return !ce->state; -} - -static void mock_barrier_task(void *data) -{ - unsigned int *counter = data; - - ++*counter; -} - -static int mock_context_barrier(void *arg) -{ -#undef pr_fmt -#define pr_fmt(x) "context_barrier_task():" # x - struct drm_i915_private *i915 = arg; - struct i915_gem_context *ctx; - struct i915_request *rq; - unsigned int counter; - int err; - - /* - * The context barrier provides us with a callback after it emits - * a request; useful for retiring old state after loading new. 
- */ - - ctx = mock_context(i915, "mock"); - if (!ctx) - return -ENOMEM; - - counter = 0; - err = context_barrier_task(ctx, 0, NULL, NULL, NULL, - mock_barrier_task, &counter); - if (err) { - pr_err("Failed at line %d, err=%d\n", __LINE__, err); - goto out; - } - if (counter == 0) { - pr_err("Did not retire immediately with 0 engines\n"); - err = -EINVAL; - goto out; - } - - counter = 0; - err = context_barrier_task(ctx, ALL_ENGINES, skip_unused_engines, - NULL, NULL, mock_barrier_task, &counter); - if (err) { - pr_err("Failed at line %d, err=%d\n", __LINE__, err); - goto out; - } - if (counter == 0) { - pr_err("Did not retire immediately for all unused engines\n"); - err = -EINVAL; - goto out; - } - - rq = igt_request_alloc(ctx, i915->gt.engine[RCS0]); - if (IS_ERR(rq)) { - pr_err("Request allocation failed!\n"); - goto out; - } - i915_request_add(rq); - - counter = 0; - context_barrier_inject_fault = BIT(RCS0); - err = context_barrier_task(ctx, ALL_ENGINES, NULL, NULL, NULL, - mock_barrier_task, &counter); - context_barrier_inject_fault = 0; - if (err == -ENXIO) - err = 0; - else - pr_err("Did not hit fault injection!\n"); - if (counter != 0) { - pr_err("Invoked callback on error!\n"); - err = -EIO; - } - if (err) - goto out; - - counter = 0; - err = context_barrier_task(ctx, ALL_ENGINES, skip_unused_engines, - NULL, NULL, mock_barrier_task, &counter); - if (err) { - pr_err("Failed at line %d, err=%d\n", __LINE__, err); - goto out; - } - mock_device_flush(i915); - if (counter == 0) { - pr_err("Did not retire on each active engines\n"); - err = -EINVAL; - goto out; - } - -out: - mock_context_close(ctx); - return err; -#undef pr_fmt -#define pr_fmt(x) x -} - -int i915_gem_context_mock_selftests(void) -{ - static const struct i915_subtest tests[] = { - SUBTEST(mock_context_barrier), - }; - struct drm_i915_private *i915; - int err; - - i915 = mock_gem_device(); - if (!i915) - return -ENOMEM; - - err = i915_subtests(tests, i915); - - mock_destroy_device(i915); - return err; -} - int i915_gem_context_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c index dd74bc09ec88..ffae7df5e4d7 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c @@ -35,7 +35,7 @@ static int igt_dmabuf_export(void *arg) static int igt_dmabuf_import_self(void *arg) { struct drm_i915_private *i915 = arg; - struct drm_i915_gem_object *obj; + struct drm_i915_gem_object *obj, *import_obj; struct drm_gem_object *import; struct dma_buf *dmabuf; int err; @@ -65,10 +65,19 @@ static int igt_dmabuf_import_self(void *arg) err = -EINVAL; goto out_import; } + import_obj = to_intel_bo(import); + + i915_gem_object_lock(import_obj, NULL); + err = __i915_gem_object_get_pages(import_obj); + i915_gem_object_unlock(import_obj); + if (err) { + pr_err("Same object dma-buf get_pages failed!\n"); + goto out_import; + } err = 0; out_import: - i915_gem_object_put(to_intel_bo(import)); + i915_gem_object_put(import_obj); out_dmabuf: dma_buf_put(dmabuf); out: @@ -76,6 +85,180 @@ out: return err; } +static int igt_dmabuf_import_same_driver_lmem(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_memory_region *lmem = i915->mm.regions[INTEL_REGION_LMEM]; + struct drm_i915_gem_object *obj; + struct drm_gem_object *import; + struct dma_buf *dmabuf; + int err; + + if (!lmem) + return 0; + + 
force_different_devices = true; + + obj = __i915_gem_object_create_user(i915, PAGE_SIZE, &lmem, 1); + if (IS_ERR(obj)) { + pr_err("__i915_gem_object_create_user failed with err=%ld\n", + PTR_ERR(obj)); + err = PTR_ERR(obj); + goto out_ret; + } + + dmabuf = i915_gem_prime_export(&obj->base, 0); + if (IS_ERR(dmabuf)) { + pr_err("i915_gem_prime_export failed with err=%ld\n", + PTR_ERR(dmabuf)); + err = PTR_ERR(dmabuf); + goto out; + } + + /* + * We expect an import of an LMEM-only object to fail with + * -EOPNOTSUPP because it can't be migrated to SMEM. + */ + import = i915_gem_prime_import(&i915->drm, dmabuf); + if (!IS_ERR(import)) { + drm_gem_object_put(import); + pr_err("i915_gem_prime_import succeeded when it shouldn't have\n"); + err = -EINVAL; + } else if (PTR_ERR(import) != -EOPNOTSUPP) { + pr_err("i915_gem_prime_import failed with the wrong err=%ld\n", + PTR_ERR(import)); + err = PTR_ERR(import); + } + + dma_buf_put(dmabuf); +out: + i915_gem_object_put(obj); +out_ret: + force_different_devices = false; + return err; +} + +static int igt_dmabuf_import_same_driver(struct drm_i915_private *i915, + struct intel_memory_region **regions, + unsigned int num_regions) +{ + struct drm_i915_gem_object *obj, *import_obj; + struct drm_gem_object *import; + struct dma_buf *dmabuf; + struct dma_buf_attachment *import_attach; + struct sg_table *st; + long timeout; + int err; + + force_different_devices = true; + + obj = __i915_gem_object_create_user(i915, PAGE_SIZE, + regions, num_regions); + if (IS_ERR(obj)) { + pr_err("__i915_gem_object_create_user failed with err=%ld\n", + PTR_ERR(obj)); + err = PTR_ERR(obj); + goto out_ret; + } + + dmabuf = i915_gem_prime_export(&obj->base, 0); + if (IS_ERR(dmabuf)) { + pr_err("i915_gem_prime_export failed with err=%ld\n", + PTR_ERR(dmabuf)); + err = PTR_ERR(dmabuf); + goto out; + } + + import = i915_gem_prime_import(&i915->drm, dmabuf); + if (IS_ERR(import)) { + pr_err("i915_gem_prime_import failed with err=%ld\n", + PTR_ERR(import)); + err = PTR_ERR(import); + goto out_dmabuf; + } + + if (import == &obj->base) { + pr_err("i915_gem_prime_import reused gem object!\n"); + err = -EINVAL; + goto out_import; + } + + import_obj = to_intel_bo(import); + + i915_gem_object_lock(import_obj, NULL); + err = __i915_gem_object_get_pages(import_obj); + if (err) { + pr_err("Different objects dma-buf get_pages failed!\n"); + i915_gem_object_unlock(import_obj); + goto out_import; + } + + /* + * If the exported object is not in system memory, something + * weird is going on. TODO: When p2p is supported, this is no + * longer considered weird. + */ + if (obj->mm.region != i915->mm.regions[INTEL_REGION_SMEM]) { + pr_err("Exported dma-buf is not in system memory\n"); + err = -EINVAL; + } + + i915_gem_object_unlock(import_obj); + + /* Now try a fake importer */ + import_attach = dma_buf_attach(dmabuf, obj->base.dev->dev); + if (IS_ERR(import_attach)) { + err = PTR_ERR(import_attach); + goto out_import; + } + + st = dma_buf_map_attachment(import_attach, DMA_BIDIRECTIONAL); + if (IS_ERR(st)) { + err = PTR_ERR(st); + goto out_detach; + } + + timeout = dma_resv_wait_timeout(dmabuf->resv, false, true, 5 * HZ); + if (!timeout) { + pr_err("dmabuf wait for exclusive fence timed out.\n"); + timeout = -ETIME; + } + err = timeout > 0 ?
0 : timeout; + dma_buf_unmap_attachment(import_attach, st, DMA_BIDIRECTIONAL); +out_detach: + dma_buf_detach(dmabuf, import_attach); +out_import: + i915_gem_object_put(import_obj); +out_dmabuf: + dma_buf_put(dmabuf); +out: + i915_gem_object_put(obj); +out_ret: + force_different_devices = false; + return err; +} + +static int igt_dmabuf_import_same_driver_smem(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_memory_region *smem = i915->mm.regions[INTEL_REGION_SMEM]; + + return igt_dmabuf_import_same_driver(i915, &smem, 1); +} + +static int igt_dmabuf_import_same_driver_lmem_smem(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_memory_region *regions[2]; + + if (!i915->mm.regions[INTEL_REGION_LMEM]) + return 0; + + regions[0] = i915->mm.regions[INTEL_REGION_LMEM]; + regions[1] = i915->mm.regions[INTEL_REGION_SMEM]; + return igt_dmabuf_import_same_driver(i915, regions, 2); +} + static int igt_dmabuf_import(void *arg) { struct drm_i915_private *i915 = arg; @@ -286,6 +469,9 @@ int i915_gem_dmabuf_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(igt_dmabuf_export), + SUBTEST(igt_dmabuf_import_same_driver_lmem), + SUBTEST(igt_dmabuf_import_same_driver_smem), + SUBTEST(igt_dmabuf_import_same_driver_lmem_smem), }; return i915_subtests(tests, i915); diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c new file mode 100644 index 000000000000..28a700f08b49 --- /dev/null +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c @@ -0,0 +1,243 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2020-2021 Intel Corporation + */ + +#include "gt/intel_migrate.h" + +static int igt_fill_check_buffer(struct drm_i915_gem_object *obj, + bool fill) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + unsigned int i, count = obj->base.size / sizeof(u32); + enum i915_map_type map_type = + i915_coherent_map_type(i915, obj, false); + u32 *cur; + int err = 0; + + assert_object_held(obj); + cur = i915_gem_object_pin_map(obj, map_type); + if (IS_ERR(cur)) + return PTR_ERR(cur); + + if (fill) + for (i = 0; i < count; ++i) + *cur++ = i; + else + for (i = 0; i < count; ++i) + if (*cur++ != i) { + pr_err("Object content mismatch at location %d of %d\n", i, count); + err = -EINVAL; + break; + } + + i915_gem_object_unpin_map(obj); + + return err; +} + +static int igt_create_migrate(struct intel_gt *gt, enum intel_region_id src, + enum intel_region_id dst) +{ + struct drm_i915_private *i915 = gt->i915; + struct intel_memory_region *src_mr = i915->mm.regions[src]; + struct drm_i915_gem_object *obj; + struct i915_gem_ww_ctx ww; + int err = 0; + + GEM_BUG_ON(!src_mr); + + /* Switch object backing-store on create */ + obj = i915_gem_object_create_region(src_mr, PAGE_SIZE, 0, 0); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + for_i915_gem_ww(&ww, err, true) { + err = i915_gem_object_lock(obj, &ww); + if (err) + continue; + + err = igt_fill_check_buffer(obj, true); + if (err) + continue; + + err = i915_gem_object_migrate(obj, &ww, dst); + if (err) + continue; + + err = i915_gem_object_pin_pages(obj); + if (err) + continue; + + if (i915_gem_object_can_migrate(obj, src)) + err = -EINVAL; + + i915_gem_object_unpin_pages(obj); + err = i915_gem_object_wait_migration(obj, true); + if (err) + continue; + + err = igt_fill_check_buffer(obj, false); + } + i915_gem_object_put(obj); + + return err; +} + +static int igt_smem_create_migrate(void *arg) +{ + return 
igt_create_migrate(arg, INTEL_REGION_LMEM, INTEL_REGION_SMEM); +} + +static int igt_lmem_create_migrate(void *arg) +{ + return igt_create_migrate(arg, INTEL_REGION_SMEM, INTEL_REGION_LMEM); +} + +static int igt_same_create_migrate(void *arg) +{ + return igt_create_migrate(arg, INTEL_REGION_LMEM, INTEL_REGION_LMEM); +} + +static int lmem_pages_migrate_one(struct i915_gem_ww_ctx *ww, + struct drm_i915_gem_object *obj) +{ + int err; + + err = i915_gem_object_lock(obj, ww); + if (err) + return err; + + if (i915_gem_object_is_lmem(obj)) { + err = i915_gem_object_migrate(obj, ww, INTEL_REGION_SMEM); + if (err) { + pr_err("Object failed migration to smem\n"); + if (err) + return err; + } + + if (i915_gem_object_is_lmem(obj)) { + pr_err("object still backed by lmem\n"); + err = -EINVAL; + } + + if (!i915_gem_object_has_struct_page(obj)) { + pr_err("object not backed by struct page\n"); + err = -EINVAL; + } + + } else { + err = i915_gem_object_migrate(obj, ww, INTEL_REGION_LMEM); + if (err) { + pr_err("Object failed migration to lmem\n"); + if (err) + return err; + } + + if (i915_gem_object_has_struct_page(obj)) { + pr_err("object still backed by struct page\n"); + err = -EINVAL; + } + + if (!i915_gem_object_is_lmem(obj)) { + pr_err("object not backed by lmem\n"); + err = -EINVAL; + } + } + + return err; +} + +static int igt_lmem_pages_migrate(void *arg) +{ + struct intel_gt *gt = arg; + struct drm_i915_private *i915 = gt->i915; + struct drm_i915_gem_object *obj; + struct i915_gem_ww_ctx ww; + struct i915_request *rq; + int err; + int i; + + /* From LMEM to shmem and back again */ + + obj = i915_gem_object_create_lmem(i915, SZ_2M, 0); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + /* Initial GPU fill, sync, CPU initialization. */ + for_i915_gem_ww(&ww, err, true) { + err = i915_gem_object_lock(obj, &ww); + if (err) + continue; + + err = ____i915_gem_object_get_pages(obj); + if (err) + continue; + + err = intel_migrate_clear(>->migrate, &ww, NULL, + obj->mm.pages->sgl, obj->cache_level, + i915_gem_object_is_lmem(obj), + 0xdeadbeaf, &rq); + if (rq) { + dma_resv_add_excl_fence(obj->base.resv, &rq->fence); + i915_request_put(rq); + } + if (err) + continue; + + err = i915_gem_object_wait(obj, I915_WAIT_INTERRUPTIBLE, + 5 * HZ); + if (err) + continue; + + err = igt_fill_check_buffer(obj, true); + if (err) + continue; + } + if (err) + goto out_put; + + /* + * Migrate to and from smem without explicitly syncing. + * Finalize with data in smem for fast readout. + */ + for (i = 1; i <= 5; ++i) { + for_i915_gem_ww(&ww, err, true) + err = lmem_pages_migrate_one(&ww, obj); + if (err) + goto out_put; + } + + err = i915_gem_object_lock_interruptible(obj, NULL); + if (err) + goto out_put; + + /* Finally sync migration and check content. 
*/ + err = i915_gem_object_wait_migration(obj, true); + if (err) + goto out_unlock; + + err = igt_fill_check_buffer(obj, false); + +out_unlock: + i915_gem_object_unlock(obj); +out_put: + i915_gem_object_put(obj); + + return err; +} + +int i915_gem_migrate_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_smem_create_migrate), + SUBTEST(igt_lmem_create_migrate), + SUBTEST(igt_same_create_migrate), + SUBTEST(igt_lmem_pages_migrate), + }; + + if (!HAS_LMEM(i915)) + return 0; + + return intel_gt_live_subtests(tests, &i915->gt); +} diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index 5575172c66f5..b20f5621f62b 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -573,21 +573,30 @@ err: return 0; } +static enum i915_mmap_type default_mapping(struct drm_i915_private *i915) +{ + if (HAS_LMEM(i915)) + return I915_MMAP_TYPE_FIXED; + + return I915_MMAP_TYPE_GTT; +} + static bool assert_mmap_offset(struct drm_i915_private *i915, unsigned long size, int expected) { struct drm_i915_gem_object *obj; - struct i915_mmap_offset *mmo; + u64 offset; + int ret; obj = i915_gem_object_create_internal(i915, size); if (IS_ERR(obj)) - return false; + return expected && expected == PTR_ERR(obj); - mmo = mmap_offset_attach(obj, I915_MMAP_OFFSET_GTT, NULL); + ret = __assign_mmap_offset(obj, default_mapping(i915), &offset, NULL); i915_gem_object_put(obj); - return PTR_ERR_OR_ZERO(mmo) == expected; + return ret == expected; } static void disable_retire_worker(struct drm_i915_private *i915) @@ -622,8 +631,8 @@ static int igt_mmap_offset_exhaustion(void *arg) struct drm_mm *mm = &i915->drm.vma_offset_manager->vm_addr_space_mm; struct drm_i915_gem_object *obj; struct drm_mm_node *hole, *next; - struct i915_mmap_offset *mmo; int loop, err = 0; + u64 offset; /* Disable background reaper */ disable_retire_worker(i915); @@ -684,13 +693,13 @@ static int igt_mmap_offset_exhaustion(void *arg) obj = i915_gem_object_create_internal(i915, PAGE_SIZE); if (IS_ERR(obj)) { err = PTR_ERR(obj); + pr_err("Unable to create object for reclaimed hole\n"); goto out; } - mmo = mmap_offset_attach(obj, I915_MMAP_OFFSET_GTT, NULL); - if (IS_ERR(mmo)) { + err = __assign_mmap_offset(obj, default_mapping(i915), &offset, NULL); + if (err) { pr_err("Unable to insert object into reclaimed hole\n"); - err = PTR_ERR(mmo); goto err_obj; } @@ -830,34 +839,25 @@ static int wc_check(struct drm_i915_gem_object *obj) static bool can_mmap(struct drm_i915_gem_object *obj, enum i915_mmap_type type) { - if (type == I915_MMAP_TYPE_GTT && - !i915_ggtt_has_aperture(&to_i915(obj->base.dev)->ggtt)) - return false; + struct drm_i915_private *i915 = to_i915(obj->base.dev); + bool no_map; - if (type != I915_MMAP_TYPE_GTT && - !i915_gem_object_has_struct_page(obj) && - !i915_gem_object_type_has(obj, I915_GEM_OBJECT_HAS_IOMEM)) + if (HAS_LMEM(i915)) + return type == I915_MMAP_TYPE_FIXED; + else if (type == I915_MMAP_TYPE_FIXED) return false; - return true; -} - -static void object_set_placements(struct drm_i915_gem_object *obj, - struct intel_memory_region **placements, - unsigned int n_placements) -{ - GEM_BUG_ON(!n_placements); + if (type == I915_MMAP_TYPE_GTT && + !i915_ggtt_has_aperture(&to_i915(obj->base.dev)->ggtt)) + return false; - if (n_placements == 1) { - struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct intel_memory_region *mr = placements[0]; + 
i915_gem_object_lock(obj, NULL); + no_map = (type != I915_MMAP_TYPE_GTT && + !i915_gem_object_has_struct_page(obj) && + !i915_gem_object_has_iomem(obj)); + i915_gem_object_unlock(obj); - obj->mm.placements = &i915->mm.regions[mr->id]; - obj->mm.n_placements = 1; - } else { - obj->mm.placements = placements; - obj->mm.n_placements = n_placements; - } + return !no_map; } #define expand32(x) (((x) << 0) | ((x) << 8) | ((x) << 16) | ((x) << 24)) @@ -865,10 +865,10 @@ static int __igt_mmap(struct drm_i915_private *i915, struct drm_i915_gem_object *obj, enum i915_mmap_type type) { - struct i915_mmap_offset *mmo; struct vm_area_struct *area; unsigned long addr; int err, i; + u64 offset; if (!can_mmap(obj, type)) return 0; @@ -879,11 +879,11 @@ static int __igt_mmap(struct drm_i915_private *i915, if (err) return err; - mmo = mmap_offset_attach(obj, type, NULL); - if (IS_ERR(mmo)) - return PTR_ERR(mmo); + err = __assign_mmap_offset(obj, type, &offset, NULL); + if (err) + return err; - addr = igt_mmap_node(i915, &mmo->vma_node, 0, PROT_WRITE, MAP_SHARED); + addr = igt_mmap_offset(i915, offset, obj->base.size, PROT_WRITE, MAP_SHARED); if (IS_ERR_VALUE(addr)) return addr; @@ -897,13 +897,6 @@ static int __igt_mmap(struct drm_i915_private *i915, goto out_unmap; } - if (area->vm_private_data != mmo) { - pr_err("%s: vm_area_struct did not point back to our mmap_offset object!\n", - obj->mm.region->name); - err = -EINVAL; - goto out_unmap; - } - for (i = 0; i < obj->base.size / sizeof(u32); i++) { u32 __user *ux = u64_to_user_ptr((u64)(addr + i * sizeof(*ux))); u32 x; @@ -961,18 +954,18 @@ static int igt_mmap(void *arg) struct drm_i915_gem_object *obj; int err; - obj = i915_gem_object_create_region(mr, sizes[i], 0); + obj = __i915_gem_object_create_user(i915, sizes[i], &mr, 1); if (obj == ERR_PTR(-ENODEV)) continue; if (IS_ERR(obj)) return PTR_ERR(obj); - object_set_placements(obj, &mr, 1); - err = __igt_mmap(i915, obj, I915_MMAP_TYPE_GTT); if (err == 0) err = __igt_mmap(i915, obj, I915_MMAP_TYPE_WC); + if (err == 0) + err = __igt_mmap(i915, obj, I915_MMAP_TYPE_FIXED); i915_gem_object_put(obj); if (err) @@ -990,26 +983,33 @@ static const char *repr_mmap_type(enum i915_mmap_type type) case I915_MMAP_TYPE_WB: return "wb"; case I915_MMAP_TYPE_WC: return "wc"; case I915_MMAP_TYPE_UC: return "uc"; + case I915_MMAP_TYPE_FIXED: return "fixed"; default: return "unknown"; } } -static bool can_access(const struct drm_i915_gem_object *obj) +static bool can_access(struct drm_i915_gem_object *obj) { - return i915_gem_object_has_struct_page(obj) || - i915_gem_object_type_has(obj, I915_GEM_OBJECT_HAS_IOMEM); + bool access; + + i915_gem_object_lock(obj, NULL); + access = i915_gem_object_has_struct_page(obj) || + i915_gem_object_has_iomem(obj); + i915_gem_object_unlock(obj); + + return access; } static int __igt_mmap_access(struct drm_i915_private *i915, struct drm_i915_gem_object *obj, enum i915_mmap_type type) { - struct i915_mmap_offset *mmo; unsigned long __user *ptr; unsigned long A, B; unsigned long x, y; unsigned long addr; int err; + u64 offset; memset(&A, 0xAA, sizeof(A)); memset(&B, 0xBB, sizeof(B)); @@ -1017,11 +1017,11 @@ static int __igt_mmap_access(struct drm_i915_private *i915, if (!can_mmap(obj, type) || !can_access(obj)) return 0; - mmo = mmap_offset_attach(obj, type, NULL); - if (IS_ERR(mmo)) - return PTR_ERR(mmo); + err = __assign_mmap_offset(obj, type, &offset, NULL); + if (err) + return err; - addr = igt_mmap_node(i915, &mmo->vma_node, 0, PROT_WRITE, MAP_SHARED); + addr = igt_mmap_offset(i915, offset, 
obj->base.size, PROT_WRITE, MAP_SHARED); if (IS_ERR_VALUE(addr)) return addr; ptr = (unsigned long __user *)addr; @@ -1081,15 +1081,13 @@ static int igt_mmap_access(void *arg) struct drm_i915_gem_object *obj; int err; - obj = i915_gem_object_create_region(mr, PAGE_SIZE, 0); + obj = __i915_gem_object_create_user(i915, PAGE_SIZE, &mr, 1); if (obj == ERR_PTR(-ENODEV)) continue; if (IS_ERR(obj)) return PTR_ERR(obj); - object_set_placements(obj, &mr, 1); - err = __igt_mmap_access(i915, obj, I915_MMAP_TYPE_GTT); if (err == 0) err = __igt_mmap_access(i915, obj, I915_MMAP_TYPE_WB); @@ -1097,6 +1095,8 @@ static int igt_mmap_access(void *arg) err = __igt_mmap_access(i915, obj, I915_MMAP_TYPE_WC); if (err == 0) err = __igt_mmap_access(i915, obj, I915_MMAP_TYPE_UC); + if (err == 0) + err = __igt_mmap_access(i915, obj, I915_MMAP_TYPE_FIXED); i915_gem_object_put(obj); if (err) @@ -1111,11 +1111,11 @@ static int __igt_mmap_gpu(struct drm_i915_private *i915, enum i915_mmap_type type) { struct intel_engine_cs *engine; - struct i915_mmap_offset *mmo; unsigned long addr; u32 __user *ux; u32 bbe; int err; + u64 offset; /* * Verify that the mmap access into the backing store aligns with @@ -1132,11 +1132,11 @@ static int __igt_mmap_gpu(struct drm_i915_private *i915, if (err) return err; - mmo = mmap_offset_attach(obj, type, NULL); - if (IS_ERR(mmo)) - return PTR_ERR(mmo); + err = __assign_mmap_offset(obj, type, &offset, NULL); + if (err) + return err; - addr = igt_mmap_node(i915, &mmo->vma_node, 0, PROT_WRITE, MAP_SHARED); + addr = igt_mmap_offset(i915, offset, obj->base.size, PROT_WRITE, MAP_SHARED); if (IS_ERR_VALUE(addr)) return addr; @@ -1226,18 +1226,18 @@ static int igt_mmap_gpu(void *arg) struct drm_i915_gem_object *obj; int err; - obj = i915_gem_object_create_region(mr, PAGE_SIZE, 0); + obj = __i915_gem_object_create_user(i915, PAGE_SIZE, &mr, 1); if (obj == ERR_PTR(-ENODEV)) continue; if (IS_ERR(obj)) return PTR_ERR(obj); - object_set_placements(obj, &mr, 1); - err = __igt_mmap_gpu(i915, obj, I915_MMAP_TYPE_GTT); if (err == 0) err = __igt_mmap_gpu(i915, obj, I915_MMAP_TYPE_WC); + if (err == 0) + err = __igt_mmap_gpu(i915, obj, I915_MMAP_TYPE_FIXED); i915_gem_object_put(obj); if (err) @@ -1303,18 +1303,18 @@ static int __igt_mmap_revoke(struct drm_i915_private *i915, struct drm_i915_gem_object *obj, enum i915_mmap_type type) { - struct i915_mmap_offset *mmo; unsigned long addr; int err; + u64 offset; if (!can_mmap(obj, type)) return 0; - mmo = mmap_offset_attach(obj, type, NULL); - if (IS_ERR(mmo)) - return PTR_ERR(mmo); + err = __assign_mmap_offset(obj, type, &offset, NULL); + if (err) + return err; - addr = igt_mmap_node(i915, &mmo->vma_node, 0, PROT_WRITE, MAP_SHARED); + addr = igt_mmap_offset(i915, offset, obj->base.size, PROT_WRITE, MAP_SHARED); if (IS_ERR_VALUE(addr)) return addr; @@ -1350,10 +1350,20 @@ static int __igt_mmap_revoke(struct drm_i915_private *i915, } } - err = check_absent(addr, obj->base.size); - if (err) { - pr_err("%s: was not absent\n", obj->mm.region->name); - goto out_unmap; + if (!obj->ops->mmap_ops) { + err = check_absent(addr, obj->base.size); + if (err) { + pr_err("%s: was not absent\n", obj->mm.region->name); + goto out_unmap; + } + } else { + /* ttm allows access to evicted regions by design */ + + err = check_present(addr, obj->base.size); + if (err) { + pr_err("%s: was not present\n", obj->mm.region->name); + goto out_unmap; + } } out_unmap: @@ -1371,18 +1381,18 @@ static int igt_mmap_revoke(void *arg) struct drm_i915_gem_object *obj; int err; - obj = 
i915_gem_object_create_region(mr, PAGE_SIZE, 0); + obj = __i915_gem_object_create_user(i915, PAGE_SIZE, &mr, 1); if (obj == ERR_PTR(-ENODEV)) continue; if (IS_ERR(obj)) return PTR_ERR(obj); - object_set_placements(obj, &mr, 1); - err = __igt_mmap_revoke(i915, obj, I915_MMAP_TYPE_GTT); if (err == 0) err = __igt_mmap_revoke(i915, obj, I915_MMAP_TYPE_WC); + if (err == 0) + err = __igt_mmap_revoke(i915, obj, I915_MMAP_TYPE_FIXED); i915_gem_object_put(obj); if (err) diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c deleted file mode 100644 index 8c335d1a8406..000000000000 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c +++ /dev/null @@ -1,597 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright © 2019 Intel Corporation - */ - -#include <linux/sort.h> - -#include "gt/intel_gt.h" -#include "gt/intel_engine_user.h" - -#include "i915_selftest.h" - -#include "gem/i915_gem_context.h" -#include "selftests/igt_flush_test.h" -#include "selftests/i915_random.h" -#include "selftests/mock_drm.h" -#include "huge_gem_object.h" -#include "mock_context.h" - -static int wrap_ktime_compare(const void *A, const void *B) -{ - const ktime_t *a = A, *b = B; - - return ktime_compare(*a, *b); -} - -static int __perf_fill_blt(struct drm_i915_gem_object *obj) -{ - struct drm_i915_private *i915 = to_i915(obj->base.dev); - int inst = 0; - - do { - struct intel_engine_cs *engine; - ktime_t t[5]; - int pass; - int err; - - engine = intel_engine_lookup_user(i915, - I915_ENGINE_CLASS_COPY, - inst++); - if (!engine) - return 0; - - intel_engine_pm_get(engine); - for (pass = 0; pass < ARRAY_SIZE(t); pass++) { - struct intel_context *ce = engine->kernel_context; - ktime_t t0, t1; - - t0 = ktime_get(); - - err = i915_gem_object_fill_blt(obj, ce, 0); - if (err) - break; - - err = i915_gem_object_wait(obj, - I915_WAIT_ALL, - MAX_SCHEDULE_TIMEOUT); - if (err) - break; - - t1 = ktime_get(); - t[pass] = ktime_sub(t1, t0); - } - intel_engine_pm_put(engine); - if (err) - return err; - - sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL); - pr_info("%s: blt %zd KiB fill: %lld MiB/s\n", - engine->name, - obj->base.size >> 10, - div64_u64(mul_u32_u32(4 * obj->base.size, - 1000 * 1000 * 1000), - t[1] + 2 * t[2] + t[3]) >> 20); - } while (1); -} - -static int perf_fill_blt(void *arg) -{ - struct drm_i915_private *i915 = arg; - static const unsigned long sizes[] = { - SZ_4K, - SZ_64K, - SZ_2M, - SZ_64M - }; - int i; - - for (i = 0; i < ARRAY_SIZE(sizes); i++) { - struct drm_i915_gem_object *obj; - int err; - - obj = i915_gem_object_create_internal(i915, sizes[i]); - if (IS_ERR(obj)) - return PTR_ERR(obj); - - err = __perf_fill_blt(obj); - i915_gem_object_put(obj); - if (err) - return err; - } - - return 0; -} - -static int __perf_copy_blt(struct drm_i915_gem_object *src, - struct drm_i915_gem_object *dst) -{ - struct drm_i915_private *i915 = to_i915(src->base.dev); - int inst = 0; - - do { - struct intel_engine_cs *engine; - ktime_t t[5]; - int pass; - int err = 0; - - engine = intel_engine_lookup_user(i915, - I915_ENGINE_CLASS_COPY, - inst++); - if (!engine) - return 0; - - intel_engine_pm_get(engine); - for (pass = 0; pass < ARRAY_SIZE(t); pass++) { - struct intel_context *ce = engine->kernel_context; - ktime_t t0, t1; - - t0 = ktime_get(); - - err = i915_gem_object_copy_blt(src, dst, ce); - if (err) - break; - - err = i915_gem_object_wait(dst, - I915_WAIT_ALL, - MAX_SCHEDULE_TIMEOUT); - if (err) - break; - - t1 = ktime_get(); - 
t[pass] = ktime_sub(t1, t0); - } - intel_engine_pm_put(engine); - if (err) - return err; - - sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL); - pr_info("%s: blt %zd KiB copy: %lld MiB/s\n", - engine->name, - src->base.size >> 10, - div64_u64(mul_u32_u32(4 * src->base.size, - 1000 * 1000 * 1000), - t[1] + 2 * t[2] + t[3]) >> 20); - } while (1); -} - -static int perf_copy_blt(void *arg) -{ - struct drm_i915_private *i915 = arg; - static const unsigned long sizes[] = { - SZ_4K, - SZ_64K, - SZ_2M, - SZ_64M - }; - int i; - - for (i = 0; i < ARRAY_SIZE(sizes); i++) { - struct drm_i915_gem_object *src, *dst; - int err; - - src = i915_gem_object_create_internal(i915, sizes[i]); - if (IS_ERR(src)) - return PTR_ERR(src); - - dst = i915_gem_object_create_internal(i915, sizes[i]); - if (IS_ERR(dst)) { - err = PTR_ERR(dst); - goto err_src; - } - - err = __perf_copy_blt(src, dst); - - i915_gem_object_put(dst); -err_src: - i915_gem_object_put(src); - if (err) - return err; - } - - return 0; -} - -struct igt_thread_arg { - struct intel_engine_cs *engine; - struct i915_gem_context *ctx; - struct file *file; - struct rnd_state prng; - unsigned int n_cpus; -}; - -static int igt_fill_blt_thread(void *arg) -{ - struct igt_thread_arg *thread = arg; - struct intel_engine_cs *engine = thread->engine; - struct rnd_state *prng = &thread->prng; - struct drm_i915_gem_object *obj; - struct i915_gem_context *ctx; - struct intel_context *ce; - unsigned int prio; - IGT_TIMEOUT(end); - u64 total, max; - int err; - - ctx = thread->ctx; - if (!ctx) { - ctx = live_context_for_engine(engine, thread->file); - if (IS_ERR(ctx)) - return PTR_ERR(ctx); - - prio = i915_prandom_u32_max_state(I915_PRIORITY_MAX, prng); - ctx->sched.priority = prio; - } - - ce = i915_gem_context_get_engine(ctx, 0); - GEM_BUG_ON(IS_ERR(ce)); - - /* - * If we have a tiny shared address space, like for the GGTT - * then we can't be too greedy. - */ - max = ce->vm->total; - if (i915_is_ggtt(ce->vm) || thread->ctx) - max = div_u64(max, thread->n_cpus); - max >>= 4; - - total = PAGE_SIZE; - do { - /* Aim to keep the runtime under reasonable bounds! */ - const u32 max_phys_size = SZ_64K; - u32 val = prandom_u32_state(prng); - u32 phys_sz; - u32 sz; - u32 *vaddr; - u32 i; - - total = min(total, max); - sz = i915_prandom_u32_max_state(total, prng) + 1; - phys_sz = sz % max_phys_size + 1; - - sz = round_up(sz, PAGE_SIZE); - phys_sz = round_up(phys_sz, PAGE_SIZE); - phys_sz = min(phys_sz, sz); - - pr_debug("%s with phys_sz= %x, sz=%x, val=%x\n", __func__, - phys_sz, sz, val); - - obj = huge_gem_object(engine->i915, phys_sz, sz); - if (IS_ERR(obj)) { - err = PTR_ERR(obj); - goto err_flush; - } - - vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB); - if (IS_ERR(vaddr)) { - err = PTR_ERR(vaddr); - goto err_put; - } - - /* - * Make sure the potentially async clflush does its job, if - * required. 
- */ - memset32(vaddr, val ^ 0xdeadbeaf, - huge_gem_object_phys_size(obj) / sizeof(u32)); - - if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)) - obj->cache_dirty = true; - - err = i915_gem_object_fill_blt(obj, ce, val); - if (err) - goto err_unpin; - - err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT); - if (err) - goto err_unpin; - - for (i = 0; i < huge_gem_object_phys_size(obj) / sizeof(u32); i += 17) { - if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ)) - drm_clflush_virt_range(&vaddr[i], sizeof(vaddr[i])); - - if (vaddr[i] != val) { - pr_err("vaddr[%u]=%x, expected=%x\n", i, - vaddr[i], val); - err = -EINVAL; - goto err_unpin; - } - } - - i915_gem_object_unpin_map(obj); - i915_gem_object_put(obj); - - total <<= 1; - } while (!time_after(jiffies, end)); - - goto err_flush; - -err_unpin: - i915_gem_object_unpin_map(obj); -err_put: - i915_gem_object_put(obj); -err_flush: - if (err == -ENOMEM) - err = 0; - - intel_context_put(ce); - return err; -} - -static int igt_copy_blt_thread(void *arg) -{ - struct igt_thread_arg *thread = arg; - struct intel_engine_cs *engine = thread->engine; - struct rnd_state *prng = &thread->prng; - struct drm_i915_gem_object *src, *dst; - struct i915_gem_context *ctx; - struct intel_context *ce; - unsigned int prio; - IGT_TIMEOUT(end); - u64 total, max; - int err; - - ctx = thread->ctx; - if (!ctx) { - ctx = live_context_for_engine(engine, thread->file); - if (IS_ERR(ctx)) - return PTR_ERR(ctx); - - prio = i915_prandom_u32_max_state(I915_PRIORITY_MAX, prng); - ctx->sched.priority = prio; - } - - ce = i915_gem_context_get_engine(ctx, 0); - GEM_BUG_ON(IS_ERR(ce)); - - /* - * If we have a tiny shared address space, like for the GGTT - * then we can't be too greedy. - */ - max = ce->vm->total; - if (i915_is_ggtt(ce->vm) || thread->ctx) - max = div_u64(max, thread->n_cpus); - max >>= 4; - - total = PAGE_SIZE; - do { - /* Aim to keep the runtime under reasonable bounds! 
*/ - const u32 max_phys_size = SZ_64K; - u32 val = prandom_u32_state(prng); - u32 phys_sz; - u32 sz; - u32 *vaddr; - u32 i; - - total = min(total, max); - sz = i915_prandom_u32_max_state(total, prng) + 1; - phys_sz = sz % max_phys_size + 1; - - sz = round_up(sz, PAGE_SIZE); - phys_sz = round_up(phys_sz, PAGE_SIZE); - phys_sz = min(phys_sz, sz); - - pr_debug("%s with phys_sz= %x, sz=%x, val=%x\n", __func__, - phys_sz, sz, val); - - src = huge_gem_object(engine->i915, phys_sz, sz); - if (IS_ERR(src)) { - err = PTR_ERR(src); - goto err_flush; - } - - vaddr = i915_gem_object_pin_map_unlocked(src, I915_MAP_WB); - if (IS_ERR(vaddr)) { - err = PTR_ERR(vaddr); - goto err_put_src; - } - - memset32(vaddr, val, - huge_gem_object_phys_size(src) / sizeof(u32)); - - i915_gem_object_unpin_map(src); - - if (!(src->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ)) - src->cache_dirty = true; - - dst = huge_gem_object(engine->i915, phys_sz, sz); - if (IS_ERR(dst)) { - err = PTR_ERR(dst); - goto err_put_src; - } - - vaddr = i915_gem_object_pin_map_unlocked(dst, I915_MAP_WB); - if (IS_ERR(vaddr)) { - err = PTR_ERR(vaddr); - goto err_put_dst; - } - - memset32(vaddr, val ^ 0xdeadbeaf, - huge_gem_object_phys_size(dst) / sizeof(u32)); - - if (!(dst->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)) - dst->cache_dirty = true; - - err = i915_gem_object_copy_blt(src, dst, ce); - if (err) - goto err_unpin; - - err = i915_gem_object_wait(dst, 0, MAX_SCHEDULE_TIMEOUT); - if (err) - goto err_unpin; - - for (i = 0; i < huge_gem_object_phys_size(dst) / sizeof(u32); i += 17) { - if (!(dst->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ)) - drm_clflush_virt_range(&vaddr[i], sizeof(vaddr[i])); - - if (vaddr[i] != val) { - pr_err("vaddr[%u]=%x, expected=%x\n", i, - vaddr[i], val); - err = -EINVAL; - goto err_unpin; - } - } - - i915_gem_object_unpin_map(dst); - - i915_gem_object_put(src); - i915_gem_object_put(dst); - - total <<= 1; - } while (!time_after(jiffies, end)); - - goto err_flush; - -err_unpin: - i915_gem_object_unpin_map(dst); -err_put_dst: - i915_gem_object_put(dst); -err_put_src: - i915_gem_object_put(src); -err_flush: - if (err == -ENOMEM) - err = 0; - - intel_context_put(ce); - return err; -} - -static int igt_threaded_blt(struct intel_engine_cs *engine, - int (*blt_fn)(void *arg), - unsigned int flags) -#define SINGLE_CTX BIT(0) -{ - struct igt_thread_arg *thread; - struct task_struct **tsk; - unsigned int n_cpus, i; - I915_RND_STATE(prng); - int err = 0; - - n_cpus = num_online_cpus() + 1; - - tsk = kcalloc(n_cpus, sizeof(struct task_struct *), GFP_KERNEL); - if (!tsk) - return 0; - - thread = kcalloc(n_cpus, sizeof(struct igt_thread_arg), GFP_KERNEL); - if (!thread) - goto out_tsk; - - thread[0].file = mock_file(engine->i915); - if (IS_ERR(thread[0].file)) { - err = PTR_ERR(thread[0].file); - goto out_thread; - } - - if (flags & SINGLE_CTX) { - thread[0].ctx = live_context_for_engine(engine, thread[0].file); - if (IS_ERR(thread[0].ctx)) { - err = PTR_ERR(thread[0].ctx); - goto out_file; - } - } - - for (i = 0; i < n_cpus; ++i) { - thread[i].engine = engine; - thread[i].file = thread[0].file; - thread[i].ctx = thread[0].ctx; - thread[i].n_cpus = n_cpus; - thread[i].prng = - I915_RND_STATE_INITIALIZER(prandom_u32_state(&prng)); - - tsk[i] = kthread_run(blt_fn, &thread[i], "igt/blt-%d", i); - if (IS_ERR(tsk[i])) { - err = PTR_ERR(tsk[i]); - break; - } - - get_task_struct(tsk[i]); - } - - yield(); /* start all threads before we kthread_stop() */ - - for (i = 0; i < n_cpus; ++i) { - int status; - - if 
(IS_ERR_OR_NULL(tsk[i])) - continue; - - status = kthread_stop(tsk[i]); - if (status && !err) - err = status; - - put_task_struct(tsk[i]); - } - -out_file: - fput(thread[0].file); -out_thread: - kfree(thread); -out_tsk: - kfree(tsk); - return err; -} - -static int test_copy_engines(struct drm_i915_private *i915, - int (*fn)(void *arg), - unsigned int flags) -{ - struct intel_engine_cs *engine; - int ret; - - for_each_uabi_class_engine(engine, I915_ENGINE_CLASS_COPY, i915) { - ret = igt_threaded_blt(engine, fn, flags); - if (ret) - return ret; - } - - return 0; -} - -static int igt_fill_blt(void *arg) -{ - return test_copy_engines(arg, igt_fill_blt_thread, 0); -} - -static int igt_fill_blt_ctx0(void *arg) -{ - return test_copy_engines(arg, igt_fill_blt_thread, SINGLE_CTX); -} - -static int igt_copy_blt(void *arg) -{ - return test_copy_engines(arg, igt_copy_blt_thread, 0); -} - -static int igt_copy_blt_ctx0(void *arg) -{ - return test_copy_engines(arg, igt_copy_blt_thread, SINGLE_CTX); -} - -int i915_gem_object_blt_live_selftests(struct drm_i915_private *i915) -{ - static const struct i915_subtest tests[] = { - SUBTEST(igt_fill_blt), - SUBTEST(igt_fill_blt_ctx0), - SUBTEST(igt_copy_blt), - SUBTEST(igt_copy_blt_ctx0), - }; - - if (intel_gt_is_wedged(&i915->gt)) - return 0; - - return i915_live_subtests(tests, i915); -} - -int i915_gem_object_blt_perf_selftests(struct drm_i915_private *i915) -{ - static const struct i915_subtest tests[] = { - SUBTEST(perf_fill_blt), - SUBTEST(perf_copy_blt), - }; - - if (intel_gt_is_wedged(&i915->gt)) - return 0; - - return i915_live_subtests(tests, i915); -} diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c index 3a6ce87f8b52..d43d8dae0f69 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c @@ -25,13 +25,14 @@ static int mock_phys_object(void *arg) goto out; } + i915_gem_object_lock(obj, NULL); if (!i915_gem_object_has_struct_page(obj)) { + i915_gem_object_unlock(obj); err = -EINVAL; pr_err("shmem has no struct page\n"); goto out_obj; } - i915_gem_object_lock(obj, NULL); err = i915_gem_object_attach_phys(obj, PAGE_SIZE); i915_gem_object_unlock(obj); if (err) { diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.c b/drivers/gpu/drm/i915/gem/selftests/mock_context.c index 51b5a3421b40..fee070df1c97 100644 --- a/drivers/gpu/drm/i915/gem/selftests/mock_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.c @@ -14,6 +14,7 @@ mock_context(struct drm_i915_private *i915, { struct i915_gem_context *ctx; struct i915_gem_engines *e; + struct intel_sseu null_sseu = {}; ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); if (!ctx) @@ -30,15 +31,6 @@ mock_context(struct drm_i915_private *i915, i915_gem_context_set_persistence(ctx); - mutex_init(&ctx->engines_mutex); - e = default_engines(ctx); - if (IS_ERR(e)) - goto err_free; - RCU_INIT_POINTER(ctx->engines, e); - - INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL); - mutex_init(&ctx->lut_mutex); - if (name) { struct i915_ppgtt *ppgtt; @@ -46,25 +38,29 @@ mock_context(struct drm_i915_private *i915, ppgtt = mock_ppgtt(i915, name); if (!ppgtt) - goto err_put; - - mutex_lock(&ctx->mutex); - __set_ppgtt(ctx, &ppgtt->vm); - mutex_unlock(&ctx->mutex); + goto err_free; + ctx->vm = i915_vm_open(&ppgtt->vm); i915_vm_put(&ppgtt->vm); } + mutex_init(&ctx->engines_mutex); + e = default_engines(ctx, null_sseu); + if (IS_ERR(e)) + goto err_vm; + RCU_INIT_POINTER(ctx->engines, e); + + 
INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL); + mutex_init(&ctx->lut_mutex); + return ctx; +err_vm: + if (ctx->vm) + i915_vm_close(ctx->vm); err_free: kfree(ctx); return NULL; - -err_put: - i915_gem_context_set_closed(ctx); - i915_gem_context_put(ctx); - return NULL; } void mock_context_close(struct i915_gem_context *ctx) @@ -80,20 +76,29 @@ void mock_init_contexts(struct drm_i915_private *i915) struct i915_gem_context * live_context(struct drm_i915_private *i915, struct file *file) { + struct drm_i915_file_private *fpriv = to_drm_file(file)->driver_priv; + struct i915_gem_proto_context *pc; struct i915_gem_context *ctx; int err; u32 id; - ctx = i915_gem_create_context(i915, 0); + pc = proto_context_create(i915, 0); + if (IS_ERR(pc)) + return ERR_CAST(pc); + + ctx = i915_gem_create_context(i915, pc); + proto_context_close(pc); if (IS_ERR(ctx)) return ctx; i915_gem_context_set_no_error_capture(ctx); - err = gem_context_register(ctx, to_drm_file(file)->driver_priv, &id); + err = xa_alloc(&fpriv->context_xa, &id, NULL, xa_limit_32b, GFP_KERNEL); if (err < 0) goto err_ctx; + gem_context_register(ctx, fpriv, id); + return ctx; err_ctx: @@ -106,6 +111,7 @@ live_context_for_engine(struct intel_engine_cs *engine, struct file *file) { struct i915_gem_engines *engines; struct i915_gem_context *ctx; + struct intel_sseu null_sseu = {}; struct intel_context *ce; engines = alloc_engines(1); @@ -124,7 +130,7 @@ live_context_for_engine(struct intel_engine_cs *engine, struct file *file) return ERR_CAST(ce); } - intel_context_set_gem(ce, ctx); + intel_context_set_gem(ce, ctx, null_sseu); engines->engines[0] = ce; engines->num_engines = 1; @@ -139,11 +145,24 @@ live_context_for_engine(struct intel_engine_cs *engine, struct file *file) } struct i915_gem_context * -kernel_context(struct drm_i915_private *i915) +kernel_context(struct drm_i915_private *i915, + struct i915_address_space *vm) { struct i915_gem_context *ctx; + struct i915_gem_proto_context *pc; + + pc = proto_context_create(i915, 0); + if (IS_ERR(pc)) + return ERR_CAST(pc); + + if (vm) { + if (pc->vm) + i915_vm_put(pc->vm); + pc->vm = i915_vm_get(vm); + } - ctx = i915_gem_create_context(i915, 0); + ctx = i915_gem_create_context(i915, pc); + proto_context_close(pc); if (IS_ERR(ctx)) return ctx; diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.h b/drivers/gpu/drm/i915/gem/selftests/mock_context.h index 2a6121d33352..7a02fd9b5866 100644 --- a/drivers/gpu/drm/i915/gem/selftests/mock_context.h +++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.h @@ -10,6 +10,7 @@ struct file; struct drm_i915_private; struct intel_engine_cs; +struct i915_address_space; void mock_init_contexts(struct drm_i915_private *i915); @@ -25,7 +26,8 @@ live_context(struct drm_i915_private *i915, struct file *file); struct i915_gem_context * live_context_for_engine(struct intel_engine_cs *engine, struct file *file); -struct i915_gem_context *kernel_context(struct drm_i915_private *i915); +struct i915_gem_context *kernel_context(struct drm_i915_private *i915, + struct i915_address_space *vm); void kernel_context_close(struct i915_gem_context *ctx); #endif /* !__MOCK_CONTEXT_H */ diff --git a/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c b/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c index 4270b5a34a83..d6f5836396f8 100644 --- a/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c @@ -437,20 +437,20 @@ static int frequency_show(struct seq_file *m, void *unused) max_freq = (IS_GEN9_LP(i915) ? 
rp_state_cap >> 0 : rp_state_cap >> 16) & 0xff; max_freq *= (IS_GEN9_BC(i915) || - GRAPHICS_VER(i915) >= 10 ? GEN9_FREQ_SCALER : 1); + GRAPHICS_VER(i915) >= 11 ? GEN9_FREQ_SCALER : 1); seq_printf(m, "Lowest (RPN) frequency: %dMHz\n", intel_gpu_freq(rps, max_freq)); max_freq = (rp_state_cap & 0xff00) >> 8; max_freq *= (IS_GEN9_BC(i915) || - GRAPHICS_VER(i915) >= 10 ? GEN9_FREQ_SCALER : 1); + GRAPHICS_VER(i915) >= 11 ? GEN9_FREQ_SCALER : 1); seq_printf(m, "Nominal (RP1) frequency: %dMHz\n", intel_gpu_freq(rps, max_freq)); max_freq = (IS_GEN9_LP(i915) ? rp_state_cap >> 16 : rp_state_cap >> 0) & 0xff; max_freq *= (IS_GEN9_BC(i915) || - GRAPHICS_VER(i915) >= 10 ? GEN9_FREQ_SCALER : 1); + GRAPHICS_VER(i915) >= 11 ? GEN9_FREQ_SCALER : 1); seq_printf(m, "Max non-overclocked (RP0) frequency: %dMHz\n", intel_gpu_freq(rps, max_freq)); seq_printf(m, "Max overclocked frequency: %dMHz\n", @@ -500,7 +500,7 @@ static int llc_show(struct seq_file *m, void *data) min_gpu_freq = rps->min_freq; max_gpu_freq = rps->max_freq; - if (IS_GEN9_BC(i915) || GRAPHICS_VER(i915) >= 10) { + if (IS_GEN9_BC(i915) || GRAPHICS_VER(i915) >= 11) { /* Convert GT frequency to 50 HZ units */ min_gpu_freq /= GEN9_FREQ_SCALER; max_gpu_freq /= GEN9_FREQ_SCALER; @@ -518,7 +518,7 @@ static int llc_show(struct seq_file *m, void *data) intel_gpu_freq(rps, (gpu_freq * (IS_GEN9_BC(i915) || - GRAPHICS_VER(i915) >= 10 ? + GRAPHICS_VER(i915) >= 11 ? GEN9_FREQ_SCALER : 1))), ((ia_freq >> 0) & 0xff) * 100, ((ia_freq >> 8) & 0xff) * 100); diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c index 94e0a5669f90..461844dffd7e 100644 --- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c @@ -42,7 +42,7 @@ int gen8_emit_flush_rcs(struct i915_request *rq, u32 mode) vf_flush_wa = true; /* WaForGAMHang:kbl */ - if (IS_KBL_GT_STEP(rq->engine->i915, 0, STEP_B0)) + if (IS_KBL_GT_STEP(rq->engine->i915, 0, STEP_C0)) dc_flush_wa = true; } @@ -208,7 +208,7 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode) flags |= PIPE_CONTROL_FLUSH_L3; flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; - /* Wa_1409600907:tgl */ + /* Wa_1409600907:tgl,adl-p */ flags |= PIPE_CONTROL_DEPTH_STALL; flags |= PIPE_CONTROL_DC_FLUSH_ENABLE; flags |= PIPE_CONTROL_FLUSH_ENABLE; @@ -279,7 +279,7 @@ int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode) if (mode & EMIT_INVALIDATE) aux_inv = rq->engine->mask & ~BIT(BCS0); if (aux_inv) - cmd += 2 * hweight8(aux_inv) + 2; + cmd += 2 * hweight32(aux_inv) + 2; cs = intel_ring_begin(rq, cmd); if (IS_ERR(cs)) @@ -313,9 +313,8 @@ int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode) struct intel_engine_cs *engine; unsigned int tmp; - *cs++ = MI_LOAD_REGISTER_IMM(hweight8(aux_inv)); - for_each_engine_masked(engine, rq->engine->gt, - aux_inv, tmp) { + *cs++ = MI_LOAD_REGISTER_IMM(hweight32(aux_inv)); + for_each_engine_masked(engine, rq->engine->gt, aux_inv, tmp) { *cs++ = i915_mmio_reg_offset(aux_inv_reg(engine)); *cs++ = AUX_INV; } @@ -506,7 +505,8 @@ gen8_emit_fini_breadcrumb_tail(struct i915_request *rq, u32 *cs) *cs++ = MI_USER_INTERRUPT; *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; - if (intel_engine_has_semaphores(rq->engine)) + if (intel_engine_has_semaphores(rq->engine) && + !intel_uc_uses_guc_submission(&rq->engine->gt->uc)) cs = emit_preempt_busywait(rq, cs); rq->tail = intel_ring_offset(rq, cs); @@ -598,7 +598,8 @@ gen12_emit_fini_breadcrumb_tail(struct i915_request *rq, u32 *cs) *cs++ = 
MI_USER_INTERRUPT; *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; - if (intel_engine_has_semaphores(rq->engine)) + if (intel_engine_has_semaphores(rq->engine) && + !intel_uc_uses_guc_submission(&rq->engine->gt->uc)) cs = gen12_emit_preempt_busywait(rq, cs); rq->tail = intel_ring_offset(rq, cs); diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c index da4f5eb43ac2..6e0e52eeb87a 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -358,6 +358,54 @@ static void gen8_ppgtt_alloc(struct i915_address_space *vm, &start, start + length, vm->top); } +static void __gen8_ppgtt_foreach(struct i915_address_space *vm, + struct i915_page_directory *pd, + u64 *start, u64 end, int lvl, + void (*fn)(struct i915_address_space *vm, + struct i915_page_table *pt, + void *data), + void *data) +{ + unsigned int idx, len; + + len = gen8_pd_range(*start, end, lvl--, &idx); + + spin_lock(&pd->lock); + do { + struct i915_page_table *pt = pd->entry[idx]; + + atomic_inc(&pt->used); + spin_unlock(&pd->lock); + + if (lvl) { + __gen8_ppgtt_foreach(vm, as_pd(pt), start, end, lvl, + fn, data); + } else { + fn(vm, pt, data); + *start += gen8_pt_count(*start, end); + } + + spin_lock(&pd->lock); + atomic_dec(&pt->used); + } while (idx++, --len); + spin_unlock(&pd->lock); +} + +static void gen8_ppgtt_foreach(struct i915_address_space *vm, + u64 start, u64 length, + void (*fn)(struct i915_address_space *vm, + struct i915_page_table *pt, + void *data), + void *data) +{ + start >>= GEN8_PTE_SHIFT; + length >>= GEN8_PTE_SHIFT; + + __gen8_ppgtt_foreach(vm, i915_vm_to_ppgtt(vm)->pd, + &start, start + length, vm->top, + fn, data); +} + static __always_inline u64 gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt, struct i915_page_directory *pdp, @@ -552,6 +600,24 @@ static void gen8_ppgtt_insert(struct i915_address_space *vm, } } +static void gen8_ppgtt_insert_entry(struct i915_address_space *vm, + dma_addr_t addr, + u64 offset, + enum i915_cache_level level, + u32 flags) +{ + u64 idx = offset >> GEN8_PTE_SHIFT; + struct i915_page_directory * const pdp = + gen8_pdp_for_page_index(vm, idx); + struct i915_page_directory *pd = + i915_pd_entry(pdp, gen8_pd_index(idx, 2)); + gen8_pte_t *vaddr; + + vaddr = px_vaddr(i915_pt_entry(pd, gen8_pd_index(idx, 1))); + vaddr[gen8_pd_index(idx, 0)] = gen8_pte_encode(addr, level, flags); + clflush_cache_range(&vaddr[gen8_pd_index(idx, 0)], sizeof(*vaddr)); +} + static int gen8_init_scratch(struct i915_address_space *vm) { u32 pte_flags; @@ -731,8 +797,10 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt) ppgtt->vm.bind_async_flags = I915_VMA_LOCAL_BIND; ppgtt->vm.insert_entries = gen8_ppgtt_insert; + ppgtt->vm.insert_page = gen8_ppgtt_insert_entry; ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc; ppgtt->vm.clear_range = gen8_ppgtt_clear; + ppgtt->vm.foreach = gen8_ppgtt_foreach; ppgtt->vm.pte_encode = gen8_pte_encode; diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c index 38cc42783dfb..209cf265bf74 100644 --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c @@ -15,28 +15,14 @@ #include "intel_gt_pm.h" #include "intel_gt_requests.h" -static bool irq_enable(struct intel_engine_cs *engine) +static bool irq_enable(struct intel_breadcrumbs *b) { - if (!engine->irq_enable) - return false; - - /* Caller disables interrupts */ - spin_lock(&engine->gt->irq_lock); - engine->irq_enable(engine); - spin_unlock(&engine->gt->irq_lock); - - 
return true; + return intel_engine_irq_enable(b->irq_engine); } -static void irq_disable(struct intel_engine_cs *engine) +static void irq_disable(struct intel_breadcrumbs *b) { - if (!engine->irq_disable) - return; - - /* Caller disables interrupts */ - spin_lock(&engine->gt->irq_lock); - engine->irq_disable(engine); - spin_unlock(&engine->gt->irq_lock); + intel_engine_irq_disable(b->irq_engine); } static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b) @@ -57,7 +43,7 @@ static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b) WRITE_ONCE(b->irq_armed, true); /* Requests may have completed before we could enable the interrupt. */ - if (!b->irq_enabled++ && irq_enable(b->irq_engine)) + if (!b->irq_enabled++ && b->irq_enable(b)) irq_work_queue(&b->irq_work); } @@ -76,7 +62,7 @@ static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b) { GEM_BUG_ON(!b->irq_enabled); if (!--b->irq_enabled) - irq_disable(b->irq_engine); + b->irq_disable(b); WRITE_ONCE(b->irq_armed, false); intel_gt_pm_put_async(b->irq_engine->gt); @@ -259,6 +245,9 @@ static void signal_irq_work(struct irq_work *work) llist_entry(signal, typeof(*rq), signal_node); struct list_head cb_list; + if (rq->engine->sched_engine->retire_inflight_request_prio) + rq->engine->sched_engine->retire_inflight_request_prio(rq); + spin_lock(&rq->lock); list_replace(&rq->fence.cb_list, &cb_list); __dma_fence_signal__timestamp(&rq->fence, timestamp); @@ -281,7 +270,7 @@ intel_breadcrumbs_create(struct intel_engine_cs *irq_engine) if (!b) return NULL; - b->irq_engine = irq_engine; + kref_init(&b->ref); spin_lock_init(&b->signalers_lock); INIT_LIST_HEAD(&b->signalers); @@ -290,6 +279,10 @@ intel_breadcrumbs_create(struct intel_engine_cs *irq_engine) spin_lock_init(&b->irq_lock); init_irq_work(&b->irq_work, signal_irq_work); + b->irq_engine = irq_engine; + b->irq_enable = irq_enable; + b->irq_disable = irq_disable; + return b; } @@ -303,9 +296,9 @@ void intel_breadcrumbs_reset(struct intel_breadcrumbs *b) spin_lock_irqsave(&b->irq_lock, flags); if (b->irq_enabled) - irq_enable(b->irq_engine); + b->irq_enable(b); else - irq_disable(b->irq_engine); + b->irq_disable(b); spin_unlock_irqrestore(&b->irq_lock, flags); } @@ -325,11 +318,14 @@ void __intel_breadcrumbs_park(struct intel_breadcrumbs *b) } } -void intel_breadcrumbs_free(struct intel_breadcrumbs *b) +void intel_breadcrumbs_free(struct kref *kref) { + struct intel_breadcrumbs *b = container_of(kref, typeof(*b), ref); + irq_work_sync(&b->irq_work); GEM_BUG_ON(!list_empty(&b->signalers)); GEM_BUG_ON(b->irq_armed); + kfree(b); } diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.h b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.h index 3ce5ce270b04..be0d4f379a85 100644 --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.h +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.h @@ -9,7 +9,7 @@ #include <linux/atomic.h> #include <linux/irq_work.h> -#include "intel_engine_types.h" +#include "intel_breadcrumbs_types.h" struct drm_printer; struct i915_request; @@ -17,7 +17,7 @@ struct intel_breadcrumbs; struct intel_breadcrumbs * intel_breadcrumbs_create(struct intel_engine_cs *irq_engine); -void intel_breadcrumbs_free(struct intel_breadcrumbs *b); +void intel_breadcrumbs_free(struct kref *kref); void intel_breadcrumbs_reset(struct intel_breadcrumbs *b); void __intel_breadcrumbs_park(struct intel_breadcrumbs *b); @@ -48,4 +48,16 @@ void i915_request_cancel_breadcrumb(struct i915_request *request); void intel_context_remove_breadcrumbs(struct intel_context *ce, struct 
intel_breadcrumbs *b); +static inline struct intel_breadcrumbs * +intel_breadcrumbs_get(struct intel_breadcrumbs *b) +{ + kref_get(&b->ref); + return b; +} + +static inline void intel_breadcrumbs_put(struct intel_breadcrumbs *b) +{ + kref_put(&b->ref, intel_breadcrumbs_free); +} + #endif /* __INTEL_BREADCRUMBS__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h b/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h index 3a084ce8ff5e..72dfd3748c4c 100644 --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h @@ -7,10 +7,13 @@ #define __INTEL_BREADCRUMBS_TYPES__ #include <linux/irq_work.h> +#include <linux/kref.h> #include <linux/list.h> #include <linux/spinlock.h> #include <linux/types.h> +#include "intel_engine_types.h" + /* * Rather than have every client wait upon all user interrupts, * with the herd waking after every interrupt and each doing the @@ -29,6 +32,7 @@ * the overhead of waking that client is much preferred. */ struct intel_breadcrumbs { + struct kref ref; atomic_t active; spinlock_t signalers_lock; /* protects the list of signalers */ @@ -42,7 +46,10 @@ struct intel_breadcrumbs { bool irq_armed; /* Not all breadcrumbs are attached to physical HW */ + intel_engine_mask_t engine_mask; struct intel_engine_cs *irq_engine; + bool (*irq_enable)(struct intel_breadcrumbs *b); + void (*irq_disable)(struct intel_breadcrumbs *b); }; #endif /* __INTEL_BREADCRUMBS_TYPES__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index 4033184f13b9..745e84c72c90 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -7,28 +7,26 @@ #include "gem/i915_gem_pm.h" #include "i915_drv.h" -#include "i915_globals.h" +#include "i915_trace.h" #include "intel_context.h" #include "intel_engine.h" #include "intel_engine_pm.h" #include "intel_ring.h" -static struct i915_global_context { - struct i915_global base; - struct kmem_cache *slab_ce; -} global; +static struct kmem_cache *slab_ce; static struct intel_context *intel_context_alloc(void) { - return kmem_cache_zalloc(global.slab_ce, GFP_KERNEL); + return kmem_cache_zalloc(slab_ce, GFP_KERNEL); } static void rcu_context_free(struct rcu_head *rcu) { struct intel_context *ce = container_of(rcu, typeof(*ce), rcu); - kmem_cache_free(global.slab_ce, ce); + trace_intel_context_free(ce); + kmem_cache_free(slab_ce, ce); } void intel_context_free(struct intel_context *ce) @@ -46,6 +44,7 @@ intel_context_create(struct intel_engine_cs *engine) return ERR_PTR(-ENOMEM); intel_context_init(ce, engine); + trace_intel_context_create(ce); return ce; } @@ -80,7 +79,7 @@ static int intel_context_active_acquire(struct intel_context *ce) __i915_active_acquire(&ce->active); - if (intel_context_is_barrier(ce)) + if (intel_context_is_barrier(ce) || intel_engine_uses_guc(ce->engine)) return 0; /* Preallocate tracking nodes */ @@ -268,6 +267,8 @@ int __intel_context_do_pin_ww(struct intel_context *ce, GEM_BUG_ON(!intel_context_is_pinned(ce)); /* no overflow! 
*/ + trace_intel_context_do_pin(ce); + err_unlock: mutex_unlock(&ce->pin_mutex); err_post_unpin: @@ -306,9 +307,9 @@ retry: return err; } -void intel_context_unpin(struct intel_context *ce) +void __intel_context_do_unpin(struct intel_context *ce, int sub) { - if (!atomic_dec_and_test(&ce->pin_count)) + if (!atomic_sub_and_test(sub, &ce->pin_count)) return; CE_TRACE(ce, "unpin\n"); @@ -323,6 +324,7 @@ void intel_context_unpin(struct intel_context *ce) */ intel_context_get(ce); intel_context_active_release(ce); + trace_intel_context_do_unpin(ce); intel_context_put(ce); } @@ -360,6 +362,12 @@ static int __intel_context_active(struct i915_active *active) return 0; } +static int sw_fence_dummy_notify(struct i915_sw_fence *sf, + enum i915_sw_fence_notify state) +{ + return NOTIFY_DONE; +} + void intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine) { @@ -371,7 +379,8 @@ intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine) ce->engine = engine; ce->ops = engine->cops; ce->sseu = engine->sseu; - ce->ring = __intel_context_ring_size(SZ_4K); + ce->ring = NULL; + ce->ring_size = SZ_4K; ewma_runtime_init(&ce->runtime.avg); @@ -383,6 +392,22 @@ intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine) mutex_init(&ce->pin_mutex); + spin_lock_init(&ce->guc_state.lock); + INIT_LIST_HEAD(&ce->guc_state.fences); + + spin_lock_init(&ce->guc_active.lock); + INIT_LIST_HEAD(&ce->guc_active.requests); + + ce->guc_id = GUC_INVALID_LRC_ID; + INIT_LIST_HEAD(&ce->guc_id_link); + + /* + * Initialize fence to be complete as this is expected to be complete + * unless there is a pending schedule disable outstanding. + */ + i915_sw_fence_init(&ce->guc_blocked, sw_fence_dummy_notify); + i915_sw_fence_commit(&ce->guc_blocked); + i915_active_init(&ce->active, __intel_context_active, __intel_context_retire, 0); } @@ -397,28 +422,17 @@ void intel_context_fini(struct intel_context *ce) i915_active_fini(&ce->active); } -static void i915_global_context_shrink(void) -{ - kmem_cache_shrink(global.slab_ce); -} - -static void i915_global_context_exit(void) +void i915_context_module_exit(void) { - kmem_cache_destroy(global.slab_ce); + kmem_cache_destroy(slab_ce); } -static struct i915_global_context global = { { - .shrink = i915_global_context_shrink, - .exit = i915_global_context_exit, -} }; - -int __init i915_global_context_init(void) +int __init i915_context_module_init(void) { - global.slab_ce = KMEM_CACHE(intel_context, SLAB_HWCACHE_ALIGN); - if (!global.slab_ce) + slab_ce = KMEM_CACHE(intel_context, SLAB_HWCACHE_ALIGN); + if (!slab_ce) return -ENOMEM; - i915_global_register(&global.base); return 0; } @@ -499,6 +513,26 @@ retry: return rq; } +struct i915_request *intel_context_find_active_request(struct intel_context *ce) +{ + struct i915_request *rq, *active = NULL; + unsigned long flags; + + GEM_BUG_ON(!intel_engine_uses_guc(ce->engine)); + + spin_lock_irqsave(&ce->guc_active.lock, flags); + list_for_each_entry_reverse(rq, &ce->guc_active.requests, + sched.link) { + if (i915_request_completed(rq)) + break; + + active = rq; + } + spin_unlock_irqrestore(&ce->guc_active.lock, flags); + + return active; +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftest_context.c" #endif diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h index f83a73a2b39f..c41098950746 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.h +++ b/drivers/gpu/drm/i915/gt/intel_context.h @@ -16,6 +16,7 @@ #include "intel_engine_types.h" #include 
"intel_ring_types.h" #include "intel_timeline_types.h" +#include "i915_trace.h" #define CE_TRACE(ce, fmt, ...) do { \ const struct intel_context *ce__ = (ce); \ @@ -30,6 +31,9 @@ void intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine); void intel_context_fini(struct intel_context *ce); +void i915_context_module_exit(void); +int i915_context_module_init(void); + struct intel_context * intel_context_create(struct intel_engine_cs *engine); @@ -69,6 +73,13 @@ intel_context_is_pinned(struct intel_context *ce) return atomic_read(&ce->pin_count); } +static inline void intel_context_cancel_request(struct intel_context *ce, + struct i915_request *rq) +{ + GEM_BUG_ON(!ce->ops->cancel_request); + return ce->ops->cancel_request(ce, rq); +} + /** * intel_context_unlock_pinned - Releases the earlier locking of 'pinned' status * @ce - the context @@ -113,7 +124,32 @@ static inline void __intel_context_pin(struct intel_context *ce) atomic_inc(&ce->pin_count); } -void intel_context_unpin(struct intel_context *ce); +void __intel_context_do_unpin(struct intel_context *ce, int sub); + +static inline void intel_context_sched_disable_unpin(struct intel_context *ce) +{ + __intel_context_do_unpin(ce, 2); +} + +static inline void intel_context_unpin(struct intel_context *ce) +{ + if (!ce->ops->sched_disable) { + __intel_context_do_unpin(ce, 1); + } else { + /* + * Move ownership of this pin to the scheduling disable which is + * an async operation. When that operation completes the above + * intel_context_sched_disable_unpin is called potentially + * unpinning the context. + */ + while (!atomic_add_unless(&ce->pin_count, -1, 1)) { + if (atomic_cmpxchg(&ce->pin_count, 1, 2) == 1) { + ce->ops->sched_disable(ce); + break; + } + } + } +} void intel_context_enter_engine(struct intel_context *ce); void intel_context_exit_engine(struct intel_context *ce); @@ -175,10 +211,8 @@ int intel_context_prepare_remote_request(struct intel_context *ce, struct i915_request *intel_context_create_request(struct intel_context *ce); -static inline struct intel_ring *__intel_context_ring_size(u64 sz) -{ - return u64_to_ptr(struct intel_ring, sz); -} +struct i915_request * +intel_context_find_active_request(struct intel_context *ce); static inline bool intel_context_is_barrier(const struct intel_context *ce) { @@ -220,6 +254,18 @@ static inline bool intel_context_set_banned(struct intel_context *ce) return test_and_set_bit(CONTEXT_BANNED, &ce->flags); } +static inline bool intel_context_ban(struct intel_context *ce, + struct i915_request *rq) +{ + bool ret = intel_context_set_banned(ce); + + trace_intel_context_ban(ce); + if (ce->ops->ban) + ce->ops->ban(ce, rq); + + return ret; +} + static inline bool intel_context_force_single_submission(const struct intel_context *ce) { diff --git a/drivers/gpu/drm/i915/gt/intel_context_param.c b/drivers/gpu/drm/i915/gt/intel_context_param.c deleted file mode 100644 index 65dcd090245d..000000000000 --- a/drivers/gpu/drm/i915/gt/intel_context_param.c +++ /dev/null @@ -1,63 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright © 2019 Intel Corporation - */ - -#include "i915_active.h" -#include "intel_context.h" -#include "intel_context_param.h" -#include "intel_ring.h" - -int intel_context_set_ring_size(struct intel_context *ce, long sz) -{ - int err; - - if (intel_context_lock_pinned(ce)) - return -EINTR; - - err = i915_active_wait(&ce->active); - if (err < 0) - goto unlock; - - if (intel_context_is_pinned(ce)) { - err = -EBUSY; /* In active use, come back later! 
*/ - goto unlock; - } - - if (test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) { - struct intel_ring *ring; - - /* Replace the existing ringbuffer */ - ring = intel_engine_create_ring(ce->engine, sz); - if (IS_ERR(ring)) { - err = PTR_ERR(ring); - goto unlock; - } - - intel_ring_put(ce->ring); - ce->ring = ring; - - /* Context image will be updated on next pin */ - } else { - ce->ring = __intel_context_ring_size(sz); - } - -unlock: - intel_context_unlock_pinned(ce); - return err; -} - -long intel_context_get_ring_size(struct intel_context *ce) -{ - long sz = (unsigned long)READ_ONCE(ce->ring); - - if (test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) { - if (intel_context_lock_pinned(ce)) - return -EINTR; - - sz = ce->ring->size; - intel_context_unlock_pinned(ce); - } - - return sz; -} diff --git a/drivers/gpu/drm/i915/gt/intel_context_param.h b/drivers/gpu/drm/i915/gt/intel_context_param.h index 3ecacc675f41..0c69cb42d075 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_param.h +++ b/drivers/gpu/drm/i915/gt/intel_context_param.h @@ -10,14 +10,10 @@ #include "intel_context.h" -int intel_context_set_ring_size(struct intel_context *ce, long sz); -long intel_context_get_ring_size(struct intel_context *ce); - -static inline int +static inline void intel_context_set_watchdog_us(struct intel_context *ce, u64 timeout_us) { ce->watchdog.timeout_us = timeout_us; - return 0; } #endif /* INTEL_CONTEXT_PARAM_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index ed8c447a7346..e54351a170e2 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -13,12 +13,14 @@ #include <linux/types.h> #include "i915_active_types.h" +#include "i915_sw_fence.h" #include "i915_utils.h" #include "intel_engine_types.h" #include "intel_sseu.h" -#define CONTEXT_REDZONE POISON_INUSE +#include "uc/intel_guc_fwif.h" +#define CONTEXT_REDZONE POISON_INUSE DECLARE_EWMA(runtime, 3, 8); struct i915_gem_context; @@ -35,16 +37,29 @@ struct intel_context_ops { int (*alloc)(struct intel_context *ce); + void (*ban)(struct intel_context *ce, struct i915_request *rq); + int (*pre_pin)(struct intel_context *ce, struct i915_gem_ww_ctx *ww, void **vaddr); int (*pin)(struct intel_context *ce, void *vaddr); void (*unpin)(struct intel_context *ce); void (*post_unpin)(struct intel_context *ce); + void (*cancel_request)(struct intel_context *ce, + struct i915_request *rq); + void (*enter)(struct intel_context *ce); void (*exit)(struct intel_context *ce); + void (*sched_disable)(struct intel_context *ce); + void (*reset)(struct intel_context *ce); void (*destroy)(struct kref *kref); + + /* virtual engine/context interface */ + struct intel_context *(*create_virtual)(struct intel_engine_cs **engine, + unsigned int count); + struct intel_engine_cs *(*get_sibling)(struct intel_engine_cs *engine, + unsigned int sibling); }; struct intel_context { @@ -82,6 +97,7 @@ struct intel_context { spinlock_t signal_lock; /* protects signals, the list of requests */ struct i915_vma *state; + u32 ring_size; struct intel_ring *ring; struct intel_timeline *timeline; @@ -95,6 +111,7 @@ struct intel_context { #define CONTEXT_BANNED 6 #define CONTEXT_FORCE_SINGLE_SUBMISSION 7 #define CONTEXT_NOPREEMPT 8 +#define CONTEXT_LRCA_DIRTY 9 struct { u64 timeout_us; @@ -136,6 +153,51 @@ struct intel_context { struct intel_sseu sseu; u8 wa_bb_page; /* if set, page num reserved for context workarounds */ + + struct { + /** lock: protects everything in guc_state */ + spinlock_t 
lock; + /** + * sched_state: scheduling state of this context using GuC + * submission + */ + u16 sched_state; + /* + * fences: maintains a list of requests that have a submit + * fence related to GuC submission + */ + struct list_head fences; + } guc_state; + + struct { + /** lock: protects everything in guc_active */ + spinlock_t lock; + /** requests: active requests on this context */ + struct list_head requests; + } guc_active; + + /* GuC scheduling state flags that do not require a lock. */ + atomic_t guc_sched_state_no_lock; + + /* GuC LRC descriptor ID */ + u16 guc_id; + + /* GuC LRC descriptor reference count */ + atomic_t guc_id_ref; + + /* + * GuC ID link - in list when unpinned but guc_id still valid in GuC + */ + struct list_head guc_id_link; + + /* GuC context blocked fence */ + struct i915_sw_fence guc_blocked; + + /* + * GuC priority management + */ + u8 guc_prio; + u32 guc_prio_count[GUC_CLIENT_PRIORITY_NUM]; }; #endif /* __INTEL_CONTEXT_TYPES__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index 8d9184920c51..87579affb952 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -19,7 +19,9 @@ #include "intel_workarounds.h" struct drm_printer; +struct intel_context; struct intel_gt; +struct lock_class_key; /* Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill, * but keeps the logic simple. Indeed, the whole purpose of this macro is just @@ -123,20 +125,6 @@ execlists_active(const struct intel_engine_execlists *execlists) return active; } -static inline void -execlists_active_lock_bh(struct intel_engine_execlists *execlists) -{ - local_bh_disable(); /* prevent local softirq and lock recursion */ - tasklet_lock(&execlists->tasklet); -} - -static inline void -execlists_active_unlock_bh(struct intel_engine_execlists *execlists) -{ - tasklet_unlock(&execlists->tasklet); - local_bh_enable(); /* restore softirq, and kick ksoftirqd!
*/ -} - struct i915_request * execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists); @@ -186,11 +174,12 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value) #define I915_GEM_HWS_PREEMPT_ADDR (I915_GEM_HWS_PREEMPT * sizeof(u32)) #define I915_GEM_HWS_SEQNO 0x40 #define I915_GEM_HWS_SEQNO_ADDR (I915_GEM_HWS_SEQNO * sizeof(u32)) +#define I915_GEM_HWS_MIGRATE (0x42 * sizeof(u32)) #define I915_GEM_HWS_SCRATCH 0x80 #define I915_HWS_CSB_BUF0_INDEX 0x10 #define I915_HWS_CSB_WRITE_INDEX 0x1f -#define CNL_HWS_CSB_WRITE_INDEX 0x2f +#define ICL_HWS_CSB_WRITE_INDEX 0x2f void intel_engine_stop(struct intel_engine_cs *engine); void intel_engine_cleanup(struct intel_engine_cs *engine); @@ -223,6 +212,9 @@ void intel_engine_get_instdone(const struct intel_engine_cs *engine, void intel_engine_init_execlists(struct intel_engine_cs *engine); +bool intel_engine_irq_enable(struct intel_engine_cs *engine); +void intel_engine_irq_disable(struct intel_engine_cs *engine); + static inline void __intel_engine_reset(struct intel_engine_cs *engine, bool stalled) { @@ -248,17 +240,27 @@ __printf(3, 4) void intel_engine_dump(struct intel_engine_cs *engine, struct drm_printer *m, const char *header, ...); +void intel_engine_dump_active_requests(struct list_head *requests, + struct i915_request *hung_rq, + struct drm_printer *m); ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine, ktime_t *now); struct i915_request * -intel_engine_find_active_request(struct intel_engine_cs *engine); +intel_engine_execlist_find_hung_request(struct intel_engine_cs *engine); u32 intel_engine_context_size(struct intel_gt *gt, u8 class); +struct intel_context * +intel_engine_create_pinned_context(struct intel_engine_cs *engine, + struct i915_address_space *vm, + unsigned int ring_size, + unsigned int hwsp, + struct lock_class_key *key, + const char *name); + +void intel_engine_destroy_pinned_context(struct intel_context *ce); -void intel_engine_init_active(struct intel_engine_cs *engine, - unsigned int subclass); #define ENGINE_PHYSICAL 0 #define ENGINE_MOCK 1 #define ENGINE_VIRTUAL 2 @@ -277,13 +279,60 @@ intel_engine_has_preempt_reset(const struct intel_engine_cs *engine) return intel_engine_has_preemption(engine); } +struct intel_context * +intel_engine_create_virtual(struct intel_engine_cs **siblings, + unsigned int count); + +static inline bool +intel_virtual_engine_has_heartbeat(const struct intel_engine_cs *engine) +{ + /* + * For non-GuC submission we expect the back-end to look at the + * heartbeat status of the actual physical engine that the work + * has been (or is being) scheduled on, so we should only reach + * here with GuC submission enabled. 
+ */ + GEM_BUG_ON(!intel_engine_uses_guc(engine)); + + return intel_guc_virtual_engine_has_heartbeat(engine); +} + static inline bool intel_engine_has_heartbeat(const struct intel_engine_cs *engine) { if (!IS_ACTIVE(CONFIG_DRM_I915_HEARTBEAT_INTERVAL)) return false; - return READ_ONCE(engine->props.heartbeat_interval_ms); + if (intel_engine_is_virtual(engine)) + return intel_virtual_engine_has_heartbeat(engine); + else + return READ_ONCE(engine->props.heartbeat_interval_ms); +} + +static inline struct intel_engine_cs * +intel_engine_get_sibling(struct intel_engine_cs *engine, unsigned int sibling) +{ + GEM_BUG_ON(!intel_engine_is_virtual(engine)); + return engine->cops->get_sibling(engine, sibling); +} + +static inline void +intel_engine_set_hung_context(struct intel_engine_cs *engine, + struct intel_context *ce) +{ + engine->hung_ce = ce; +} + +static inline void +intel_engine_clear_hung_context(struct intel_engine_cs *engine) +{ + intel_engine_set_hung_context(engine, NULL); +} + +static inline struct intel_context * +intel_engine_get_hung_context(struct intel_engine_cs *engine) +{ + return engine->hung_ce; } #endif /* _INTEL_RINGBUFFER_H_ */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 7f03df236613..0d9105a31d84 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -35,14 +35,12 @@ #define DEFAULT_LR_CONTEXT_RENDER_SIZE (22 * PAGE_SIZE) #define GEN8_LR_CONTEXT_RENDER_SIZE (20 * PAGE_SIZE) #define GEN9_LR_CONTEXT_RENDER_SIZE (22 * PAGE_SIZE) -#define GEN10_LR_CONTEXT_RENDER_SIZE (18 * PAGE_SIZE) #define GEN11_LR_CONTEXT_RENDER_SIZE (14 * PAGE_SIZE) #define GEN8_LR_CONTEXT_OTHER_SIZE ( 2 * PAGE_SIZE) #define MAX_MMIO_BASES 3 struct engine_info { - unsigned int hw_id; u8 class; u8 instance; /* mmio bases table *must* be sorted in reverse graphics_ver order */ @@ -54,7 +52,6 @@ struct engine_info { static const struct engine_info intel_engines[] = { [RCS0] = { - .hw_id = RCS0_HW, .class = RENDER_CLASS, .instance = 0, .mmio_bases = { @@ -62,7 +59,6 @@ static const struct engine_info intel_engines[] = { }, }, [BCS0] = { - .hw_id = BCS0_HW, .class = COPY_ENGINE_CLASS, .instance = 0, .mmio_bases = { @@ -70,7 +66,6 @@ static const struct engine_info intel_engines[] = { }, }, [VCS0] = { - .hw_id = VCS0_HW, .class = VIDEO_DECODE_CLASS, .instance = 0, .mmio_bases = { @@ -80,7 +75,6 @@ static const struct engine_info intel_engines[] = { }, }, [VCS1] = { - .hw_id = VCS1_HW, .class = VIDEO_DECODE_CLASS, .instance = 1, .mmio_bases = { @@ -89,7 +83,6 @@ static const struct engine_info intel_engines[] = { }, }, [VCS2] = { - .hw_id = VCS2_HW, .class = VIDEO_DECODE_CLASS, .instance = 2, .mmio_bases = { @@ -97,15 +90,41 @@ static const struct engine_info intel_engines[] = { }, }, [VCS3] = { - .hw_id = VCS3_HW, .class = VIDEO_DECODE_CLASS, .instance = 3, .mmio_bases = { { .graphics_ver = 11, .base = GEN11_BSD4_RING_BASE } }, }, + [VCS4] = { + .class = VIDEO_DECODE_CLASS, + .instance = 4, + .mmio_bases = { + { .graphics_ver = 12, .base = XEHP_BSD5_RING_BASE } + }, + }, + [VCS5] = { + .class = VIDEO_DECODE_CLASS, + .instance = 5, + .mmio_bases = { + { .graphics_ver = 12, .base = XEHP_BSD6_RING_BASE } + }, + }, + [VCS6] = { + .class = VIDEO_DECODE_CLASS, + .instance = 6, + .mmio_bases = { + { .graphics_ver = 12, .base = XEHP_BSD7_RING_BASE } + }, + }, + [VCS7] = { + .class = VIDEO_DECODE_CLASS, + .instance = 7, + .mmio_bases = { + { .graphics_ver = 12, .base = XEHP_BSD8_RING_BASE } + }, + }, [VECS0] = 
{ - .hw_id = VECS0_HW, .class = VIDEO_ENHANCEMENT_CLASS, .instance = 0, .mmio_bases = { @@ -114,13 +133,26 @@ static const struct engine_info intel_engines[] = { }, }, [VECS1] = { - .hw_id = VECS1_HW, .class = VIDEO_ENHANCEMENT_CLASS, .instance = 1, .mmio_bases = { { .graphics_ver = 11, .base = GEN11_VEBOX2_RING_BASE } }, }, + [VECS2] = { + .class = VIDEO_ENHANCEMENT_CLASS, + .instance = 2, + .mmio_bases = { + { .graphics_ver = 12, .base = XEHP_VEBOX3_RING_BASE } + }, + }, + [VECS3] = { + .class = VIDEO_ENHANCEMENT_CLASS, + .instance = 3, + .mmio_bases = { + { .graphics_ver = 12, .base = XEHP_VEBOX4_RING_BASE } + }, + }, }; /** @@ -153,8 +185,6 @@ u32 intel_engine_context_size(struct intel_gt *gt, u8 class) case 12: case 11: return GEN11_LR_CONTEXT_RENDER_SIZE; - case 10: - return GEN10_LR_CONTEXT_RENDER_SIZE; case 9: return GEN9_LR_CONTEXT_RENDER_SIZE; case 8: @@ -269,6 +299,8 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id) BUILD_BUG_ON(MAX_ENGINE_CLASS >= BIT(GEN11_ENGINE_CLASS_WIDTH)); BUILD_BUG_ON(MAX_ENGINE_INSTANCE >= BIT(GEN11_ENGINE_INSTANCE_WIDTH)); + BUILD_BUG_ON(I915_MAX_VCS > (MAX_ENGINE_INSTANCE + 1)); + BUILD_BUG_ON(I915_MAX_VECS > (MAX_ENGINE_INSTANCE + 1)); if (GEM_DEBUG_WARN_ON(id >= ARRAY_SIZE(gt->engine))) return -EINVAL; @@ -294,7 +326,6 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id) engine->i915 = i915; engine->gt = gt; engine->uncore = gt->uncore; - engine->hw_id = info->hw_id; guc_class = engine_class_to_guc_class(info->class); engine->guc_id = MAKE_GUC_ID(guc_class, info->instance); engine->mmio_base = __engine_mmio_base(i915, info->mmio_bases); @@ -328,9 +359,6 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id) if (engine->context_size) DRIVER_CAPS(i915)->has_logical_contexts = true; - /* Nothing to do here, execute in order of dependencies */ - engine->schedule = NULL; - ewma__engine_latency_init(&engine->latency); seqcount_init(&engine->stats.lock); @@ -445,6 +473,28 @@ void intel_engines_free(struct intel_gt *gt) } } +static +bool gen11_vdbox_has_sfc(struct drm_i915_private *i915, + unsigned int physical_vdbox, + unsigned int logical_vdbox, u16 vdbox_mask) +{ + /* + * In Gen11, only even numbered logical VDBOXes are hooked + * up to an SFC (Scaler & Format Converter) unit. + * In Gen12, even numbered physical instances are always connected + * to an SFC. Odd numbered physical instances have an SFC only if + * the previous even instance is fused off. + */ + if (GRAPHICS_VER(i915) == 12) + return (physical_vdbox % 2 == 0) || + !(BIT(physical_vdbox - 1) & vdbox_mask); + else if (GRAPHICS_VER(i915) == 11) + return logical_vdbox % 2 == 0; + + MISSING_CASE(GRAPHICS_VER(i915)); + return false; +} + /* * Determine which engines are fused off in our particular hardware. * Note that we have a catch-22 situation where we need to be able to access @@ -471,7 +521,14 @@ static intel_engine_mask_t init_engine_mask(struct intel_gt *gt) if (GRAPHICS_VER(i915) < 11) return info->engine_mask; - media_fuse = ~intel_uncore_read(uncore, GEN11_GT_VEBOX_VDBOX_DISABLE); + /* + * On newer platforms the fusing register is called 'enable' and has + * enable semantics, while on older platforms it is called 'disable' + * and bits have disable semantics.
+ */ + media_fuse = intel_uncore_read(uncore, GEN11_GT_VEBOX_VDBOX_DISABLE); + if (GRAPHICS_VER_FULL(i915) < IP_VER(12, 50)) + media_fuse = ~media_fuse; vdbox_mask = media_fuse & GEN11_GT_VDBOX_DISABLE_MASK; vebox_mask = (media_fuse & GEN11_GT_VEBOX_DISABLE_MASK) >> @@ -489,13 +546,9 @@ static intel_engine_mask_t init_engine_mask(struct intel_gt *gt) continue; } - /* - * In Gen11, only even numbered logical VDBOXes are - * hooked up to an SFC (Scaler & Format Converter) unit. - * In TGL each VDBOX has access to an SFC. - */ - if (GRAPHICS_VER(i915) >= 12 || logical_vdbox++ % 2 == 0) + if (gen11_vdbox_has_sfc(i915, i, logical_vdbox, vdbox_mask)) gt->info.vdbox_sfc_access |= BIT(i); + logical_vdbox++; } drm_dbg(&i915->drm, "vdbox enable: %04x, instances: %04lx\n", vdbox_mask, VDBOX_MASK(gt)); @@ -585,9 +638,6 @@ void intel_engine_init_execlists(struct intel_engine_cs *engine) memset(execlists->pending, 0, sizeof(execlists->pending)); execlists->active = memset(execlists->inflight, 0, sizeof(execlists->inflight)); - - execlists->queue_priority_hint = INT_MIN; - execlists->queue = RB_ROOT_CACHED; } static void cleanup_status_page(struct intel_engine_cs *engine) @@ -714,11 +764,17 @@ static int engine_setup_common(struct intel_engine_cs *engine) goto err_status; } + engine->sched_engine = i915_sched_engine_create(ENGINE_PHYSICAL); + if (!engine->sched_engine) { + err = -ENOMEM; + goto err_sched_engine; + } + engine->sched_engine->private_data = engine; + err = intel_engine_init_cmd_parser(engine); if (err) goto err_cmd_parser; - intel_engine_init_active(engine, ENGINE_PHYSICAL); intel_engine_init_execlists(engine); intel_engine_init__pm(engine); intel_engine_init_retire(engine); @@ -737,7 +793,9 @@ static int engine_setup_common(struct intel_engine_cs *engine) return 0; err_cmd_parser: - intel_breadcrumbs_free(engine->breadcrumbs); + i915_sched_engine_put(engine->sched_engine); +err_sched_engine: + intel_breadcrumbs_put(engine->breadcrumbs); err_status: cleanup_status_page(engine); return err; @@ -775,11 +833,11 @@ static int measure_breadcrumb_dw(struct intel_context *ce) frame->rq.ring = &frame->ring; mutex_lock(&ce->timeline->mutex); - spin_lock_irq(&engine->active.lock); + spin_lock_irq(&engine->sched_engine->lock); dw = engine->emit_fini_breadcrumb(&frame->rq, frame->cs) - frame->cs; - spin_unlock_irq(&engine->active.lock); + spin_unlock_irq(&engine->sched_engine->lock); mutex_unlock(&ce->timeline->mutex); GEM_BUG_ON(dw & 1); /* RING_TAIL must be qword aligned */ @@ -788,33 +846,13 @@ static int measure_breadcrumb_dw(struct intel_context *ce) return dw; } -void -intel_engine_init_active(struct intel_engine_cs *engine, unsigned int subclass) -{ - INIT_LIST_HEAD(&engine->active.requests); - INIT_LIST_HEAD(&engine->active.hold); - - spin_lock_init(&engine->active.lock); - lockdep_set_subclass(&engine->active.lock, subclass); - - /* - * Due to an interesting quirk in lockdep's internal debug tracking, - * after setting a subclass we must ensure the lock is used. Otherwise, - * nr_unused_locks is incremented once too often. 
- */ -#ifdef CONFIG_DEBUG_LOCK_ALLOC - local_irq_disable(); - lock_map_acquire(&engine->active.lock.dep_map); - lock_map_release(&engine->active.lock.dep_map); - local_irq_enable(); -#endif -} - -static struct intel_context * -create_pinned_context(struct intel_engine_cs *engine, - unsigned int hwsp, - struct lock_class_key *key, - const char *name) +struct intel_context * +intel_engine_create_pinned_context(struct intel_engine_cs *engine, + struct i915_address_space *vm, + unsigned int ring_size, + unsigned int hwsp, + struct lock_class_key *key, + const char *name) { struct intel_context *ce; int err; @@ -825,6 +863,11 @@ create_pinned_context(struct intel_engine_cs *engine, __set_bit(CONTEXT_BARRIER_BIT, &ce->flags); ce->timeline = page_pack_bits(NULL, hwsp); + ce->ring = NULL; + ce->ring_size = ring_size; + + i915_vm_put(ce->vm); + ce->vm = i915_vm_get(vm); err = intel_context_pin(ce); /* perma-pin so it is always available */ if (err) { @@ -843,7 +886,7 @@ create_pinned_context(struct intel_engine_cs *engine, return ce; } -static void destroy_pinned_context(struct intel_context *ce) +void intel_engine_destroy_pinned_context(struct intel_context *ce) { struct intel_engine_cs *engine = ce->engine; struct i915_vma *hwsp = engine->status_page.vma; @@ -863,8 +906,9 @@ create_kernel_context(struct intel_engine_cs *engine) { static struct lock_class_key kernel; - return create_pinned_context(engine, I915_GEM_HWS_SEQNO_ADDR, - &kernel, "kernel_context"); + return intel_engine_create_pinned_context(engine, engine->gt->vm, SZ_4K, + I915_GEM_HWS_SEQNO_ADDR, + &kernel, "kernel_context"); } /** @@ -907,7 +951,7 @@ static int engine_init_common(struct intel_engine_cs *engine) return 0; err_context: - destroy_pinned_context(ce); + intel_engine_destroy_pinned_context(ce); return ret; } @@ -957,10 +1001,10 @@ int intel_engines_init(struct intel_gt *gt) */ void intel_engine_cleanup_common(struct intel_engine_cs *engine) { - GEM_BUG_ON(!list_empty(&engine->active.requests)); - tasklet_kill(&engine->execlists.tasklet); /* flush the callback */ + GEM_BUG_ON(!list_empty(&engine->sched_engine->requests)); - intel_breadcrumbs_free(engine->breadcrumbs); + i915_sched_engine_put(engine->sched_engine); + intel_breadcrumbs_put(engine->breadcrumbs); intel_engine_fini_retire(engine); intel_engine_cleanup_cmd_parser(engine); @@ -969,7 +1013,7 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine) fput(engine->default_state); if (engine->kernel_context) - destroy_pinned_context(engine->kernel_context); + intel_engine_destroy_pinned_context(engine->kernel_context); GEM_BUG_ON(!llist_empty(&engine->barrier_tasks)); cleanup_status_page(engine); @@ -1105,45 +1149,8 @@ static u32 read_subslice_reg(const struct intel_engine_cs *engine, int slice, int subslice, i915_reg_t reg) { - struct drm_i915_private *i915 = engine->i915; - struct intel_uncore *uncore = engine->uncore; - u32 mcr_mask, mcr_ss, mcr, old_mcr, val; - enum forcewake_domains fw_domains; - - if (GRAPHICS_VER(i915) >= 11) { - mcr_mask = GEN11_MCR_SLICE_MASK | GEN11_MCR_SUBSLICE_MASK; - mcr_ss = GEN11_MCR_SLICE(slice) | GEN11_MCR_SUBSLICE(subslice); - } else { - mcr_mask = GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK; - mcr_ss = GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice); - } - - fw_domains = intel_uncore_forcewake_for_reg(uncore, reg, - FW_REG_READ); - fw_domains |= intel_uncore_forcewake_for_reg(uncore, - GEN8_MCR_SELECTOR, - FW_REG_READ | FW_REG_WRITE); - - spin_lock_irq(&uncore->lock); - intel_uncore_forcewake_get__locked(uncore, 
fw_domains); - - old_mcr = mcr = intel_uncore_read_fw(uncore, GEN8_MCR_SELECTOR); - - mcr &= ~mcr_mask; - mcr |= mcr_ss; - intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr); - - val = intel_uncore_read_fw(uncore, reg); - - mcr &= ~mcr_mask; - mcr |= old_mcr & mcr_mask; - - intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr); - - intel_uncore_forcewake_put__locked(uncore, fw_domains); - spin_unlock_irq(&uncore->lock); - - return val; + return intel_uncore_read_with_mcr_steering(engine->uncore, reg, + slice, subslice); } /* NB: please notice the memset */ @@ -1243,7 +1250,7 @@ static bool ring_is_idle(struct intel_engine_cs *engine) void __intel_engine_flush_submission(struct intel_engine_cs *engine, bool sync) { - struct tasklet_struct *t = &engine->execlists.tasklet; + struct tasklet_struct *t = &engine->sched_engine->tasklet; if (!t->callback) return; @@ -1283,7 +1290,7 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine) intel_engine_flush_submission(engine); /* ELSP is empty, but there are ready requests? E.g. after reset */ - if (!RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)) + if (!i915_sched_engine_is_empty(engine->sched_engine)) return false; /* Ring stopped? */ @@ -1314,6 +1321,30 @@ bool intel_engines_are_idle(struct intel_gt *gt) return true; } +bool intel_engine_irq_enable(struct intel_engine_cs *engine) +{ + if (!engine->irq_enable) + return false; + + /* Caller disables interrupts */ + spin_lock(&engine->gt->irq_lock); + engine->irq_enable(engine); + spin_unlock(&engine->gt->irq_lock); + + return true; +} + +void intel_engine_irq_disable(struct intel_engine_cs *engine) +{ + if (!engine->irq_disable) + return; + + /* Caller disables interrupts */ + spin_lock(&engine->gt->irq_lock); + engine->irq_disable(engine); + spin_unlock(&engine->gt->irq_lock); +} + void intel_engines_reset_default_submission(struct intel_gt *gt) { struct intel_engine_cs *engine; @@ -1349,7 +1380,7 @@ static struct intel_timeline *get_timeline(struct i915_request *rq) struct intel_timeline *tl; /* - * Even though we are holding the engine->active.lock here, there + * Even though we are holding the engine->sched_engine->lock here, there * is no control over the submission queue per-se and we are * inspecting the active state at a random point in time, with an * unknown queue. Play safe and make sure the timeline remains valid. @@ -1504,8 +1535,8 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, drm_printf(m, "\tExeclist tasklet queued? %s (%s), preempt? %s, timeslice? 
%s\n", yesno(test_bit(TASKLET_STATE_SCHED, - &engine->execlists.tasklet.state)), - enableddisabled(!atomic_read(&engine->execlists.tasklet.count)), + &engine->sched_engine->tasklet.state)), + enableddisabled(!atomic_read(&engine->sched_engine->tasklet.count)), repr_timer(&engine->execlists.preempt), repr_timer(&engine->execlists.timer)); @@ -1529,7 +1560,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, idx, hws[idx * 2], hws[idx * 2 + 1]); } - execlists_active_lock_bh(execlists); + i915_sched_engine_active_lock_bh(engine->sched_engine); rcu_read_lock(); for (port = execlists->active; (rq = *port); port++) { char hdr[160]; @@ -1560,7 +1591,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, i915_request_show(m, rq, hdr, 0); } rcu_read_unlock(); - execlists_active_unlock_bh(execlists); + i915_sched_engine_active_unlock_bh(engine->sched_engine); } else if (GRAPHICS_VER(dev_priv) > 6) { drm_printf(m, "\tPP_DIR_BASE: 0x%08x\n", ENGINE_READ(engine, RING_PP_DIR_BASE)); @@ -1650,6 +1681,98 @@ static void print_properties(struct intel_engine_cs *engine, read_ul(&engine->defaults, p->offset)); } +static void engine_dump_request(struct i915_request *rq, struct drm_printer *m, const char *msg) +{ + struct intel_timeline *tl = get_timeline(rq); + + i915_request_show(m, rq, msg, 0); + + drm_printf(m, "\t\tring->start: 0x%08x\n", + i915_ggtt_offset(rq->ring->vma)); + drm_printf(m, "\t\tring->head: 0x%08x\n", + rq->ring->head); + drm_printf(m, "\t\tring->tail: 0x%08x\n", + rq->ring->tail); + drm_printf(m, "\t\tring->emit: 0x%08x\n", + rq->ring->emit); + drm_printf(m, "\t\tring->space: 0x%08x\n", + rq->ring->space); + + if (tl) { + drm_printf(m, "\t\tring->hwsp: 0x%08x\n", + tl->hwsp_offset); + intel_timeline_put(tl); + } + + print_request_ring(m, rq); + + if (rq->context->lrc_reg_state) { + drm_printf(m, "Logical Ring Context:\n"); + hexdump(m, rq->context->lrc_reg_state, PAGE_SIZE); + } +} + +void intel_engine_dump_active_requests(struct list_head *requests, + struct i915_request *hung_rq, + struct drm_printer *m) +{ + struct i915_request *rq; + const char *msg; + enum i915_request_state state; + + list_for_each_entry(rq, requests, sched.link) { + if (rq == hung_rq) + continue; + + state = i915_test_request_state(rq); + if (state < I915_REQUEST_QUEUED) + continue; + + if (state == I915_REQUEST_ACTIVE) + msg = "\t\tactive on engine"; + else + msg = "\t\tactive in queue"; + + engine_dump_request(rq, m, msg); + } +} + +static void engine_dump_active_requests(struct intel_engine_cs *engine, struct drm_printer *m) +{ + struct i915_request *hung_rq = NULL; + struct intel_context *ce; + bool guc; + + /* + * No need for an engine->irq_seqno_barrier() before the seqno reads. + * The GPU is still running so requests are still executing and any + * hardware reads will be out of date by the time they are reported. + * But the intention here is just to report an instantaneous snapshot + * so that's fine. 
+ */ + lockdep_assert_held(&engine->sched_engine->lock); + + drm_printf(m, "\tRequests:\n"); + + guc = intel_uc_uses_guc_submission(&engine->gt->uc); + if (guc) { + ce = intel_engine_get_hung_context(engine); + if (ce) + hung_rq = intel_context_find_active_request(ce); + } else { + hung_rq = intel_engine_execlist_find_hung_request(engine); + } + + if (hung_rq) + engine_dump_request(hung_rq, m, "\t\thung"); + + if (guc) + intel_guc_dump_active_requests(engine, hung_rq, m); + else + intel_engine_dump_active_requests(&engine->sched_engine->requests, + hung_rq, m); +} + void intel_engine_dump(struct intel_engine_cs *engine, struct drm_printer *m, const char *header, ...) @@ -1694,41 +1817,12 @@ void intel_engine_dump(struct intel_engine_cs *engine, i915_reset_count(error)); print_properties(engine, m); - drm_printf(m, "\tRequests:\n"); + spin_lock_irqsave(&engine->sched_engine->lock, flags); + engine_dump_active_requests(engine, m); - spin_lock_irqsave(&engine->active.lock, flags); - rq = intel_engine_find_active_request(engine); - if (rq) { - struct intel_timeline *tl = get_timeline(rq); - - i915_request_show(m, rq, "\t\tactive ", 0); - - drm_printf(m, "\t\tring->start: 0x%08x\n", - i915_ggtt_offset(rq->ring->vma)); - drm_printf(m, "\t\tring->head: 0x%08x\n", - rq->ring->head); - drm_printf(m, "\t\tring->tail: 0x%08x\n", - rq->ring->tail); - drm_printf(m, "\t\tring->emit: 0x%08x\n", - rq->ring->emit); - drm_printf(m, "\t\tring->space: 0x%08x\n", - rq->ring->space); - - if (tl) { - drm_printf(m, "\t\tring->hwsp: 0x%08x\n", - tl->hwsp_offset); - intel_timeline_put(tl); - } - - print_request_ring(m, rq); - - if (rq->context->lrc_reg_state) { - drm_printf(m, "Logical Ring Context:\n"); - hexdump(m, rq->context->lrc_reg_state, PAGE_SIZE); - } - } - drm_printf(m, "\tOn hold?: %lu\n", list_count(&engine->active.hold)); - spin_unlock_irqrestore(&engine->active.lock, flags); + drm_printf(m, "\tOn hold?: %lu\n", + list_count(&engine->sched_engine->hold)); + spin_unlock_irqrestore(&engine->sched_engine->lock, flags); drm_printf(m, "\tMMIO base: 0x%08x\n", engine->mmio_base); wakeref = intel_runtime_pm_get_if_in_use(engine->uncore->rpm); @@ -1785,19 +1879,33 @@ ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine, ktime_t *now) return total; } -static bool match_ring(struct i915_request *rq) +struct intel_context * +intel_engine_create_virtual(struct intel_engine_cs **siblings, + unsigned int count) { - u32 ring = ENGINE_READ(rq->engine, RING_START); + if (count == 0) + return ERR_PTR(-EINVAL); + + if (count == 1) + return intel_context_create(siblings[0]); - return ring == i915_ggtt_offset(rq->ring->vma); + GEM_BUG_ON(!siblings[0]->cops->create_virtual); + return siblings[0]->cops->create_virtual(siblings, count); } struct i915_request * -intel_engine_find_active_request(struct intel_engine_cs *engine) +intel_engine_execlist_find_hung_request(struct intel_engine_cs *engine) { struct i915_request *request, *active = NULL; /* + * This search does not work in GuC submission mode. However, the GuC + * will report the hanging context directly to the driver itself. So + * the driver should never get here when in GuC mode. + */ + GEM_BUG_ON(intel_uc_uses_guc_submission(&engine->gt->uc)); + + /* * We are called by the error capture, reset and to dump engine * state at random points in time. In particular, note that neither is * crucially ordered with an interrupt. 
After a hang, the GPU is dead @@ -1808,7 +1916,7 @@ intel_engine_find_active_request(struct intel_engine_cs *engine) * At all other times, we must assume the GPU is still running, but * we only care about the snapshot of this moment. */ - lockdep_assert_held(&engine->active.lock); + lockdep_assert_held(&engine->sched_engine->lock); rcu_read_lock(); request = execlists_active(&engine->execlists); @@ -1826,15 +1934,9 @@ intel_engine_find_active_request(struct intel_engine_cs *engine) if (active) return active; - list_for_each_entry(request, &engine->active.requests, sched.link) { - if (__i915_request_is_complete(request)) - continue; - - if (!__i915_request_has_started(request)) - continue; - - /* More than one preemptible request may match! */ - if (!match_ring(request)) + list_for_each_entry(request, &engine->sched_engine->requests, + sched.link) { + if (i915_test_request_state(request) != I915_REQUEST_ACTIVE) continue; active = request; diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c index b99ac41695f3..74775ae961b2 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c @@ -70,12 +70,38 @@ static void show_heartbeat(const struct i915_request *rq, { struct drm_printer p = drm_debug_printer("heartbeat"); - intel_engine_dump(engine, &p, - "%s heartbeat {seqno:%llx:%lld, prio:%d} not ticking\n", - engine->name, - rq->fence.context, - rq->fence.seqno, - rq->sched.attr.priority); + if (!rq) { + intel_engine_dump(engine, &p, + "%s heartbeat not ticking\n", + engine->name); + } else { + intel_engine_dump(engine, &p, + "%s heartbeat {seqno:%llx:%lld, prio:%d} not ticking\n", + engine->name, + rq->fence.context, + rq->fence.seqno, + rq->sched.attr.priority); + } +} + +static void +reset_engine(struct intel_engine_cs *engine, struct i915_request *rq) +{ + if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) + show_heartbeat(rq, engine); + + if (intel_engine_uses_guc(engine)) + /* + * GuC itself is toast or GuC's hang detection + * is disabled. Either way, need to find the + * hang culprit manually. + */ + intel_guc_find_hung_context(engine); + + intel_gt_handle_error(engine->gt, engine->mask, + I915_ERROR_CAPTURE, + "stopped heartbeat on %s", + engine->name); } static void heartbeat(struct work_struct *wrk) @@ -102,6 +128,11 @@ static void heartbeat(struct work_struct *wrk) if (intel_gt_is_wedged(engine->gt)) goto out; + if (i915_sched_engine_disabled(engine->sched_engine)) { + reset_engine(engine, engine->heartbeat.systole); + goto out; + } + if (engine->heartbeat.systole) { long delay = READ_ONCE(engine->props.heartbeat_interval_ms); @@ -121,7 +152,7 @@ static void heartbeat(struct work_struct *wrk) * but all other contexts, including the kernel * context are stuck waiting for the signal. 
*/ - } else if (engine->schedule && + } else if (engine->sched_engine->schedule && rq->sched.attr.priority < I915_PRIORITY_BARRIER) { /* * Gradually raise the priority of the heartbeat to @@ -136,16 +167,10 @@ static void heartbeat(struct work_struct *wrk) attr.priority = I915_PRIORITY_BARRIER; local_bh_disable(); - engine->schedule(rq, &attr); + engine->sched_engine->schedule(rq, &attr); local_bh_enable(); } else { - if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) - show_heartbeat(rq, engine); - - intel_gt_handle_error(engine->gt, engine->mask, - I915_ERROR_CAPTURE, - "stopped heartbeat on %s", - engine->name); + reset_engine(engine, rq); } rq->emitted_jiffies = jiffies; @@ -194,6 +219,25 @@ void intel_engine_park_heartbeat(struct intel_engine_cs *engine) i915_request_put(fetch_and_zero(&engine->heartbeat.systole)); } +void intel_gt_unpark_heartbeats(struct intel_gt *gt) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + for_each_engine(engine, gt, id) + if (intel_engine_pm_is_awake(engine)) + intel_engine_unpark_heartbeat(engine); +} + +void intel_gt_park_heartbeats(struct intel_gt *gt) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + for_each_engine(engine, gt, id) + intel_engine_park_heartbeat(engine); +} + void intel_engine_init_heartbeat(struct intel_engine_cs *engine) { INIT_DELAYED_WORK(&engine->heartbeat.work, heartbeat); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h index a488ea3e84a3..5da6d809a87a 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h @@ -7,6 +7,7 @@ #define INTEL_ENGINE_HEARTBEAT_H struct intel_engine_cs; +struct intel_gt; void intel_engine_init_heartbeat(struct intel_engine_cs *engine); @@ -16,6 +17,9 @@ int intel_engine_set_heartbeat(struct intel_engine_cs *engine, void intel_engine_park_heartbeat(struct intel_engine_cs *engine); void intel_engine_unpark_heartbeat(struct intel_engine_cs *engine); +void intel_gt_park_heartbeats(struct intel_gt *gt); +void intel_gt_unpark_heartbeats(struct intel_gt *gt); + int intel_engine_pulse(struct intel_engine_cs *engine); int intel_engine_flush_barriers(struct intel_engine_cs *engine); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index 47f4397095e5..1f07ac4e0672 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -275,13 +275,11 @@ static int __engine_park(struct intel_wakeref *wf) intel_breadcrumbs_park(engine->breadcrumbs); /* Must be reset upon idling, or we may miss the busy wakeup. 
*/ - GEM_BUG_ON(engine->execlists.queue_priority_hint != INT_MIN); + GEM_BUG_ON(engine->sched_engine->queue_priority_hint != INT_MIN); if (engine->park) engine->park(engine); - engine->execlists.no_priolist = false; - /* While gt calls i915_vma_parked(), we have to break the lock cycle */ intel_gt_pm_put_async(engine->gt); return 0; diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index e113f93b3274..ed91bcff20eb 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -21,32 +21,20 @@ #include "i915_pmu.h" #include "i915_priolist_types.h" #include "i915_selftest.h" -#include "intel_breadcrumbs_types.h" #include "intel_sseu.h" #include "intel_timeline_types.h" #include "intel_uncore.h" #include "intel_wakeref.h" #include "intel_workarounds_types.h" -/* Legacy HW Engine ID */ - -#define RCS0_HW 0 -#define VCS0_HW 1 -#define BCS0_HW 2 -#define VECS0_HW 3 -#define VCS1_HW 4 -#define VCS2_HW 6 -#define VCS3_HW 7 -#define VECS1_HW 12 - -/* Gen11+ HW Engine class + instance */ +/* HW Engine class + instance */ #define RENDER_CLASS 0 #define VIDEO_DECODE_CLASS 1 #define VIDEO_ENHANCEMENT_CLASS 2 #define COPY_ENGINE_CLASS 3 #define OTHER_CLASS 4 #define MAX_ENGINE_CLASS 4 -#define MAX_ENGINE_INSTANCE 3 +#define MAX_ENGINE_INSTANCE 7 #define I915_MAX_SLICES 3 #define I915_MAX_SUBSLICES 8 @@ -59,11 +47,13 @@ struct drm_i915_reg_table; struct i915_gem_context; struct i915_request; struct i915_sched_attr; +struct i915_sched_engine; struct intel_gt; struct intel_ring; struct intel_uncore; +struct intel_breadcrumbs; -typedef u8 intel_engine_mask_t; +typedef u32 intel_engine_mask_t; #define ALL_ENGINES ((intel_engine_mask_t)~0ul) struct intel_hw_status_page { @@ -100,8 +90,8 @@ struct i915_ctx_workarounds { struct i915_vma *vma; }; -#define I915_MAX_VCS 4 -#define I915_MAX_VECS 2 +#define I915_MAX_VCS 8 +#define I915_MAX_VECS 4 /* * Engine IDs definitions. @@ -114,9 +104,15 @@ enum intel_engine_id { VCS1, VCS2, VCS3, + VCS4, + VCS5, + VCS6, + VCS7, #define _VCS(n) (VCS0 + (n)) VECS0, VECS1, + VECS2, + VECS3, #define _VECS(n) (VECS0 + (n)) I915_NUM_ENGINES #define INVALID_ENGINE ((enum intel_engine_id)-1) @@ -138,11 +134,6 @@ struct st_preempt_hang { */ struct intel_engine_execlists { /** - * @tasklet: softirq tasklet for bottom handler - */ - struct tasklet_struct tasklet; - - /** * @timer: kick the current context if its timeslice expires */ struct timer_list timer; @@ -153,11 +144,6 @@ struct intel_engine_execlists { struct timer_list preempt; /** - * @default_priolist: priority list for I915_PRIORITY_NORMAL - */ - struct i915_priolist default_priolist; - - /** * @ccid: identifier for contexts submitted to this engine */ u32 ccid; @@ -192,11 +178,6 @@ struct intel_engine_execlists { u32 reset_ccid; /** - * @no_priolist: priority lists disabled - */ - bool no_priolist; - - /** * @submit_reg: gen-specific execlist submission register * set to the ExecList Submission Port (elsp) register pre-Gen11 and to * the ExecList Submission Queue Contents register array for Gen11+ @@ -238,23 +219,10 @@ struct intel_engine_execlists { unsigned int port_mask; /** - * @queue_priority_hint: Highest pending priority. - * - * When we add requests into the queue, or adjust the priority of - * executing requests, we compute the maximum priority of those - * pending requests. We can then use this value to determine if - * we need to preempt the executing requests to service the queue. 
- * However, since the we may have recorded the priority of an inflight - * request we wanted to preempt but since completed, at the time of - * dequeuing the priority hint may no longer may match the highest - * available request priority. + * @virtual: Queue of requets on a virtual engine, sorted by priority. + * Each RB entry is a struct i915_priolist containing a list of requests + * of the same priority. */ - int queue_priority_hint; - - /** - * @queue: queue of requests, in priority lists - */ - struct rb_root_cached queue; struct rb_root_cached virtual; /** @@ -295,7 +263,6 @@ struct intel_engine_cs { enum intel_engine_id id; enum intel_engine_id legacy_idx; - unsigned int hw_id; unsigned int guc_id; intel_engine_mask_t mask; @@ -326,15 +293,13 @@ struct intel_engine_cs { struct intel_sseu sseu; - struct { - spinlock_t lock; - struct list_head requests; - struct list_head hold; /* ready requests, but on hold */ - } active; + struct i915_sched_engine *sched_engine; /* keep a request in reserve for a [pm] barrier under oom */ struct i915_request *request_pool; + struct intel_context *hung_ce; + struct llist_head barrier_tasks; struct intel_context *kernel_context; /* pinned */ @@ -419,6 +384,8 @@ struct intel_engine_cs { void (*park)(struct intel_engine_cs *engine); void (*unpark)(struct intel_engine_cs *engine); + void (*bump_serial)(struct intel_engine_cs *engine); + void (*set_default_submission)(struct intel_engine_cs *engine); const struct intel_context_ops *cops; @@ -447,22 +414,13 @@ struct intel_engine_cs { */ void (*submit_request)(struct i915_request *rq); - /* - * Called on signaling of a SUBMIT_FENCE, passing along the signaling - * request down to the bonded pairs. - */ - void (*bond_execute)(struct i915_request *rq, - struct dma_fence *signal); + void (*release)(struct intel_engine_cs *engine); /* - * Call when the priority on a request has changed and it and its - * dependencies may need rescheduling. Note the request itself may - * not be ready to run! + * Add / remove request from engine active tracking */ - void (*schedule)(struct i915_request *request, - const struct i915_sched_attr *attr); - - void (*release)(struct intel_engine_cs *engine); + void (*add_active_request)(struct i915_request *rq); + void (*remove_active_request)(struct i915_request *rq); struct intel_engine_execlists execlists; @@ -485,6 +443,7 @@ struct intel_engine_cs { #define I915_ENGINE_IS_VIRTUAL BIT(5) #define I915_ENGINE_HAS_RELATIVE_MMIO BIT(6) #define I915_ENGINE_REQUIRES_CMD_PARSER BIT(7) +#define I915_ENGINE_WANT_FORCED_PREEMPTION BIT(8) unsigned int flags; /* diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c b/drivers/gpu/drm/i915/gt/intel_engine_user.c index 3cca7ea2d6ea..8f8bea08e734 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_user.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c @@ -11,6 +11,7 @@ #include "intel_engine.h" #include "intel_engine_user.h" #include "intel_gt.h" +#include "uc/intel_guc_submission.h" struct intel_engine_cs * intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance) @@ -108,13 +109,16 @@ static void set_scheduler_caps(struct drm_i915_private *i915) for_each_uabi_engine(engine, i915) { /* all engines must agree! 
*/ int i; - if (engine->schedule) + if (engine->sched_engine->schedule) enabled |= (I915_SCHEDULER_CAP_ENABLED | I915_SCHEDULER_CAP_PRIORITY); else disabled |= (I915_SCHEDULER_CAP_ENABLED | I915_SCHEDULER_CAP_PRIORITY); + if (intel_uc_uses_guc_submission(&i915->gt.uc)) + enabled |= I915_SCHEDULER_CAP_STATIC_PRIORITY_MAP; + for (i = 0; i < ARRAY_SIZE(map); i++) { if (engine->flags & BIT(map[i].engine)) enabled |= BIT(map[i].sched); diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index fc77592d88a9..de5f9c86b9a4 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -114,6 +114,7 @@ #include "gen8_engine_cs.h" #include "intel_breadcrumbs.h" #include "intel_context.h" +#include "intel_engine_heartbeat.h" #include "intel_engine_pm.h" #include "intel_engine_stats.h" #include "intel_execlists_submission.h" @@ -153,6 +154,12 @@ #define GEN12_CSB_CTX_VALID(csb_dw) \ (FIELD_GET(GEN12_CSB_SW_CTX_ID_MASK, csb_dw) != GEN12_IDLE_CTX_ID) +#define XEHP_CTX_STATUS_SWITCHED_TO_NEW_QUEUE BIT(1) /* upper csb dword */ +#define XEHP_CSB_SW_CTX_ID_MASK GENMASK(31, 10) +#define XEHP_IDLE_CTX_ID 0xFFFF +#define XEHP_CSB_CTX_VALID(csb_dw) \ + (FIELD_GET(XEHP_CSB_SW_CTX_ID_MASK, csb_dw) != XEHP_IDLE_CTX_ID) + /* Typical size of the average request (2 pipecontrols and a MI_BB) */ #define EXECLISTS_REQUEST_SIZE 64 /* bytes */ @@ -182,18 +189,6 @@ struct virtual_engine { int prio; } nodes[I915_NUM_ENGINES]; - /* - * Keep track of bonded pairs -- restrictions upon on our selection - * of physical engines any particular request may be submitted to. - * If we receive a submit-fence from a master engine, we will only - * use one of sibling_mask physical engines. - */ - struct ve_bond { - const struct intel_engine_cs *master; - intel_engine_mask_t sibling_mask; - } *bonds; - unsigned int num_bonds; - /* And finally, which physical engines this virtual engine maps onto. */ unsigned int num_siblings; struct intel_engine_cs *siblings[]; @@ -205,6 +200,9 @@ static struct virtual_engine *to_virtual_engine(struct intel_engine_cs *engine) return container_of(engine, struct virtual_engine, base); } +static struct intel_context * +execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count); + static struct i915_request * __active_request(const struct intel_timeline * const tl, struct i915_request *rq, @@ -273,11 +271,11 @@ static int effective_prio(const struct i915_request *rq) return prio; } -static int queue_prio(const struct intel_engine_execlists *execlists) +static int queue_prio(const struct i915_sched_engine *sched_engine) { struct rb_node *rb; - rb = rb_first_cached(&execlists->queue); + rb = rb_first_cached(&sched_engine->queue); if (!rb) return INT_MIN; @@ -318,14 +316,14 @@ static bool need_preempt(const struct intel_engine_cs *engine, * to preserve FIFO ordering of dependencies. */ last_prio = max(effective_prio(rq), I915_PRIORITY_NORMAL - 1); - if (engine->execlists.queue_priority_hint <= last_prio) + if (engine->sched_engine->queue_priority_hint <= last_prio) return false; /* * Check against the first request in ELSP[1], it will, thanks to the * power of PI, be the highest priority of that context. 
*/ - if (!list_is_last(&rq->sched.link, &engine->active.requests) && + if (!list_is_last(&rq->sched.link, &engine->sched_engine->requests) && rq_prio(list_next_entry(rq, sched.link)) > last_prio) return true; @@ -340,7 +338,7 @@ static bool need_preempt(const struct intel_engine_cs *engine, * context, it's priority would not exceed ELSP[0] aka last_prio. */ return max(virtual_prio(&engine->execlists), - queue_prio(&engine->execlists)) > last_prio; + queue_prio(engine->sched_engine)) > last_prio; } __maybe_unused static bool @@ -367,10 +365,10 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine) struct list_head *pl; int prio = I915_PRIORITY_INVALID; - lockdep_assert_held(&engine->active.lock); + lockdep_assert_held(&engine->sched_engine->lock); list_for_each_entry_safe_reverse(rq, rn, - &engine->active.requests, + &engine->sched_engine->requests, sched.link) { if (__i915_request_is_complete(rq)) { list_del_init(&rq->sched.link); @@ -382,9 +380,10 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine) GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID); if (rq_prio(rq) != prio) { prio = rq_prio(rq); - pl = i915_sched_lookup_priolist(engine, prio); + pl = i915_sched_lookup_priolist(engine->sched_engine, + prio); } - GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)); + GEM_BUG_ON(i915_sched_engine_is_empty(engine->sched_engine)); list_move(&rq->sched.link, pl); set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); @@ -489,6 +488,16 @@ __execlists_schedule_in(struct i915_request *rq) /* Use a fixed tag for OA and friends */ GEM_BUG_ON(ce->tag <= BITS_PER_LONG); ce->lrc.ccid = ce->tag; + } else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) { + /* We don't need a strict matching tag, just different values */ + unsigned int tag = ffs(READ_ONCE(engine->context_tag)); + + GEM_BUG_ON(tag == 0 || tag >= BITS_PER_LONG); + clear_bit(tag - 1, &engine->context_tag); + ce->lrc.ccid = tag << (XEHP_SW_CTX_ID_SHIFT - 32); + + BUILD_BUG_ON(BITS_PER_LONG > GEN12_MAX_CONTEXT_HW_ID); + } else { /* We don't need a strict matching tag, just different values */ unsigned int tag = __ffs(engine->context_tag); @@ -534,13 +543,13 @@ resubmit_virtual_request(struct i915_request *rq, struct virtual_engine *ve) { struct intel_engine_cs *engine = rq->engine; - spin_lock_irq(&engine->active.lock); + spin_lock_irq(&engine->sched_engine->lock); clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); WRITE_ONCE(rq->engine, &ve->base); ve->base.submit_request(rq); - spin_unlock_irq(&engine->active.lock); + spin_unlock_irq(&engine->sched_engine->lock); } static void kick_siblings(struct i915_request *rq, struct intel_context *ce) @@ -569,7 +578,7 @@ static void kick_siblings(struct i915_request *rq, struct intel_context *ce) resubmit_virtual_request(rq, ve); if (READ_ONCE(ve->request)) - tasklet_hi_schedule(&ve->base.execlists.tasklet); + tasklet_hi_schedule(&ve->base.sched_engine->tasklet); } static void __execlists_schedule_out(struct i915_request * const rq, @@ -579,7 +588,7 @@ static void __execlists_schedule_out(struct i915_request * const rq, unsigned int ccid; /* - * NB process_csb() is not under the engine->active.lock and hence + * NB process_csb() is not under the engine->sched_engine->lock and hence * schedule_out can race with schedule_in meaning that we should * refrain from doing non-trivial work here. 
*/ @@ -599,8 +608,14 @@ static void __execlists_schedule_out(struct i915_request * const rq, intel_engine_add_retire(engine, ce->timeline); ccid = ce->lrc.ccid; - ccid >>= GEN11_SW_CTX_ID_SHIFT - 32; - ccid &= GEN12_MAX_CONTEXT_HW_ID; + if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) { + ccid >>= XEHP_SW_CTX_ID_SHIFT - 32; + ccid &= XEHP_MAX_CONTEXT_HW_ID; + } else { + ccid >>= GEN11_SW_CTX_ID_SHIFT - 32; + ccid &= GEN12_MAX_CONTEXT_HW_ID; + } + if (ccid < BITS_PER_LONG) { GEM_BUG_ON(ccid == 0); GEM_BUG_ON(test_bit(ccid - 1, &engine->context_tag)); @@ -738,9 +753,9 @@ trace_ports(const struct intel_engine_execlists *execlists, } static bool -reset_in_progress(const struct intel_engine_execlists *execlists) +reset_in_progress(const struct intel_engine_cs *engine) { - return unlikely(!__tasklet_is_enabled(&execlists->tasklet)); + return unlikely(!__tasklet_is_enabled(&engine->sched_engine->tasklet)); } static __maybe_unused noinline bool @@ -756,7 +771,7 @@ assert_pending_valid(const struct intel_engine_execlists *execlists, trace_ports(execlists, msg, execlists->pending); /* We may be messing around with the lists during reset, lalala */ - if (reset_in_progress(execlists)) + if (reset_in_progress(engine)) return true; if (!execlists->pending[0]) { @@ -1096,7 +1111,8 @@ static void defer_active(struct intel_engine_cs *engine) if (!rq) return; - defer_request(rq, i915_sched_lookup_priolist(engine, rq_prio(rq))); + defer_request(rq, i915_sched_lookup_priolist(engine->sched_engine, + rq_prio(rq))); } static bool @@ -1133,13 +1149,14 @@ static bool needs_timeslice(const struct intel_engine_cs *engine, return false; /* If ELSP[1] is occupied, always check to see if worth slicing */ - if (!list_is_last_rcu(&rq->sched.link, &engine->active.requests)) { + if (!list_is_last_rcu(&rq->sched.link, + &engine->sched_engine->requests)) { ENGINE_TRACE(engine, "timeslice required for second inflight context\n"); return true; } /* Otherwise, ELSP[0] is by itself, but may be waiting in the queue */ - if (!RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)) { + if (!i915_sched_engine_is_empty(engine->sched_engine)) { ENGINE_TRACE(engine, "timeslice required for queue\n"); return true; } @@ -1187,7 +1204,7 @@ static void start_timeslice(struct intel_engine_cs *engine) * its timeslice, so recheck. */ if (!timer_pending(&el->timer)) - tasklet_hi_schedule(&el->tasklet); + tasklet_hi_schedule(&engine->sched_engine->tasklet); return; } @@ -1236,6 +1253,7 @@ static bool completed(const struct i915_request *rq) static void execlists_dequeue(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; + struct i915_sched_engine * const sched_engine = engine->sched_engine; struct i915_request **port = execlists->pending; struct i915_request ** const last_port = port + execlists->port_mask; struct i915_request *last, * const *active; @@ -1265,7 +1283,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) * and context switches) submission. 
*/ - spin_lock(&engine->active.lock); + spin_lock(&sched_engine->lock); /* * If the queue is higher priority than the last @@ -1287,7 +1305,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) last->fence.context, last->fence.seqno, last->sched.attr.priority, - execlists->queue_priority_hint); + sched_engine->queue_priority_hint); record_preemption(execlists); /* @@ -1313,7 +1331,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) yesno(timer_expired(&execlists->timer)), last->fence.context, last->fence.seqno, rq_prio(last), - execlists->queue_priority_hint, + sched_engine->queue_priority_hint, yesno(timeslice_yield(execlists, last))); /* @@ -1365,7 +1383,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) * Even if ELSP[1] is occupied and not worthy * of timeslices, our queue might be. */ - spin_unlock(&engine->active.lock); + spin_unlock(&sched_engine->lock); return; } } @@ -1375,7 +1393,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) while ((ve = first_virtual_engine(engine))) { struct i915_request *rq; - spin_lock(&ve->base.active.lock); + spin_lock(&ve->base.sched_engine->lock); rq = ve->request; if (unlikely(!virtual_matches(ve, rq, engine))) @@ -1384,14 +1402,14 @@ static void execlists_dequeue(struct intel_engine_cs *engine) GEM_BUG_ON(rq->engine != &ve->base); GEM_BUG_ON(rq->context != &ve->context); - if (unlikely(rq_prio(rq) < queue_prio(execlists))) { - spin_unlock(&ve->base.active.lock); + if (unlikely(rq_prio(rq) < queue_prio(sched_engine))) { + spin_unlock(&ve->base.sched_engine->lock); break; } if (last && !can_merge_rq(last, rq)) { - spin_unlock(&ve->base.active.lock); - spin_unlock(&engine->active.lock); + spin_unlock(&ve->base.sched_engine->lock); + spin_unlock(&engine->sched_engine->lock); return; /* leave this for another sibling */ } @@ -1405,7 +1423,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) yesno(engine != ve->siblings[0])); WRITE_ONCE(ve->request, NULL); - WRITE_ONCE(ve->base.execlists.queue_priority_hint, INT_MIN); + WRITE_ONCE(ve->base.sched_engine->queue_priority_hint, INT_MIN); rb = &ve->nodes[engine->id].rb; rb_erase_cached(rb, &execlists->virtual); @@ -1437,7 +1455,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) i915_request_put(rq); unlock: - spin_unlock(&ve->base.active.lock); + spin_unlock(&ve->base.sched_engine->lock); /* * Hmm, we have a bunch of virtual engine requests, @@ -1450,7 +1468,7 @@ unlock: break; } - while ((rb = rb_first_cached(&execlists->queue))) { + while ((rb = rb_first_cached(&sched_engine->queue))) { struct i915_priolist *p = to_priolist(rb); struct i915_request *rq, *rn; @@ -1529,7 +1547,7 @@ unlock: } } - rb_erase_cached(&p->node, &execlists->queue); + rb_erase_cached(&p->node, &sched_engine->queue); i915_priolist_free(p); } done: @@ -1551,8 +1569,9 @@ done: * request triggering preemption on the next dequeue (or subsequent * interrupt for secondary ports). 
*/ - execlists->queue_priority_hint = queue_prio(execlists); - spin_unlock(&engine->active.lock); + sched_engine->queue_priority_hint = queue_prio(sched_engine); + i915_sched_engine_reset_on_empty(sched_engine); + spin_unlock(&sched_engine->lock); /* * We can skip poking the HW if we ended up with exactly the same set @@ -1655,13 +1674,24 @@ static void invalidate_csb_entries(const u64 *first, const u64 *last) * bits 44-46: reserved * bits 47-57: sw context id of the lrc the GT switched away from * bits 58-63: sw counter of the lrc the GT switched away from + * + * Xe_HP csb shuffles things around compared to TGL: + * + * bits 0-3: context switch detail (same possible values as TGL) + * bits 4-9: engine instance + * bits 10-25: sw context id of the lrc the GT switched to + * bits 26-31: sw counter of the lrc the GT switched to + * bit 32: semaphore wait mode (poll or signal), Only valid when + * switch detail is set to "wait on semaphore" + * bit 33: switched to new queue + * bits 34-41: wait detail (for switch detail 1 to 4) + * bits 42-57: sw context id of the lrc the GT switched away from + * bits 58-63: sw counter of the lrc the GT switched away from */ -static bool gen12_csb_parse(const u64 csb) +static inline bool +__gen12_csb_parse(bool ctx_to_valid, bool ctx_away_valid, bool new_queue, + u8 switch_detail) { - bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(csb)); - bool new_queue = - lower_32_bits(csb) & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE; - /* * The context switch detail is not guaranteed to be 5 when a preemption * occurs, so we can't just check for that. The check below works for @@ -1670,7 +1700,7 @@ static bool gen12_csb_parse(const u64 csb) * would require some extra handling, but we don't support that. */ if (!ctx_away_valid || new_queue) { - GEM_BUG_ON(!GEN12_CSB_CTX_VALID(lower_32_bits(csb))); + GEM_BUG_ON(!ctx_to_valid); return true; } @@ -1679,10 +1709,26 @@ static bool gen12_csb_parse(const u64 csb) * context switch on an unsuccessful wait instruction since we always * use polling mode. */ - GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_32_bits(csb))); + GEM_BUG_ON(switch_detail); return false; } +static bool xehp_csb_parse(const u64 csb) +{ + return __gen12_csb_parse(XEHP_CSB_CTX_VALID(lower_32_bits(csb)), /* cxt to */ + XEHP_CSB_CTX_VALID(upper_32_bits(csb)), /* cxt away */ + upper_32_bits(csb) & XEHP_CTX_STATUS_SWITCHED_TO_NEW_QUEUE, + GEN12_CTX_SWITCH_DETAIL(lower_32_bits(csb))); +} + +static bool gen12_csb_parse(const u64 csb) +{ + return __gen12_csb_parse(GEN12_CSB_CTX_VALID(lower_32_bits(csb)), /* cxt to */ + GEN12_CSB_CTX_VALID(upper_32_bits(csb)), /* cxt away */ + lower_32_bits(csb) & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE, + GEN12_CTX_SWITCH_DETAIL(upper_32_bits(csb))); +} + static bool gen8_csb_parse(const u64 csb) { return csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED); @@ -1767,8 +1813,8 @@ process_csb(struct intel_engine_cs *engine, struct i915_request **inactive) * access. Either we are inside the tasklet, or the tasklet is disabled * and we assume that is only inside the reset paths and so serialised. */ - GEM_BUG_ON(!tasklet_is_locked(&execlists->tasklet) && - !reset_in_progress(execlists)); + GEM_BUG_ON(!tasklet_is_locked(&engine->sched_engine->tasklet) && + !reset_in_progress(engine)); /* * Note that csb_write, csb_status may be either in HWSP or mmio. 
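The hunk above documents the new Xe_HP CSB bit layout and adds XEHP_CSB_CTX_VALID()/xehp_csb_parse(). For readers who want to see the decode end to end, the following is a minimal, self-contained C sketch of the same promotion decision, assuming only the bit layout quoted above. The SKETCH_-prefixed names and the local field_get() helper are illustrative stand-ins for the kernel's GENMASK()/FIELD_GET() and are not part of this patch.

/*
 * Standalone sketch (not kernel code) of decoding one Xe_HP CSB event per
 * the bit layout documented in the hunk above. Masks and helpers are
 * re-implemented locally so the example compiles on its own.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define SKETCH_GENMASK(h, l)  ((~0u >> (31 - (h))) & ~((1u << (l)) - 1u))
#define SKETCH_CTX_ID_MASK    SKETCH_GENMASK(31, 10) /* sw ctx id + counter */
#define SKETCH_IDLE_CTX_ID    0xFFFFu
#define SKETCH_NEW_QUEUE      (1u << 1)              /* bit 33 of the event */

static uint32_t field_get(uint32_t mask, uint32_t val)
{
	uint32_t shift = 0;

	while (!((mask >> shift) & 1u))
		shift++;
	return (val & mask) >> shift;
}

static bool ctx_valid(uint32_t dw)
{
	/* an all-ones sw context id means "no context" on that side */
	return field_get(SKETCH_CTX_ID_MASK, dw) != SKETCH_IDLE_CTX_ID;
}

/*
 * Mirrors the decision in __gen12_csb_parse(): promote the pending set to
 * active when the GT switched away from an idle context or switched to a
 * new queue; otherwise the event only reports completion of the running
 * context. (The driver additionally asserts that the "switched to" context
 * is valid in the first case and that the switch detail is not a semaphore
 * wait in the second; those checks are omitted here.)
 */
static bool sketch_xehp_csb_parse(uint64_t csb)
{
	/* lower dword describes the context switched to, upper the one switched away from */
	uint32_t away = (uint32_t)(csb >> 32);

	return !ctx_valid(away) || (away & SKETCH_NEW_QUEUE);
}

int main(void)
{
	/* away side idle (ctx id 0xFFFF), to side ctx id 1 => promotion */
	uint64_t csb = ((uint64_t)(SKETCH_IDLE_CTX_ID << 10) << 32) | (1u << 10);

	printf("promote = %d\n", sketch_xehp_csb_parse(csb));
	return 0;
}

The sketch only reproduces the promotion rule used by process_csb() below: a valid "switched away" context with the new-queue bit clear means the event is a completion rather than a promotion of execlists->pending.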
@@ -1847,7 +1893,9 @@ process_csb(struct intel_engine_cs *engine, struct i915_request **inactive) ENGINE_TRACE(engine, "csb[%d]: status=0x%08x:0x%08x\n", head, upper_32_bits(csb), lower_32_bits(csb)); - if (GRAPHICS_VER(engine->i915) >= 12) + if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) + promote = xehp_csb_parse(csb); + else if (GRAPHICS_VER(engine->i915) >= 12) promote = gen12_csb_parse(csb); else promote = gen8_csb_parse(csb); @@ -1979,7 +2027,8 @@ static void __execlists_hold(struct i915_request *rq) __i915_request_unsubmit(rq); clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); - list_move_tail(&rq->sched.link, &rq->engine->active.hold); + list_move_tail(&rq->sched.link, + &rq->engine->sched_engine->hold); i915_request_set_hold(rq); RQ_TRACE(rq, "on hold\n"); @@ -2016,7 +2065,7 @@ static bool execlists_hold(struct intel_engine_cs *engine, if (i915_request_on_hold(rq)) return false; - spin_lock_irq(&engine->active.lock); + spin_lock_irq(&engine->sched_engine->lock); if (__i915_request_is_complete(rq)) { /* too late! */ rq = NULL; @@ -2032,10 +2081,10 @@ static bool execlists_hold(struct intel_engine_cs *engine, GEM_BUG_ON(i915_request_on_hold(rq)); GEM_BUG_ON(rq->engine != engine); __execlists_hold(rq); - GEM_BUG_ON(list_empty(&engine->active.hold)); + GEM_BUG_ON(list_empty(&engine->sched_engine->hold)); unlock: - spin_unlock_irq(&engine->active.lock); + spin_unlock_irq(&engine->sched_engine->lock); return rq; } @@ -2079,7 +2128,7 @@ static void __execlists_unhold(struct i915_request *rq) i915_request_clear_hold(rq); list_move_tail(&rq->sched.link, - i915_sched_lookup_priolist(rq->engine, + i915_sched_lookup_priolist(rq->engine->sched_engine, rq_prio(rq))); set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); @@ -2115,7 +2164,7 @@ static void __execlists_unhold(struct i915_request *rq) static void execlists_unhold(struct intel_engine_cs *engine, struct i915_request *rq) { - spin_lock_irq(&engine->active.lock); + spin_lock_irq(&engine->sched_engine->lock); /* * Move this request back to the priority queue, and all of its @@ -2123,12 +2172,12 @@ static void execlists_unhold(struct intel_engine_cs *engine, */ __execlists_unhold(rq); - if (rq_prio(rq) > engine->execlists.queue_priority_hint) { - engine->execlists.queue_priority_hint = rq_prio(rq); - tasklet_hi_schedule(&engine->execlists.tasklet); + if (rq_prio(rq) > engine->sched_engine->queue_priority_hint) { + engine->sched_engine->queue_priority_hint = rq_prio(rq); + tasklet_hi_schedule(&engine->sched_engine->tasklet); } - spin_unlock_irq(&engine->active.lock); + spin_unlock_irq(&engine->sched_engine->lock); } struct execlists_capture { @@ -2258,13 +2307,13 @@ static void execlists_capture(struct intel_engine_cs *engine) if (!cap) return; - spin_lock_irq(&engine->active.lock); + spin_lock_irq(&engine->sched_engine->lock); cap->rq = active_context(engine, active_ccid(engine)); if (cap->rq) { cap->rq = active_request(cap->rq->context->timeline, cap->rq); cap->rq = i915_request_get_rcu(cap->rq); } - spin_unlock_irq(&engine->active.lock); + spin_unlock_irq(&engine->sched_engine->lock); if (!cap->rq) goto err_free; @@ -2316,13 +2365,13 @@ static void execlists_reset(struct intel_engine_cs *engine, const char *msg) ENGINE_TRACE(engine, "reset for %s\n", msg); /* Mark this tasklet as disabled to avoid waiting for it to complete */ - tasklet_disable_nosync(&engine->execlists.tasklet); + tasklet_disable_nosync(&engine->sched_engine->tasklet); ring_set_paused(engine, 1); /* Freeze the current request in place */ 
execlists_capture(engine); intel_engine_reset(engine, msg); - tasklet_enable(&engine->execlists.tasklet); + tasklet_enable(&engine->sched_engine->tasklet); clear_and_wake_up_bit(bit, lock); } @@ -2345,8 +2394,9 @@ static bool preempt_timeout(const struct intel_engine_cs *const engine) */ static void execlists_submission_tasklet(struct tasklet_struct *t) { - struct intel_engine_cs * const engine = - from_tasklet(engine, t, execlists.tasklet); + struct i915_sched_engine *sched_engine = + from_tasklet(sched_engine, t, tasklet); + struct intel_engine_cs * const engine = sched_engine->private_data; struct i915_request *post[2 * EXECLIST_MAX_PORTS]; struct i915_request **inactive; @@ -2421,13 +2471,16 @@ static void execlists_irq_handler(struct intel_engine_cs *engine, u16 iir) intel_engine_signal_breadcrumbs(engine); if (tasklet) - tasklet_hi_schedule(&engine->execlists.tasklet); + tasklet_hi_schedule(&engine->sched_engine->tasklet); } static void __execlists_kick(struct intel_engine_execlists *execlists) { + struct intel_engine_cs *engine = + container_of(execlists, typeof(*engine), execlists); + /* Kick the tasklet for some interrupt coalescing and reset handling */ - tasklet_hi_schedule(&execlists->tasklet); + tasklet_hi_schedule(&engine->sched_engine->tasklet); } #define execlists_kick(t, member) \ @@ -2448,19 +2501,20 @@ static void queue_request(struct intel_engine_cs *engine, { GEM_BUG_ON(!list_empty(&rq->sched.link)); list_add_tail(&rq->sched.link, - i915_sched_lookup_priolist(engine, rq_prio(rq))); + i915_sched_lookup_priolist(engine->sched_engine, + rq_prio(rq))); set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); } static bool submit_queue(struct intel_engine_cs *engine, const struct i915_request *rq) { - struct intel_engine_execlists *execlists = &engine->execlists; + struct i915_sched_engine *sched_engine = engine->sched_engine; - if (rq_prio(rq) <= execlists->queue_priority_hint) + if (rq_prio(rq) <= sched_engine->queue_priority_hint) return false; - execlists->queue_priority_hint = rq_prio(rq); + sched_engine->queue_priority_hint = rq_prio(rq); return true; } @@ -2468,7 +2522,7 @@ static bool ancestor_on_hold(const struct intel_engine_cs *engine, const struct i915_request *rq) { GEM_BUG_ON(i915_request_on_hold(rq)); - return !list_empty(&engine->active.hold) && hold_request(rq); + return !list_empty(&engine->sched_engine->hold) && hold_request(rq); } static void execlists_submit_request(struct i915_request *request) @@ -2477,23 +2531,24 @@ static void execlists_submit_request(struct i915_request *request) unsigned long flags; /* Will be called from irq-context when using foreign fences. 
*/ - spin_lock_irqsave(&engine->active.lock, flags); + spin_lock_irqsave(&engine->sched_engine->lock, flags); if (unlikely(ancestor_on_hold(engine, request))) { RQ_TRACE(request, "ancestor on hold\n"); - list_add_tail(&request->sched.link, &engine->active.hold); + list_add_tail(&request->sched.link, + &engine->sched_engine->hold); i915_request_set_hold(request); } else { queue_request(engine, request); - GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)); + GEM_BUG_ON(i915_sched_engine_is_empty(engine->sched_engine)); GEM_BUG_ON(list_empty(&request->sched.link)); if (submit_queue(engine, request)) __execlists_kick(&engine->execlists); } - spin_unlock_irqrestore(&engine->active.lock, flags); + spin_unlock_irqrestore(&engine->sched_engine->lock, flags); } static int @@ -2533,11 +2588,26 @@ static int execlists_context_alloc(struct intel_context *ce) return lrc_alloc(ce, ce->engine); } +static void execlists_context_cancel_request(struct intel_context *ce, + struct i915_request *rq) +{ + struct intel_engine_cs *engine = NULL; + + i915_request_active_engine(rq, &engine); + + if (engine && intel_engine_pulse(engine)) + intel_gt_handle_error(engine->gt, engine->mask, 0, + "request cancellation by %s", + current->comm); +} + static const struct intel_context_ops execlists_context_ops = { .flags = COPS_HAS_INFLIGHT, .alloc = execlists_context_alloc, + .cancel_request = execlists_context_cancel_request, + .pre_pin = execlists_context_pre_pin, .pin = execlists_context_pin, .unpin = lrc_unpin, @@ -2548,6 +2618,8 @@ static const struct intel_context_ops execlists_context_ops = { .reset = lrc_reset, .destroy = lrc_destroy, + + .create_virtual = execlists_create_virtual, }; static int emit_pdps(struct i915_request *rq) @@ -2800,10 +2872,8 @@ static int execlists_resume(struct intel_engine_cs *engine) static void execlists_reset_prepare(struct intel_engine_cs *engine) { - struct intel_engine_execlists * const execlists = &engine->execlists; - ENGINE_TRACE(engine, "depth<-%d\n", - atomic_read(&execlists->tasklet.count)); + atomic_read(&engine->sched_engine->tasklet.count)); /* * Prevent request submission to the hardware until we have @@ -2814,8 +2884,8 @@ static void execlists_reset_prepare(struct intel_engine_cs *engine) * Turning off the execlists->tasklet until the reset is over * prevents the race. */ - __tasklet_disable_sync_once(&execlists->tasklet); - GEM_BUG_ON(!reset_in_progress(execlists)); + __tasklet_disable_sync_once(&engine->sched_engine->tasklet); + GEM_BUG_ON(!reset_in_progress(engine)); /* * We stop engines, otherwise we might get failed reset and a @@ -2957,24 +3027,26 @@ static void execlists_reset_rewind(struct intel_engine_cs *engine, bool stalled) /* Push back any incomplete requests for replay after the reset. */ rcu_read_lock(); - spin_lock_irqsave(&engine->active.lock, flags); + spin_lock_irqsave(&engine->sched_engine->lock, flags); __unwind_incomplete_requests(engine); - spin_unlock_irqrestore(&engine->active.lock, flags); + spin_unlock_irqrestore(&engine->sched_engine->lock, flags); rcu_read_unlock(); } static void nop_submission_tasklet(struct tasklet_struct *t) { - struct intel_engine_cs * const engine = - from_tasklet(engine, t, execlists.tasklet); + struct i915_sched_engine *sched_engine = + from_tasklet(sched_engine, t, tasklet); + struct intel_engine_cs * const engine = sched_engine->private_data; /* The driver is wedged; don't process any more events. 
*/ - WRITE_ONCE(engine->execlists.queue_priority_hint, INT_MIN); + WRITE_ONCE(engine->sched_engine->queue_priority_hint, INT_MIN); } static void execlists_reset_cancel(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; + struct i915_sched_engine * const sched_engine = engine->sched_engine; struct i915_request *rq, *rn; struct rb_node *rb; unsigned long flags; @@ -2998,15 +3070,15 @@ static void execlists_reset_cancel(struct intel_engine_cs *engine) execlists_reset_csb(engine, true); rcu_read_lock(); - spin_lock_irqsave(&engine->active.lock, flags); + spin_lock_irqsave(&engine->sched_engine->lock, flags); /* Mark all executing requests as skipped. */ - list_for_each_entry(rq, &engine->active.requests, sched.link) + list_for_each_entry(rq, &engine->sched_engine->requests, sched.link) i915_request_put(i915_request_mark_eio(rq)); intel_engine_signal_breadcrumbs(engine); /* Flush the queued requests to the timeline list (for retiring). */ - while ((rb = rb_first_cached(&execlists->queue))) { + while ((rb = rb_first_cached(&sched_engine->queue))) { struct i915_priolist *p = to_priolist(rb); priolist_for_each_request_consume(rq, rn, p) { @@ -3016,12 +3088,12 @@ static void execlists_reset_cancel(struct intel_engine_cs *engine) } } - rb_erase_cached(&p->node, &execlists->queue); + rb_erase_cached(&p->node, &sched_engine->queue); i915_priolist_free(p); } /* On-hold requests will be flushed to timeline upon their release */ - list_for_each_entry(rq, &engine->active.hold, sched.link) + list_for_each_entry(rq, &sched_engine->hold, sched.link) i915_request_put(i915_request_mark_eio(rq)); /* Cancel all attached virtual engines */ @@ -3032,7 +3104,7 @@ static void execlists_reset_cancel(struct intel_engine_cs *engine) rb_erase_cached(rb, &execlists->virtual); RB_CLEAR_NODE(rb); - spin_lock(&ve->base.active.lock); + spin_lock(&ve->base.sched_engine->lock); rq = fetch_and_zero(&ve->request); if (rq) { if (i915_request_mark_eio(rq)) { @@ -3042,20 +3114,20 @@ static void execlists_reset_cancel(struct intel_engine_cs *engine) } i915_request_put(rq); - ve->base.execlists.queue_priority_hint = INT_MIN; + ve->base.sched_engine->queue_priority_hint = INT_MIN; } - spin_unlock(&ve->base.active.lock); + spin_unlock(&ve->base.sched_engine->lock); } /* Remaining _unready_ requests will be nop'ed when submitted */ - execlists->queue_priority_hint = INT_MIN; - execlists->queue = RB_ROOT_CACHED; + sched_engine->queue_priority_hint = INT_MIN; + sched_engine->queue = RB_ROOT_CACHED; - GEM_BUG_ON(__tasklet_is_enabled(&execlists->tasklet)); - execlists->tasklet.callback = nop_submission_tasklet; + GEM_BUG_ON(__tasklet_is_enabled(&engine->sched_engine->tasklet)); + engine->sched_engine->tasklet.callback = nop_submission_tasklet; - spin_unlock_irqrestore(&engine->active.lock, flags); + spin_unlock_irqrestore(&engine->sched_engine->lock, flags); rcu_read_unlock(); } @@ -3073,14 +3145,14 @@ static void execlists_reset_finish(struct intel_engine_cs *engine) * reset as the next level of recovery, and as a final resort we * will declare the device wedged. */ - GEM_BUG_ON(!reset_in_progress(execlists)); + GEM_BUG_ON(!reset_in_progress(engine)); /* And kick in case we missed a new request submission. 
*/ - if (__tasklet_enable(&execlists->tasklet)) + if (__tasklet_enable(&engine->sched_engine->tasklet)) __execlists_kick(execlists); ENGINE_TRACE(engine, "depth->%d\n", - atomic_read(&execlists->tasklet.count)); + atomic_read(&engine->sched_engine->tasklet.count)); } static void gen8_logical_ring_enable_irq(struct intel_engine_cs *engine) @@ -3101,6 +3173,42 @@ static void execlists_park(struct intel_engine_cs *engine) cancel_timer(&engine->execlists.preempt); } +static void add_to_engine(struct i915_request *rq) +{ + lockdep_assert_held(&rq->engine->sched_engine->lock); + list_move_tail(&rq->sched.link, &rq->engine->sched_engine->requests); +} + +static void remove_from_engine(struct i915_request *rq) +{ + struct intel_engine_cs *engine, *locked; + + /* + * Virtual engines complicate acquiring the engine timeline lock, + * as their rq->engine pointer is not stable until under that + * engine lock. The simple ploy we use is to take the lock then + * check that the rq still belongs to the newly locked engine. + */ + locked = READ_ONCE(rq->engine); + spin_lock_irq(&locked->sched_engine->lock); + while (unlikely(locked != (engine = READ_ONCE(rq->engine)))) { + spin_unlock(&locked->sched_engine->lock); + spin_lock(&engine->sched_engine->lock); + locked = engine; + } + list_del_init(&rq->sched.link); + + clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); + clear_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags); + + /* Prevent further __await_execution() registering a cb, then flush */ + set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags); + + spin_unlock_irq(&locked->sched_engine->lock); + + i915_request_notify_execute_cb_imm(rq); +} + static bool can_preempt(struct intel_engine_cs *engine) { if (GRAPHICS_VER(engine->i915) > 8) @@ -3110,11 +3218,62 @@ static bool can_preempt(struct intel_engine_cs *engine) return engine->class != RENDER_CLASS; } +static void kick_execlists(const struct i915_request *rq, int prio) +{ + struct intel_engine_cs *engine = rq->engine; + struct i915_sched_engine *sched_engine = engine->sched_engine; + const struct i915_request *inflight; + + /* + * We only need to kick the tasklet once for the high priority + * new context we add into the queue. + */ + if (prio <= sched_engine->queue_priority_hint) + return; + + rcu_read_lock(); + + /* Nothing currently active? We're overdue for a submission! */ + inflight = execlists_active(&engine->execlists); + if (!inflight) + goto unlock; + + /* + * If we are already the currently executing context, don't + * bother evaluating if we should preempt ourselves. + */ + if (inflight->context == rq->context) + goto unlock; + + ENGINE_TRACE(engine, + "bumping queue-priority-hint:%d for rq:%llx:%lld, inflight:%llx:%lld prio %d\n", + prio, + rq->fence.context, rq->fence.seqno, + inflight->fence.context, inflight->fence.seqno, + inflight->sched.attr.priority); + + sched_engine->queue_priority_hint = prio; + + /* + * Allow preemption of low -> normal -> high, but we do + * not allow low priority tasks to preempt other low priority + * tasks under the impression that latency for low priority + * tasks does not matter (as much as background throughput), + * so kiss. 
+ */ + if (prio >= max(I915_PRIORITY_NORMAL, rq_prio(inflight))) + tasklet_hi_schedule(&sched_engine->tasklet); + +unlock: + rcu_read_unlock(); +} + static void execlists_set_default_submission(struct intel_engine_cs *engine) { engine->submit_request = execlists_submit_request; - engine->schedule = i915_schedule; - engine->execlists.tasklet.callback = execlists_submission_tasklet; + engine->sched_engine->schedule = i915_schedule; + engine->sched_engine->kick_backend = kick_execlists; + engine->sched_engine->tasklet.callback = execlists_submission_tasklet; } static void execlists_shutdown(struct intel_engine_cs *engine) @@ -3122,7 +3281,7 @@ static void execlists_shutdown(struct intel_engine_cs *engine) /* Synchronise with residual timers and any softirq they raise */ del_timer_sync(&engine->execlists.timer); del_timer_sync(&engine->execlists.preempt); - tasklet_kill(&engine->execlists.tasklet); + tasklet_kill(&engine->sched_engine->tasklet); } static void execlists_release(struct intel_engine_cs *engine) @@ -3144,6 +3303,8 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine) engine->cops = &execlists_context_ops; engine->request_alloc = execlists_request_alloc; + engine->add_active_request = add_to_engine; + engine->remove_active_request = remove_from_engine; engine->reset.prepare = execlists_reset_prepare; engine->reset.rewind = execlists_reset_rewind; @@ -3238,7 +3399,7 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine) struct intel_uncore *uncore = engine->uncore; u32 base = engine->mmio_base; - tasklet_setup(&engine->execlists.tasklet, execlists_submission_tasklet); + tasklet_setup(&engine->sched_engine->tasklet, execlists_submission_tasklet); timer_setup(&engine->execlists.timer, execlists_timeslice, 0); timer_setup(&engine->execlists.preempt, execlists_preempt, 0); @@ -3255,6 +3416,10 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine) i915_mmio_reg_offset(RING_EXECLIST_SQ_CONTENTS(base)); execlists->ctrl_reg = uncore->regs + i915_mmio_reg_offset(RING_EXECLIST_CONTROL(base)); + + engine->fw_domain = intel_uncore_forcewake_for_reg(engine->uncore, + RING_EXECLIST_CONTROL(engine->mmio_base), + FW_REG_WRITE); } else { execlists->submit_reg = uncore->regs + i915_mmio_reg_offset(RING_ELSP(base)); @@ -3272,7 +3437,8 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine) execlists->csb_size = GEN11_CSB_ENTRIES; engine->context_tag = GENMASK(BITS_PER_LONG - 2, 0); - if (GRAPHICS_VER(engine->i915) >= 11) { + if (GRAPHICS_VER(engine->i915) >= 11 && + GRAPHICS_VER_FULL(engine->i915) < IP_VER(12, 50)) { execlists->ccid |= engine->instance << (GEN11_ENGINE_INSTANCE_SHIFT - 32); execlists->ccid |= engine->class << (GEN11_ENGINE_CLASS_SHIFT - 32); } @@ -3286,7 +3452,7 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine) static struct list_head *virtual_queue(struct virtual_engine *ve) { - return &ve->base.execlists.default_priolist.requests; + return &ve->base.sched_engine->default_priolist.requests; } static void rcu_virtual_context_destroy(struct work_struct *wrk) @@ -3301,7 +3467,7 @@ static void rcu_virtual_context_destroy(struct work_struct *wrk) if (unlikely(ve->request)) { struct i915_request *old; - spin_lock_irq(&ve->base.active.lock); + spin_lock_irq(&ve->base.sched_engine->lock); old = fetch_and_zero(&ve->request); if (old) { @@ -3310,7 +3476,7 @@ static void rcu_virtual_context_destroy(struct work_struct *wrk) i915_request_put(old); } - spin_unlock_irq(&ve->base.active.lock); + 
spin_unlock_irq(&ve->base.sched_engine->lock); } /* @@ -3320,7 +3486,7 @@ static void rcu_virtual_context_destroy(struct work_struct *wrk) * rbtrees as in the case it is running in parallel, it may reinsert * the rb_node into a sibling. */ - tasklet_kill(&ve->base.execlists.tasklet); + tasklet_kill(&ve->base.sched_engine->tasklet); /* Decouple ourselves from the siblings, no more access allowed. */ for (n = 0; n < ve->num_siblings; n++) { @@ -3330,24 +3496,26 @@ static void rcu_virtual_context_destroy(struct work_struct *wrk) if (RB_EMPTY_NODE(node)) continue; - spin_lock_irq(&sibling->active.lock); + spin_lock_irq(&sibling->sched_engine->lock); - /* Detachment is lazily performed in the execlists tasklet */ + /* Detachment is lazily performed in the sched_engine->tasklet */ if (!RB_EMPTY_NODE(node)) rb_erase_cached(node, &sibling->execlists.virtual); - spin_unlock_irq(&sibling->active.lock); + spin_unlock_irq(&sibling->sched_engine->lock); } - GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet)); + GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.sched_engine->tasklet)); GEM_BUG_ON(!list_empty(virtual_queue(ve))); lrc_fini(&ve->context); intel_context_fini(&ve->context); - intel_breadcrumbs_free(ve->base.breadcrumbs); + if (ve->base.breadcrumbs) + intel_breadcrumbs_put(ve->base.breadcrumbs); + if (ve->base.sched_engine) + i915_sched_engine_put(ve->base.sched_engine); intel_engine_free_request_pool(&ve->base); - kfree(ve->bonds); kfree(ve); } @@ -3440,11 +3608,24 @@ static void virtual_context_exit(struct intel_context *ce) intel_engine_pm_put(ve->siblings[n]); } +static struct intel_engine_cs * +virtual_get_sibling(struct intel_engine_cs *engine, unsigned int sibling) +{ + struct virtual_engine *ve = to_virtual_engine(engine); + + if (sibling >= ve->num_siblings) + return NULL; + + return ve->siblings[sibling]; +} + static const struct intel_context_ops virtual_context_ops = { .flags = COPS_HAS_INFLIGHT, .alloc = virtual_context_alloc, + .cancel_request = execlists_context_cancel_request, + .pre_pin = virtual_context_pre_pin, .pin = virtual_context_pin, .unpin = lrc_unpin, @@ -3454,6 +3635,8 @@ static const struct intel_context_ops virtual_context_ops = { .exit = virtual_context_exit, .destroy = virtual_context_destroy, + + .get_sibling = virtual_get_sibling, }; static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve) @@ -3475,16 +3658,18 @@ static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve) ENGINE_TRACE(&ve->base, "rq=%llx:%lld, mask=%x, prio=%d\n", rq->fence.context, rq->fence.seqno, - mask, ve->base.execlists.queue_priority_hint); + mask, ve->base.sched_engine->queue_priority_hint); return mask; } static void virtual_submission_tasklet(struct tasklet_struct *t) { + struct i915_sched_engine *sched_engine = + from_tasklet(sched_engine, t, tasklet); struct virtual_engine * const ve = - from_tasklet(ve, t, base.execlists.tasklet); - const int prio = READ_ONCE(ve->base.execlists.queue_priority_hint); + (struct virtual_engine *)sched_engine->private_data; + const int prio = READ_ONCE(sched_engine->queue_priority_hint); intel_engine_mask_t mask; unsigned int n; @@ -3503,7 +3688,7 @@ static void virtual_submission_tasklet(struct tasklet_struct *t) if (!READ_ONCE(ve->request)) break; /* already handled by a sibling's tasklet */ - spin_lock_irq(&sibling->active.lock); + spin_lock_irq(&sibling->sched_engine->lock); if (unlikely(!(mask & sibling->mask))) { if (!RB_EMPTY_NODE(&node->rb)) { @@ -3552,11 +3737,11 @@ static void 
virtual_submission_tasklet(struct tasklet_struct *t) submit_engine: GEM_BUG_ON(RB_EMPTY_NODE(&node->rb)); node->prio = prio; - if (first && prio > sibling->execlists.queue_priority_hint) - tasklet_hi_schedule(&sibling->execlists.tasklet); + if (first && prio > sibling->sched_engine->queue_priority_hint) + tasklet_hi_schedule(&sibling->sched_engine->tasklet); unlock_engine: - spin_unlock_irq(&sibling->active.lock); + spin_unlock_irq(&sibling->sched_engine->lock); if (intel_context_inflight(&ve->context)) break; @@ -3574,7 +3759,7 @@ static void virtual_submit_request(struct i915_request *rq) GEM_BUG_ON(ve->base.submit_request != virtual_submit_request); - spin_lock_irqsave(&ve->base.active.lock, flags); + spin_lock_irqsave(&ve->base.sched_engine->lock, flags); /* By the time we resubmit a request, it may be completed */ if (__i915_request_is_complete(rq)) { @@ -3588,68 +3773,25 @@ static void virtual_submit_request(struct i915_request *rq) i915_request_put(ve->request); } - ve->base.execlists.queue_priority_hint = rq_prio(rq); + ve->base.sched_engine->queue_priority_hint = rq_prio(rq); ve->request = i915_request_get(rq); GEM_BUG_ON(!list_empty(virtual_queue(ve))); list_move_tail(&rq->sched.link, virtual_queue(ve)); - tasklet_hi_schedule(&ve->base.execlists.tasklet); + tasklet_hi_schedule(&ve->base.sched_engine->tasklet); unlock: - spin_unlock_irqrestore(&ve->base.active.lock, flags); + spin_unlock_irqrestore(&ve->base.sched_engine->lock, flags); } -static struct ve_bond * -virtual_find_bond(struct virtual_engine *ve, - const struct intel_engine_cs *master) -{ - int i; - - for (i = 0; i < ve->num_bonds; i++) { - if (ve->bonds[i].master == master) - return &ve->bonds[i]; - } - - return NULL; -} - -static void -virtual_bond_execute(struct i915_request *rq, struct dma_fence *signal) -{ - struct virtual_engine *ve = to_virtual_engine(rq->engine); - intel_engine_mask_t allowed, exec; - struct ve_bond *bond; - - allowed = ~to_request(signal)->engine->mask; - - bond = virtual_find_bond(ve, to_request(signal)->engine); - if (bond) - allowed &= bond->sibling_mask; - - /* Restrict the bonded request to run on only the available engines */ - exec = READ_ONCE(rq->execution_mask); - while (!try_cmpxchg(&rq->execution_mask, &exec, exec & allowed)) - ; - - /* Prevent the master from being re-run on the bonded engines */ - to_request(signal)->execution_mask &= ~allowed; -} - -struct intel_context * -intel_execlists_create_virtual(struct intel_engine_cs **siblings, - unsigned int count) +static struct intel_context * +execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count) { struct virtual_engine *ve; unsigned int n; int err; - if (count == 0) - return ERR_PTR(-EINVAL); - - if (count == 1) - return intel_context_create(siblings[0]); - ve = kzalloc(struct_size(ve, siblings, count), GFP_KERNEL); if (!ve) return ERR_PTR(-ENOMEM); @@ -3681,19 +3823,24 @@ intel_execlists_create_virtual(struct intel_engine_cs **siblings, snprintf(ve->base.name, sizeof(ve->base.name), "virtual"); - intel_engine_init_active(&ve->base, ENGINE_VIRTUAL); intel_engine_init_execlists(&ve->base); + ve->base.sched_engine = i915_sched_engine_create(ENGINE_VIRTUAL); + if (!ve->base.sched_engine) { + err = -ENOMEM; + goto err_put; + } + ve->base.sched_engine->private_data = &ve->base; + ve->base.cops = &virtual_context_ops; ve->base.request_alloc = execlists_request_alloc; - ve->base.schedule = i915_schedule; + ve->base.sched_engine->schedule = i915_schedule; + ve->base.sched_engine->kick_backend = kick_execlists; 
ve->base.submit_request = virtual_submit_request; - ve->base.bond_execute = virtual_bond_execute; INIT_LIST_HEAD(virtual_queue(ve)); - ve->base.execlists.queue_priority_hint = INT_MIN; - tasklet_setup(&ve->base.execlists.tasklet, virtual_submission_tasklet); + tasklet_setup(&ve->base.sched_engine->tasklet, virtual_submission_tasklet); intel_context_init(&ve->context, &ve->base); @@ -3721,7 +3868,7 @@ intel_execlists_create_virtual(struct intel_engine_cs **siblings, * layering if we handle cloning of the requests and * submitting a copy into each backend. */ - if (sibling->execlists.tasklet.callback != + if (sibling->sched_engine->tasklet.callback != execlists_submission_tasklet) { err = -ENODEV; goto err_put; @@ -3756,6 +3903,8 @@ intel_execlists_create_virtual(struct intel_engine_cs **siblings, "v%dx%d", ve->base.class, count); ve->base.context_size = sibling->context_size; + ve->base.add_active_request = sibling->add_active_request; + ve->base.remove_active_request = sibling->remove_active_request; ve->base.emit_bb_start = sibling->emit_bb_start; ve->base.emit_flush = sibling->emit_flush; ve->base.emit_init_breadcrumb = sibling->emit_init_breadcrumb; @@ -3776,70 +3925,6 @@ err_put: return ERR_PTR(err); } -struct intel_context * -intel_execlists_clone_virtual(struct intel_engine_cs *src) -{ - struct virtual_engine *se = to_virtual_engine(src); - struct intel_context *dst; - - dst = intel_execlists_create_virtual(se->siblings, - se->num_siblings); - if (IS_ERR(dst)) - return dst; - - if (se->num_bonds) { - struct virtual_engine *de = to_virtual_engine(dst->engine); - - de->bonds = kmemdup(se->bonds, - sizeof(*se->bonds) * se->num_bonds, - GFP_KERNEL); - if (!de->bonds) { - intel_context_put(dst); - return ERR_PTR(-ENOMEM); - } - - de->num_bonds = se->num_bonds; - } - - return dst; -} - -int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine, - const struct intel_engine_cs *master, - const struct intel_engine_cs *sibling) -{ - struct virtual_engine *ve = to_virtual_engine(engine); - struct ve_bond *bond; - int n; - - /* Sanity check the sibling is part of the virtual engine */ - for (n = 0; n < ve->num_siblings; n++) - if (sibling == ve->siblings[n]) - break; - if (n == ve->num_siblings) - return -EINVAL; - - bond = virtual_find_bond(ve, master); - if (bond) { - bond->sibling_mask |= sibling->mask; - return 0; - } - - bond = krealloc(ve->bonds, - sizeof(*bond) * (ve->num_bonds + 1), - GFP_KERNEL); - if (!bond) - return -ENOMEM; - - bond[ve->num_bonds].master = master; - bond[ve->num_bonds].sibling_mask = sibling->mask; - - ve->bonds = bond; - ve->num_bonds++; - - return 0; -} - void intel_execlists_show_requests(struct intel_engine_cs *engine, struct drm_printer *m, void (*show_request)(struct drm_printer *m, @@ -3849,16 +3934,17 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine, unsigned int max) { const struct intel_engine_execlists *execlists = &engine->execlists; + struct i915_sched_engine *sched_engine = engine->sched_engine; struct i915_request *rq, *last; unsigned long flags; unsigned int count; struct rb_node *rb; - spin_lock_irqsave(&engine->active.lock, flags); + spin_lock_irqsave(&sched_engine->lock, flags); last = NULL; count = 0; - list_for_each_entry(rq, &engine->active.requests, sched.link) { + list_for_each_entry(rq, &sched_engine->requests, sched.link) { if (count++ < max - 1) show_request(m, rq, "\t\t", 0); else @@ -3873,13 +3959,13 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine, show_request(m, last, "\t\t", 0); } - if 
(execlists->queue_priority_hint != INT_MIN) + if (sched_engine->queue_priority_hint != INT_MIN) drm_printf(m, "\t\tQueue priority hint: %d\n", - READ_ONCE(execlists->queue_priority_hint)); + READ_ONCE(sched_engine->queue_priority_hint)); last = NULL; count = 0; - for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) { + for (rb = rb_first_cached(&sched_engine->queue); rb; rb = rb_next(rb)) { struct i915_priolist *p = rb_entry(rb, typeof(*p), node); priolist_for_each_request(rq, p) { @@ -3921,7 +4007,7 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine, show_request(m, last, "\t\t", 0); } - spin_unlock_irqrestore(&engine->active.lock, flags); + spin_unlock_irqrestore(&sched_engine->lock, flags); } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.h b/drivers/gpu/drm/i915/gt/intel_execlists_submission.h index 4ca9b475e252..a1aa92c983a5 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.h +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.h @@ -32,15 +32,7 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine, int indent), unsigned int max); -struct intel_context * -intel_execlists_create_virtual(struct intel_engine_cs **siblings, - unsigned int count); - -struct intel_context * -intel_execlists_clone_virtual(struct intel_engine_cs *src); - -int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine, - const struct intel_engine_cs *master, - const struct intel_engine_cs *sibling); +bool +intel_engine_in_execlists_submission_mode(const struct intel_engine_cs *engine); #endif /* __INTEL_EXECLISTS_SUBMISSION_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index 20e46b843324..de3ac58fceec 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -826,13 +826,13 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) phys_addr = pci_resource_start(pdev, 0) + pci_resource_len(pdev, 0) / 2; /* - * On BXT+/CNL+ writes larger than 64 bit to the GTT pagetable range + * On BXT+/ICL+ writes larger than 64 bit to the GTT pagetable range * will be dropped. For WC mappings in general we have 64 byte burst * writes when the WC buffer is flushed, so we can't use it, but have to * resort to an uncached mapping. The WC issue is easily caught by the * readback check when writing GTT PTE entries. 
*/ - if (IS_GEN9_LP(i915) || GRAPHICS_VER(i915) >= 10) + if (IS_GEN9_LP(i915) || GRAPHICS_VER(i915) >= 11) ggtt->gsm = ioremap(phys_addr, size); else ggtt->gsm = ioremap_wc(phys_addr, size); @@ -1494,7 +1494,7 @@ intel_partial_pages(const struct i915_ggtt_view *view, if (ret) goto err_sg_alloc; - iter = i915_gem_object_get_sg_dma(obj, view->partial.offset, &offset, true); + iter = i915_gem_object_get_sg_dma(obj, view->partial.offset, &offset); GEM_BUG_ON(!iter); sg = st->sgl; diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h index 2694dbb9967e..1c3af0fc0456 100644 --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h @@ -123,8 +123,10 @@ #define MI_SEMAPHORE_SAD_NEQ_SDD (5 << 12) #define MI_SEMAPHORE_TOKEN_MASK REG_GENMASK(9, 5) #define MI_SEMAPHORE_TOKEN_SHIFT 5 +#define MI_STORE_DATA_IMM MI_INSTR(0x20, 0) #define MI_STORE_DWORD_IMM MI_INSTR(0x20, 1) #define MI_STORE_DWORD_IMM_GEN4 MI_INSTR(0x20, 2) +#define MI_STORE_QWORD_IMM_GEN8 (MI_INSTR(0x20, 3) | REG_BIT(21)) #define MI_MEM_VIRTUAL (1 << 22) /* 945,g33,965 */ #define MI_USE_GGTT (1 << 22) /* g4x+ */ #define MI_STORE_DWORD_INDEX MI_INSTR(0x21, 1) diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 2161bf01ef8b..62d40c986642 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -13,6 +13,7 @@ #include "intel_gt_clock_utils.h" #include "intel_gt_pm.h" #include "intel_gt_requests.h" +#include "intel_migrate.h" #include "intel_mocs.h" #include "intel_rc6.h" #include "intel_renderstate.h" @@ -40,8 +41,8 @@ void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915) intel_gt_init_timelines(gt); intel_gt_pm_init_early(gt); - intel_rps_init_early(&gt->rps); intel_uc_init_early(&gt->uc); + intel_rps_init_early(&gt->rps); } int intel_gt_probe_lmem(struct intel_gt *gt) @@ -83,13 +84,73 @@ void intel_gt_init_hw_early(struct intel_gt *gt, struct i915_ggtt *ggtt) gt->ggtt = ggtt; } +static const struct intel_mmio_range icl_l3bank_steering_table[] = { + { 0x00B100, 0x00B3FF }, + {}, +}; + +static const struct intel_mmio_range xehpsdv_mslice_steering_table[] = { + { 0x004000, 0x004AFF }, + { 0x00C800, 0x00CFFF }, + { 0x00DD00, 0x00DDFF }, + { 0x00E900, 0x00FFFF }, /* 0xEA00 - 0xEFFF is unused */ + {}, +}; + +static const struct intel_mmio_range xehpsdv_lncf_steering_table[] = { + { 0x00B000, 0x00B0FF }, + { 0x00D800, 0x00D8FF }, + {}, +}; + +static const struct intel_mmio_range dg2_lncf_steering_table[] = { + { 0x00B000, 0x00B0FF }, + { 0x00D880, 0x00D8FF }, + {}, +}; + +static u16 slicemask(struct intel_gt *gt, int count) +{ + u64 dss_mask = intel_sseu_get_subslices(&gt->info.sseu, 0); + + return intel_slicemask_from_dssmask(dss_mask, count); +} + int intel_gt_init_mmio(struct intel_gt *gt) { + struct drm_i915_private *i915 = gt->i915; + intel_gt_init_clock_frequency(gt); intel_uc_init_mmio(&gt->uc); intel_sseu_info_init(gt); + /* + * An mslice is unavailable only if both the meml3 for the slice is + * disabled *and* all of the DSS in the slice (quadrant) are disabled.
+ */ + if (HAS_MSLICES(i915)) + gt->info.mslice_mask = + slicemask(gt, GEN_DSS_PER_MSLICE) | + (intel_uncore_read(gt->uncore, GEN10_MIRROR_FUSE3) & + GEN12_MEML3_EN_MASK); + + if (IS_DG2(i915)) { + gt->steering_table[MSLICE] = xehpsdv_mslice_steering_table; + gt->steering_table[LNCF] = dg2_lncf_steering_table; + } else if (IS_XEHPSDV(i915)) { + gt->steering_table[MSLICE] = xehpsdv_mslice_steering_table; + gt->steering_table[LNCF] = xehpsdv_lncf_steering_table; + } else if (GRAPHICS_VER(i915) >= 11 && + GRAPHICS_VER_FULL(i915) < IP_VER(12, 50)) { + gt->steering_table[L3BANK] = icl_l3bank_steering_table; + gt->info.l3bank_mask = + ~intel_uncore_read(gt->uncore, GEN10_MIRROR_FUSE3) & + GEN10_L3BANK_MASK; + } else if (HAS_MSLICES(i915)) { + MISSING_CASE(INTEL_INFO(i915)->platform); + } + return intel_engines_init_mmio(gt); } @@ -192,7 +253,7 @@ static void clear_register(struct intel_uncore *uncore, i915_reg_t reg) intel_uncore_rmw(uncore, reg, 0, 0); } -static void gen8_clear_engine_error_register(struct intel_engine_cs *engine) +static void gen6_clear_engine_error_register(struct intel_engine_cs *engine) { GEN6_RING_FAULT_REG_RMW(engine, RING_FAULT_VALID, 0); GEN6_RING_FAULT_REG_POSTING_READ(engine); @@ -238,7 +299,7 @@ intel_gt_clear_error_registers(struct intel_gt *gt, enum intel_engine_id id; for_each_engine_masked(engine, gt, engine_mask, id) - gen8_clear_engine_error_register(engine); + gen6_clear_engine_error_register(engine); } } @@ -572,6 +633,25 @@ static void __intel_gt_disable(struct intel_gt *gt) GEM_BUG_ON(intel_gt_pm_is_awake(gt)); } +int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout) +{ + long remaining_timeout; + + /* If the device is asleep, we have no requests outstanding */ + if (!intel_gt_pm_is_awake(gt)) + return 0; + + while ((timeout = intel_gt_retire_requests_timeout(gt, timeout, + &remaining_timeout)) > 0) { + cond_resched(); + if (signal_pending(current)) + return -EINTR; + } + + return timeout ? timeout : intel_uc_wait_for_idle(&gt->uc, + remaining_timeout); +} + int intel_gt_init(struct intel_gt *gt) { int err; @@ -622,10 +702,14 @@ int intel_gt_init(struct intel_gt *gt) if (err) goto err_gt; + intel_uc_init_late(&gt->uc); + err = i915_inject_probe_error(gt->i915, -EIO); if (err) goto err_gt; + intel_migrate_init(&gt->migrate, gt); + goto out_fw; err_gt: __intel_gt_disable(gt); @@ -649,6 +733,7 @@ void intel_gt_driver_remove(struct intel_gt *gt) { __intel_gt_disable(gt); + intel_migrate_fini(&gt->migrate); intel_uc_driver_remove(&gt->uc); intel_engines_release(gt); @@ -697,6 +782,112 @@ void intel_gt_driver_late_release(struct intel_gt *gt) intel_engines_free(gt); } +/** + * intel_gt_reg_needs_read_steering - determine whether a register read + * requires explicit steering + * @gt: GT structure + * @reg: the register to check steering requirements for + * @type: type of multicast steering to check + * + * Determines whether @reg needs explicit steering of a specific type for + * reads. + * + * Returns false if @reg does not belong to a register range of the given + * steering type, or if the default (subslice-based) steering IDs are suitable + * for @type steering too. 
+ */ +static bool intel_gt_reg_needs_read_steering(struct intel_gt *gt, + i915_reg_t reg, + enum intel_steering_type type) +{ + const u32 offset = i915_mmio_reg_offset(reg); + const struct intel_mmio_range *entry; + + if (likely(!intel_gt_needs_read_steering(gt, type))) + return false; + + for (entry = gt->steering_table[type]; entry->end; entry++) { + if (offset >= entry->start && offset <= entry->end) + return true; + } + + return false; +} + +/** + * intel_gt_get_valid_steering - determines valid IDs for a class of MCR steering + * @gt: GT structure + * @type: multicast register type + * @sliceid: Slice ID returned + * @subsliceid: Subslice ID returned + * + * Determines sliceid and subsliceid values that will steer reads + * of a specific multicast register class to a valid value. + */ +static void intel_gt_get_valid_steering(struct intel_gt *gt, + enum intel_steering_type type, + u8 *sliceid, u8 *subsliceid) +{ + switch (type) { + case L3BANK: + GEM_DEBUG_WARN_ON(!gt->info.l3bank_mask); /* should be impossible! */ + + *sliceid = 0; /* unused */ + *subsliceid = __ffs(gt->info.l3bank_mask); + break; + case MSLICE: + GEM_DEBUG_WARN_ON(!gt->info.mslice_mask); /* should be impossible! */ + + *sliceid = __ffs(gt->info.mslice_mask); + *subsliceid = 0; /* unused */ + break; + case LNCF: + GEM_DEBUG_WARN_ON(!gt->info.mslice_mask); /* should be impossible! */ + + /* + * An LNCF is always present if its mslice is present, so we + * can safely just steer to LNCF 0 in all cases. + */ + *sliceid = __ffs(gt->info.mslice_mask) << 1; + *subsliceid = 0; /* unused */ + break; + default: + MISSING_CASE(type); + *sliceid = 0; + *subsliceid = 0; + } +} + +/** + * intel_gt_read_register_fw - reads a GT register with support for multicast + * @gt: GT structure + * @reg: register to read + * + * This function will read a GT register. If the register is a multicast + * register, the read will be steered to a valid instance (i.e., one that + * isn't fused off or powered down by power gating). + * + * Returns the value from a valid instance of @reg. 
+ */ +u32 intel_gt_read_register_fw(struct intel_gt *gt, i915_reg_t reg) +{ + int type; + u8 sliceid, subsliceid; + + for (type = 0; type < NUM_STEERING_TYPES; type++) { + if (intel_gt_reg_needs_read_steering(gt, reg, type)) { + intel_gt_get_valid_steering(gt, type, &sliceid, + &subsliceid); + return intel_uncore_read_with_mcr_steering_fw(gt->uncore, + reg, + sliceid, + subsliceid); + } + } + + return intel_uncore_read_fw(gt->uncore, reg); +} + void intel_gt_info_print(const struct intel_gt_info *info, struct drm_printer *p) { diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h index 7ec395cace69..74e771871a9b 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.h +++ b/drivers/gpu/drm/i915/gt/intel_gt.h @@ -48,6 +48,8 @@ void intel_gt_driver_release(struct intel_gt *gt); void intel_gt_driver_late_release(struct intel_gt *gt); +int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout); + void intel_gt_check_and_clear_faults(struct intel_gt *gt); void intel_gt_clear_error_registers(struct intel_gt *gt, intel_engine_mask_t engine_mask); @@ -75,6 +77,14 @@ static inline bool intel_gt_is_wedged(const struct intel_gt *gt) return unlikely(test_bit(I915_WEDGED, &gt->reset.flags)); } +static inline bool intel_gt_needs_read_steering(struct intel_gt *gt, + enum intel_steering_type type) +{ + return gt->steering_table[type]; +} + +u32 intel_gt_read_register_fw(struct intel_gt *gt, i915_reg_t reg); + void intel_gt_info_print(const struct intel_gt_info *info, struct drm_printer *p); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c b/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c index 9f0e729d2d15..3513d6f90747 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c @@ -24,8 +24,8 @@ static u32 read_reference_ts_freq(struct intel_uncore *uncore) return base_freq + frac_freq; } -static u32 gen10_get_crystal_clock_freq(struct intel_uncore *uncore, - u32 rpm_config_reg) +static u32 gen9_get_crystal_clock_freq(struct intel_uncore *uncore, + u32 rpm_config_reg) { u32 f19_2_mhz = 19200000; u32 f24_mhz = 24000000; @@ -128,10 +128,10 @@ static u32 read_clock_frequency(struct intel_uncore *uncore) } else { u32 c0 = intel_uncore_read(uncore, RPM_CONFIG0); - if (GRAPHICS_VER(uncore->i915) <= 10) - freq = gen10_get_crystal_clock_freq(uncore, c0); - else + if (GRAPHICS_VER(uncore->i915) >= 11) freq = gen11_get_crystal_clock_freq(uncore, c0); + else + freq = gen9_get_crystal_clock_freq(uncore, c0); /* * Now figure out how the command stream's timestamp diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c index c13462274fe8..b2de83be4d97 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c @@ -184,7 +184,13 @@ void gen11_gt_irq_reset(struct intel_gt *gt) intel_uncore_write(uncore, GEN11_BCS_RSVD_INTR_MASK, ~0); intel_uncore_write(uncore, GEN11_VCS0_VCS1_INTR_MASK, ~0); intel_uncore_write(uncore, GEN11_VCS2_VCS3_INTR_MASK, ~0); + if (HAS_ENGINE(gt, VCS4) || HAS_ENGINE(gt, VCS5)) + intel_uncore_write(uncore, GEN12_VCS4_VCS5_INTR_MASK, ~0); + if (HAS_ENGINE(gt, VCS6) || HAS_ENGINE(gt, VCS7)) + intel_uncore_write(uncore, GEN12_VCS6_VCS7_INTR_MASK, ~0); intel_uncore_write(uncore, GEN11_VECS0_VECS1_INTR_MASK, ~0); + if (HAS_ENGINE(gt, VECS2) || HAS_ENGINE(gt, VECS3)) + intel_uncore_write(uncore, GEN12_VECS2_VECS3_INTR_MASK, ~0); intel_uncore_write(uncore, GEN11_GPM_WGBOXPERF_INTR_ENABLE, 0); intel_uncore_write(uncore, 
GEN11_GPM_WGBOXPERF_INTR_MASK, ~0); @@ -218,8 +224,13 @@ void gen11_gt_irq_postinstall(struct intel_gt *gt) intel_uncore_write(uncore, GEN11_BCS_RSVD_INTR_MASK, ~smask); intel_uncore_write(uncore, GEN11_VCS0_VCS1_INTR_MASK, ~dmask); intel_uncore_write(uncore, GEN11_VCS2_VCS3_INTR_MASK, ~dmask); + if (HAS_ENGINE(gt, VCS4) || HAS_ENGINE(gt, VCS5)) + intel_uncore_write(uncore, GEN12_VCS4_VCS5_INTR_MASK, ~dmask); + if (HAS_ENGINE(gt, VCS6) || HAS_ENGINE(gt, VCS7)) + intel_uncore_write(uncore, GEN12_VCS6_VCS7_INTR_MASK, ~dmask); intel_uncore_write(uncore, GEN11_VECS0_VECS1_INTR_MASK, ~dmask); - + if (HAS_ENGINE(gt, VECS2) || HAS_ENGINE(gt, VECS3)) + intel_uncore_write(uncore, GEN12_VECS2_VECS3_INTR_MASK, ~dmask); /* * RPS interrupts will get enabled/disabled on demand when RPS itself * is enabled/disabled. diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index aef3084e8b16..dea8e2479897 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -6,7 +6,6 @@ #include <linux/suspend.h> #include "i915_drv.h" -#include "i915_globals.h" #include "i915_params.h" #include "intel_context.h" #include "intel_engine_pm.h" @@ -67,8 +66,6 @@ static int __gt_unpark(struct intel_wakeref *wf) GT_TRACE(gt, "\n"); - i915_globals_unpark(); - /* * It seems that the DMC likes to transition between the DC states a lot * when there are no connected displays (no active power domains) during @@ -116,8 +113,6 @@ static int __gt_park(struct intel_wakeref *wf) GEM_BUG_ON(!wakeref); intel_display_power_put_async(i915, POWER_DOMAIN_GT_IRQ, wakeref); - i915_globals_park(); - return 0; } @@ -174,8 +169,6 @@ static void gt_sanitize(struct intel_gt *gt, bool force) if (intel_gt_is_wedged(gt)) intel_gt_unset_wedged(gt); - intel_uc_sanitize(&gt->uc); - for_each_engine(engine, gt, id) if (engine->reset.prepare) engine->reset.prepare(engine); @@ -191,6 +184,8 @@ static void gt_sanitize(struct intel_gt *gt, bool force) __intel_engine_reset(engine, false); } + intel_uc_reset(&gt->uc, false); + for_each_engine(engine, gt, id) if (engine->reset.finish) engine->reset.finish(engine); @@ -243,6 +238,8 @@ int intel_gt_resume(struct intel_gt *gt) goto err_wedged; } + intel_uc_reset_finish(&gt->uc); + intel_rps_enable(&gt->rps); intel_llc_enable(&gt->llc); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c index 647eca9d867a..edb881d75630 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_requests.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c @@ -130,7 +130,8 @@ void intel_engine_fini_retire(struct intel_engine_cs *engine) GEM_BUG_ON(engine->retire); } -long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout) +long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout, + long *remaining_timeout) { struct intel_gt_timelines *timelines = &gt->timelines; struct intel_timeline *tl, *tn; @@ -195,22 +196,10 @@ out_active: spin_lock(&timelines->lock); if (flush_submission(gt, timeout)) /* Wait, there's more! */ active_count++; - return active_count ? timeout : 0; -} - -int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout) -{ - /* If the device is asleep, we have no requests outstanding */ - if (!intel_gt_pm_is_awake(gt)) - return 0; - - while ((timeout = intel_gt_retire_requests_timeout(gt, timeout)) > 0) { - cond_resched(); - if (signal_pending(current)) - return -EINTR; - } + if (remaining_timeout) + *remaining_timeout = timeout; - return timeout + return active_count ? 
timeout : 0; } static void retire_work_handler(struct work_struct *work) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.h b/drivers/gpu/drm/i915/gt/intel_gt_requests.h index fcc30a6e4fe9..51dbe0e3294e 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_requests.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.h @@ -6,14 +6,17 @@ #ifndef INTEL_GT_REQUESTS_H #define INTEL_GT_REQUESTS_H +#include <stddef.h> + struct intel_engine_cs; struct intel_gt; struct intel_timeline; -long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout); +long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout, + long *remaining_timeout); static inline void intel_gt_retire_requests(struct intel_gt *gt) { - intel_gt_retire_requests_timeout(gt, 0); + intel_gt_retire_requests_timeout(gt, 0, NULL); } void intel_engine_init_retire(struct intel_engine_cs *engine); @@ -21,8 +24,6 @@ void intel_engine_add_retire(struct intel_engine_cs *engine, struct intel_timeline *tl); void intel_engine_fini_retire(struct intel_engine_cs *engine); -int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout); - void intel_gt_init_requests(struct intel_gt *gt); void intel_gt_park_requests(struct intel_gt *gt); void intel_gt_unpark_requests(struct intel_gt *gt); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index fecfacf551d5..a81e21bf1bd1 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -24,6 +24,7 @@ #include "intel_reset_types.h" #include "intel_rc6_types.h" #include "intel_rps_types.h" +#include "intel_migrate_types.h" #include "intel_wakeref.h" struct drm_i915_private; @@ -31,6 +32,33 @@ struct i915_ggtt; struct intel_engine_cs; struct intel_uncore; +struct intel_mmio_range { + u32 start; + u32 end; +}; + +/* + * The hardware has multiple kinds of multicast register ranges that need + * special register steering (and future platforms are expected to add + * additional types). + * + * During driver startup, we initialize the steering control register to + * direct reads to a slice/subslice that are valid for the 'subslice' class + * of multicast registers. If another type of steering does not have any + * overlap in valid steering targets with 'subslice' style registers, we will + * need to explicitly re-steer reads of registers of the other type. + * + * Only the replication types that may need additional non-default steering + * are listed here. 
+ */ +enum intel_steering_type { + L3BANK, + MSLICE, + LNCF, + + NUM_STEERING_TYPES +}; + enum intel_submission_method { INTEL_SUBMISSION_RING, INTEL_SUBMISSION_ELSP, @@ -145,8 +173,15 @@ struct intel_gt { struct i915_vma *scratch; + struct intel_migrate migrate; + + const struct intel_mmio_range *steering_table[NUM_STEERING_TYPES]; + struct intel_gt_info { intel_engine_mask_t engine_mask; + + u32 l3bank_mask; + u8 num_engines; /* Media engine access to SFC per instance */ @@ -154,6 +189,8 @@ struct intel_gt { /* Slice/subslice/EU info */ struct sseu_dev_info sseu; + + unsigned long mslice_mask; } info; }; diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c index 084ea65d59c0..e137dd32b5b8 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.c +++ b/drivers/gpu/drm/i915/gt/intel_gtt.c @@ -16,7 +16,19 @@ struct drm_i915_gem_object *alloc_pt_lmem(struct i915_address_space *vm, int sz) { struct drm_i915_gem_object *obj; - obj = i915_gem_object_create_lmem(vm->i915, sz, 0); + /* + * To avoid severe over-allocation when dealing with min_page_size + * restrictions, we override that behaviour here by allowing an object + * size and page layout which can be smaller. In practice this should be + * totally fine, since GTT paging structures are not typically inserted + * into the GTT. + * + * Note that we also hit this path for the scratch page, and for this + * case it might need to be 64K, but that should work fine here since we + * used the passed in size for the page size, which should ensure it + * also has the same alignment. + */ + obj = __i915_gem_object_create_lmem_with_ps(vm->i915, sz, sz, 0); /* * Ensure all paging structures for this vm share the same dma-resv * object underneath, with the idea that one object_lock() will lock @@ -414,7 +426,7 @@ static void tgl_setup_private_ppat(struct intel_uncore *uncore) intel_uncore_write(uncore, GEN12_PAT_INDEX(7), GEN8_PPAT_WB); } -static void cnl_setup_private_ppat(struct intel_uncore *uncore) +static void icl_setup_private_ppat(struct intel_uncore *uncore) { intel_uncore_write(uncore, GEN10_PAT_INDEX(0), @@ -514,8 +526,8 @@ void setup_private_pat(struct intel_uncore *uncore) if (GRAPHICS_VER(i915) >= 12) tgl_setup_private_ppat(uncore); - else if (GRAPHICS_VER(i915) >= 10) - cnl_setup_private_ppat(uncore); + else if (GRAPHICS_VER(i915) >= 11) + icl_setup_private_ppat(uncore); else if (IS_CHERRYVIEW(i915) || IS_GEN9_LP(i915)) chv_setup_private_ppat(uncore); else diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h index edea95b97c36..bc7153018ebd 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.h +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h @@ -140,7 +140,6 @@ typedef u64 gen8_pte_t; enum i915_cache_level; -struct drm_i915_file_private; struct drm_i915_gem_object; struct i915_fence_reg; struct i915_vma; @@ -220,16 +219,6 @@ struct i915_address_space { struct intel_gt *gt; struct drm_i915_private *i915; struct device *dma; - /* - * Every address space belongs to a struct file - except for the global - * GTT that is owned by the driver (and so @file is set to NULL). In - * principle, no information should leak from one context to another - * (or between files/processes etc) unless explicitly shared by the - * owner. Tracking the owner is important in order to free up per-file - * objects along with the file, to aide resource tracking, and to - * assign blame. - */ - struct drm_i915_file_private *file; u64 total; /* size addr space maps (ex. 
2GB for ggtt) */ u64 reserved; /* size addr space reserved */ @@ -296,6 +285,13 @@ struct i915_address_space { u32 flags); void (*cleanup)(struct i915_address_space *vm); + void (*foreach)(struct i915_address_space *vm, + u64 start, u64 length, + void (*fn)(struct i915_address_space *vm, + struct i915_page_table *pt, + void *data), + void *data); + struct i915_vma_ops vma_ops; I915_SELFTEST_DECLARE(struct fault_attr fault_attr); diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index a27bac0a4bfb..bb4af4977920 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -70,7 +70,7 @@ static void set_offsets(u32 *regs, if (close) { /* Close the batch; used mainly by live_lrc_layout() */ *regs = MI_BATCH_BUFFER_END; - if (GRAPHICS_VER(engine->i915) >= 10) + if (GRAPHICS_VER(engine->i915) >= 11) *regs |= BIT(0); } } @@ -484,6 +484,47 @@ static const u8 gen12_rcs_offsets[] = { END }; +static const u8 xehp_rcs_offsets[] = { + NOP(1), + LRI(13, POSTED), + REG16(0x244), + REG(0x034), + REG(0x030), + REG(0x038), + REG(0x03c), + REG(0x168), + REG(0x140), + REG(0x110), + REG(0x1c0), + REG(0x1c4), + REG(0x1c8), + REG(0x180), + REG16(0x2b4), + + NOP(5), + LRI(9, POSTED), + REG16(0x3a8), + REG16(0x28c), + REG16(0x288), + REG16(0x284), + REG16(0x280), + REG16(0x27c), + REG16(0x278), + REG16(0x274), + REG16(0x270), + + LRI(3, POSTED), + REG(0x1b0), + REG16(0x5a8), + REG16(0x5ac), + + NOP(6), + LRI(1, 0), + REG(0x0c8), + + END +}; + #undef END #undef REG16 #undef REG @@ -502,7 +543,9 @@ static const u8 *reg_offsets(const struct intel_engine_cs *engine) !intel_engine_has_relative_mmio(engine)); if (engine->class == RENDER_CLASS) { - if (GRAPHICS_VER(engine->i915) >= 12) + if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) + return xehp_rcs_offsets; + else if (GRAPHICS_VER(engine->i915) >= 12) return gen12_rcs_offsets; else if (GRAPHICS_VER(engine->i915) >= 11) return gen11_rcs_offsets; @@ -522,7 +565,9 @@ static const u8 *reg_offsets(const struct intel_engine_cs *engine) static int lrc_ring_mi_mode(const struct intel_engine_cs *engine) { - if (GRAPHICS_VER(engine->i915) >= 12) + if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) + return 0x70; + else if (GRAPHICS_VER(engine->i915) >= 12) return 0x60; else if (GRAPHICS_VER(engine->i915) >= 9) return 0x54; @@ -534,7 +579,9 @@ static int lrc_ring_mi_mode(const struct intel_engine_cs *engine) static int lrc_ring_gpr0(const struct intel_engine_cs *engine) { - if (GRAPHICS_VER(engine->i915) >= 12) + if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) + return 0x84; + else if (GRAPHICS_VER(engine->i915) >= 12) return 0x74; else if (GRAPHICS_VER(engine->i915) >= 9) return 0x68; @@ -578,10 +625,16 @@ static int lrc_ring_indirect_offset(const struct intel_engine_cs *engine) static int lrc_ring_cmd_buf_cctl(const struct intel_engine_cs *engine) { - if (engine->class != RENDER_CLASS) - return -1; - if (GRAPHICS_VER(engine->i915) >= 12) + if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) + /* + * Note that the CSFE context has a dummy slot for CMD_BUF_CCTL + * simply to match the RCS context image layout. 
+ */ + return 0xc6; + else if (engine->class != RENDER_CLASS) + return -1; + else if (GRAPHICS_VER(engine->i915) >= 12) return 0xb6; else if (GRAPHICS_VER(engine->i915) >= 11) return 0xaa; @@ -600,8 +653,6 @@ lrc_ring_indirect_offset_default(const struct intel_engine_cs *engine) return GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; case 11: return GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; - case 10: - return GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; case 9: return GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; case 8: @@ -845,7 +896,7 @@ int lrc_alloc(struct intel_context *ce, struct intel_engine_cs *engine) if (IS_ERR(vma)) return PTR_ERR(vma); - ring = intel_engine_create_ring(engine, (unsigned long)ce->ring); + ring = intel_engine_create_ring(engine, ce->ring_size); if (IS_ERR(ring)) { err = PTR_ERR(ring); goto err_vma; @@ -1101,6 +1152,14 @@ setup_indirect_ctx_bb(const struct intel_context *ce, * bits 55-60: SW counter * bits 61-63: engine class * + * On Xe_HP, the upper dword of the descriptor has a new format: + * + * bits 32-37: virtual function number + * bit 38: mbz, reserved for use by hardware + * bits 39-54: SW context ID + * bits 55-57: reserved + * bits 58-63: SW counter + * * engine info, SW context ID and SW counter need to form a unique number * (Context ID) per lrc. */ @@ -1387,40 +1446,6 @@ static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) return batch; } -static u32 * -gen10_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) -{ - int i; - - /* - * WaPipeControlBefore3DStateSamplePattern: cnl - * - * Ensure the engine is idle prior to programming a - * 3DSTATE_SAMPLE_PATTERN during a context restore. - */ - batch = gen8_emit_pipe_control(batch, - PIPE_CONTROL_CS_STALL, - 0); - /* - * WaPipeControlBefore3DStateSamplePattern says we need 4 dwords for - * the PIPE_CONTROL followed by 12 dwords of 0x0, so 16 dwords in - * total. However, a PIPE_CONTROL is 6 dwords long, not 4, which is - * confusing. Since gen8_emit_pipe_control() already advances the - * batch by 6 dwords, we advance the other 10 here, completing a - * cacheline. It's not clear if the workaround requires this padding - * before other commands, or if it's just the regular padding we would - * already have for the workaround bb, so leave it here for now. 
- */ - for (i = 0; i < 10; i++) - *batch++ = MI_NOOP; - - /* Pad to end of cacheline */ - while ((unsigned long)batch % CACHELINE_BYTES) - *batch++ = MI_NOOP; - - return batch; -} - #define CTX_WA_BB_SIZE (PAGE_SIZE) static int lrc_create_wa_ctx(struct intel_engine_cs *engine) @@ -1473,10 +1498,6 @@ void lrc_init_wa_ctx(struct intel_engine_cs *engine) case 12: case 11: return; - case 10: - wa_bb_fn[0] = gen10_init_indirectctx_bb; - wa_bb_fn[1] = NULL; - break; case 9: wa_bb_fn[0] = gen9_init_indirectctx_bb; wa_bb_fn[1] = NULL; diff --git a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h index 41e5350a7a05..f785d0ed238f 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h +++ b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h @@ -87,9 +87,10 @@ #define GEN11_CSB_WRITE_PTR_MASK (GEN11_CSB_PTR_MASK << 0) #define MAX_CONTEXT_HW_ID (1 << 21) /* exclusive */ -#define MAX_GUC_CONTEXT_HW_ID (1 << 20) /* exclusive */ #define GEN11_MAX_CONTEXT_HW_ID (1 << 11) /* exclusive */ /* in Gen12 ID 0x7FF is reserved to indicate idle */ #define GEN12_MAX_CONTEXT_HW_ID (GEN11_MAX_CONTEXT_HW_ID - 1) +/* in Xe_HP ID 0xFFFF is reserved to indicate "invalid context" */ +#define XEHP_MAX_CONTEXT_HW_ID 0xFFFF #endif /* _INTEL_LRC_REG_H_ */ diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c b/drivers/gpu/drm/i915/gt/intel_migrate.c new file mode 100644 index 000000000000..1dac21aa7e5c --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_migrate.c @@ -0,0 +1,688 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2020 Intel Corporation + */ + +#include "i915_drv.h" +#include "intel_context.h" +#include "intel_gpu_commands.h" +#include "intel_gt.h" +#include "intel_gtt.h" +#include "intel_migrate.h" +#include "intel_ring.h" + +struct insert_pte_data { + u64 offset; + bool is_lmem; +}; + +#define CHUNK_SZ SZ_8M /* ~1ms at 8GiB/s preemption delay */ + +static bool engine_supports_migration(struct intel_engine_cs *engine) +{ + if (!engine) + return false; + + /* + * We need the ability to prevent aribtration (MI_ARB_ON_OFF), + * the ability to write PTE using inline data (MI_STORE_DATA) + * and of course the ability to do the block transfer (blits). + */ + GEM_BUG_ON(engine->class != COPY_ENGINE_CLASS); + + return true; +} + +static void insert_pte(struct i915_address_space *vm, + struct i915_page_table *pt, + void *data) +{ + struct insert_pte_data *d = data; + + vm->insert_page(vm, px_dma(pt), d->offset, I915_CACHE_NONE, + d->is_lmem ? PTE_LM : 0); + d->offset += PAGE_SIZE; +} + +static struct i915_address_space *migrate_vm(struct intel_gt *gt) +{ + struct i915_vm_pt_stash stash = {}; + struct i915_ppgtt *vm; + int err; + int i; + + /* + * We construct a very special VM for use by all migration contexts, + * it is kept pinned so that it can be used at any time. As we need + * to pre-allocate the page directories for the migration VM, this + * limits us to only using a small number of prepared vma. + * + * To be able to pipeline and reschedule migration operations while + * avoiding unnecessary contention on the vm itself, the PTE updates + * are inline with the blits. All the blits use the same fixed + * addresses, with the backing store redirection being updated on the + * fly. Only 2 implicit vma are used for all migration operations. 
+ * + * We lay the ppGTT out as: + * + * [0, CHUNK_SZ) -> first object + * [CHUNK_SZ, 2 * CHUNK_SZ) -> second object + * [2 * CHUNK_SZ, 2 * CHUNK_SZ + 2 * CHUNK_SZ >> 9] -> PTE + * + * By exposing the dma addresses of the page directories themselves + * within the ppGTT, we are then able to rewrite the PTE prior to use. + * But the PTE update and subsequent migration operation must be atomic, + * i.e. within the same non-preemptible window so that we do not switch + * to another migration context that overwrites the PTE. + * + * TODO: Add support for huge LMEM PTEs + */ + + vm = i915_ppgtt_create(gt); + if (IS_ERR(vm)) + return ERR_CAST(vm); + + if (!vm->vm.allocate_va_range || !vm->vm.foreach) { + err = -ENODEV; + goto err_vm; + } + + /* + * Each engine instance is assigned its own chunk in the VM, so + * that we can run multiple instances concurrently + */ + for (i = 0; i < ARRAY_SIZE(gt->engine_class[COPY_ENGINE_CLASS]); i++) { + struct intel_engine_cs *engine; + u64 base = (u64)i << 32; + struct insert_pte_data d = {}; + struct i915_gem_ww_ctx ww; + u64 sz; + + engine = gt->engine_class[COPY_ENGINE_CLASS][i]; + if (!engine_supports_migration(engine)) + continue; + + /* + * We copy in 8MiB chunks. Each PDE covers 2MiB, so we need + * 4x2 page directories for source/destination. + */ + sz = 2 * CHUNK_SZ; + d.offset = base + sz; + + /* + * We need another page directory setup so that we can write + * the 8x512 PTE in each chunk. + */ + sz += (sz >> 12) * sizeof(u64); + + err = i915_vm_alloc_pt_stash(&vm->vm, &stash, sz); + if (err) + goto err_vm; + + for_i915_gem_ww(&ww, err, true) { + err = i915_vm_lock_objects(&vm->vm, &ww); + if (err) + continue; + err = i915_vm_map_pt_stash(&vm->vm, &stash); + if (err) + continue; + + vm->vm.allocate_va_range(&vm->vm, &stash, base, sz); + } + i915_vm_free_pt_stash(&vm->vm, &stash); + if (err) + goto err_vm; + + /* Now allow the GPU to rewrite the PTE via its own ppGTT */ + d.is_lmem = i915_gem_object_is_lmem(vm->vm.scratch[0]); + vm->vm.foreach(&vm->vm, base, base + sz, insert_pte, &d); + } + + return &vm->vm; + +err_vm: + i915_vm_put(&vm->vm); + return ERR_PTR(err); +} + +static struct intel_engine_cs *first_copy_engine(struct intel_gt *gt) +{ + struct intel_engine_cs *engine; + int i; + + for (i = 0; i < ARRAY_SIZE(gt->engine_class[COPY_ENGINE_CLASS]); i++) { + engine = gt->engine_class[COPY_ENGINE_CLASS][i]; + if (engine_supports_migration(engine)) + return engine; + } + + return NULL; +} + +static struct intel_context *pinned_context(struct intel_gt *gt) +{ + static struct lock_class_key key; + struct intel_engine_cs *engine; + struct i915_address_space *vm; + struct intel_context *ce; + + engine = first_copy_engine(gt); + if (!engine) + return ERR_PTR(-ENODEV); + + vm = migrate_vm(gt); + if (IS_ERR(vm)) + return ERR_CAST(vm); + + ce = intel_engine_create_pinned_context(engine, vm, SZ_512K, + I915_GEM_HWS_MIGRATE, + &key, "migrate"); + i915_vm_put(vm); + return ce; +} + +int intel_migrate_init(struct intel_migrate *m, struct intel_gt *gt) +{ + struct intel_context *ce; + + memset(m, 0, sizeof(*m)); + + ce = pinned_context(gt); + if (IS_ERR(ce)) + return PTR_ERR(ce); + + m->context = ce; + return 0; +} + +static int random_index(unsigned int max) +{ + return upper_32_bits(mul_u32_u32(get_random_u32(), max)); +} + +static struct intel_context *__migrate_engines(struct intel_gt *gt) +{ + struct intel_engine_cs *engines[MAX_ENGINE_INSTANCE]; + struct intel_engine_cs *engine; + unsigned int count, i; + + count = 0; + for (i = 0; i < 
ARRAY_SIZE(gt->engine_class[COPY_ENGINE_CLASS]); i++) { + engine = gt->engine_class[COPY_ENGINE_CLASS][i]; + if (engine_supports_migration(engine)) + engines[count++] = engine; + } + + return intel_context_create(engines[random_index(count)]); +} + +struct intel_context *intel_migrate_create_context(struct intel_migrate *m) +{ + struct intel_context *ce; + + /* + * We randomly distribute contexts across the engines upon constrction, + * as they all share the same pinned vm, and so in order to allow + * multiple blits to run in parallel, we must construct each blit + * to use a different range of the vm for its GTT. This has to be + * known at construction, so we can not use the late greedy load + * balancing of the virtual-engine. + */ + ce = __migrate_engines(m->context->engine->gt); + if (IS_ERR(ce)) + return ce; + + ce->ring = NULL; + ce->ring_size = SZ_256K; + + i915_vm_put(ce->vm); + ce->vm = i915_vm_get(m->context->vm); + + return ce; +} + +static inline struct sgt_dma sg_sgt(struct scatterlist *sg) +{ + dma_addr_t addr = sg_dma_address(sg); + + return (struct sgt_dma){ sg, addr, addr + sg_dma_len(sg) }; +} + +static int emit_no_arbitration(struct i915_request *rq) +{ + u32 *cs; + + cs = intel_ring_begin(rq, 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + /* Explicitly disable preemption for this request. */ + *cs++ = MI_ARB_ON_OFF; + *cs++ = MI_NOOP; + intel_ring_advance(rq, cs); + + return 0; +} + +static int emit_pte(struct i915_request *rq, + struct sgt_dma *it, + enum i915_cache_level cache_level, + bool is_lmem, + u64 offset, + int length) +{ + const u64 encode = rq->context->vm->pte_encode(0, cache_level, + is_lmem ? PTE_LM : 0); + struct intel_ring *ring = rq->ring; + int total = 0; + u32 *hdr, *cs; + int pkt; + + GEM_BUG_ON(GRAPHICS_VER(rq->engine->i915) < 8); + + /* Compute the page directory offset for the target address range */ + offset += (u64)rq->engine->instance << 32; + offset >>= 12; + offset *= sizeof(u64); + offset += 2 * CHUNK_SZ; + + cs = intel_ring_begin(rq, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + /* Pack as many PTE updates as possible into a single MI command */ + pkt = min_t(int, 0x400, ring->space / sizeof(u32) + 5); + pkt = min_t(int, pkt, (ring->size - ring->emit) / sizeof(u32) + 5); + + hdr = cs; + *cs++ = MI_STORE_DATA_IMM | REG_BIT(21); /* as qword elements */ + *cs++ = lower_32_bits(offset); + *cs++ = upper_32_bits(offset); + + do { + if (cs - hdr >= pkt) { + *hdr += cs - hdr - 2; + *cs++ = MI_NOOP; + + ring->emit = (void *)cs - ring->vaddr; + intel_ring_advance(rq, cs); + intel_ring_update_space(ring); + + cs = intel_ring_begin(rq, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + pkt = min_t(int, 0x400, ring->space / sizeof(u32) + 5); + pkt = min_t(int, pkt, (ring->size - ring->emit) / sizeof(u32) + 5); + + hdr = cs; + *cs++ = MI_STORE_DATA_IMM | REG_BIT(21); + *cs++ = lower_32_bits(offset); + *cs++ = upper_32_bits(offset); + } + + *cs++ = lower_32_bits(encode | it->dma); + *cs++ = upper_32_bits(encode | it->dma); + + offset += 8; + total += I915_GTT_PAGE_SIZE; + + it->dma += I915_GTT_PAGE_SIZE; + if (it->dma >= it->max) { + it->sg = __sg_next(it->sg); + if (!it->sg || sg_dma_len(it->sg) == 0) + break; + + it->dma = sg_dma_address(it->sg); + it->max = it->dma + sg_dma_len(it->sg); + } + } while (total < length); + + *hdr += cs - hdr - 2; + *cs++ = MI_NOOP; + + ring->emit = (void *)cs - ring->vaddr; + intel_ring_advance(rq, cs); + intel_ring_update_space(ring); + + return total; +} + +static bool wa_1209644611_applies(int ver, u32 size) +{ + 
u32 height = size >> PAGE_SHIFT; + + if (ver != 11) + return false; + + return height % 4 == 3 && height <= 8; +} + +static int emit_copy(struct i915_request *rq, int size) +{ + const int ver = GRAPHICS_VER(rq->engine->i915); + u32 instance = rq->engine->instance; + u32 *cs; + + cs = intel_ring_begin(rq, ver >= 8 ? 10 : 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + if (ver >= 9 && !wa_1209644611_applies(ver, size)) { + *cs++ = GEN9_XY_FAST_COPY_BLT_CMD | (10 - 2); + *cs++ = BLT_DEPTH_32 | PAGE_SIZE; + *cs++ = 0; + *cs++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4; + *cs++ = CHUNK_SZ; /* dst offset */ + *cs++ = instance; + *cs++ = 0; + *cs++ = PAGE_SIZE; + *cs++ = 0; /* src offset */ + *cs++ = instance; + } else if (ver >= 8) { + *cs++ = XY_SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (10 - 2); + *cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | PAGE_SIZE; + *cs++ = 0; + *cs++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4; + *cs++ = CHUNK_SZ; /* dst offset */ + *cs++ = instance; + *cs++ = 0; + *cs++ = PAGE_SIZE; + *cs++ = 0; /* src offset */ + *cs++ = instance; + } else { + GEM_BUG_ON(instance); + *cs++ = SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (6 - 2); + *cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | PAGE_SIZE; + *cs++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE; + *cs++ = CHUNK_SZ; /* dst offset */ + *cs++ = PAGE_SIZE; + *cs++ = 0; /* src offset */ + } + + intel_ring_advance(rq, cs); + return 0; +} + +int +intel_context_migrate_copy(struct intel_context *ce, + struct dma_fence *await, + struct scatterlist *src, + enum i915_cache_level src_cache_level, + bool src_is_lmem, + struct scatterlist *dst, + enum i915_cache_level dst_cache_level, + bool dst_is_lmem, + struct i915_request **out) +{ + struct sgt_dma it_src = sg_sgt(src), it_dst = sg_sgt(dst); + struct i915_request *rq; + int err; + + GEM_BUG_ON(ce->vm != ce->engine->gt->migrate.context->vm); + *out = NULL; + + GEM_BUG_ON(ce->ring->size < SZ_64K); + + do { + int len; + + rq = i915_request_create(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out_ce; + } + + if (await) { + err = i915_request_await_dma_fence(rq, await); + if (err) + goto out_rq; + + if (rq->engine->emit_init_breadcrumb) { + err = rq->engine->emit_init_breadcrumb(rq); + if (err) + goto out_rq; + } + + await = NULL; + } + + /* The PTE updates + copy must not be interrupted. */ + err = emit_no_arbitration(rq); + if (err) + goto out_rq; + + len = emit_pte(rq, &it_src, src_cache_level, src_is_lmem, 0, + CHUNK_SZ); + if (len <= 0) { + err = len; + goto out_rq; + } + + err = emit_pte(rq, &it_dst, dst_cache_level, dst_is_lmem, + CHUNK_SZ, len); + if (err < 0) + goto out_rq; + if (err < len) { + err = -EINVAL; + goto out_rq; + } + + err = rq->engine->emit_flush(rq, EMIT_INVALIDATE); + if (err) + goto out_rq; + + err = emit_copy(rq, len); + + /* Arbitration is re-enabled between requests. */ +out_rq: + if (*out) + i915_request_put(*out); + *out = i915_request_get(rq); + i915_request_add(rq); + if (err || !it_src.sg || !sg_dma_len(it_src.sg)) + break; + + cond_resched(); + } while (1); + +out_ce: + return err; +} + +static int emit_clear(struct i915_request *rq, int size, u32 value) +{ + const int ver = GRAPHICS_VER(rq->engine->i915); + u32 instance = rq->engine->instance; + u32 *cs; + + GEM_BUG_ON(size >> PAGE_SHIFT > S16_MAX); + + cs = intel_ring_begin(rq, ver >= 8 ? 
8 : 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + if (ver >= 8) { + *cs++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (7 - 2); + *cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE; + *cs++ = 0; + *cs++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4; + *cs++ = 0; /* offset */ + *cs++ = instance; + *cs++ = value; + *cs++ = MI_NOOP; + } else { + GEM_BUG_ON(instance); + *cs++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (6 - 2); + *cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE; + *cs++ = 0; + *cs++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4; + *cs++ = 0; + *cs++ = value; + } + + intel_ring_advance(rq, cs); + return 0; +} + +int +intel_context_migrate_clear(struct intel_context *ce, + struct dma_fence *await, + struct scatterlist *sg, + enum i915_cache_level cache_level, + bool is_lmem, + u32 value, + struct i915_request **out) +{ + struct sgt_dma it = sg_sgt(sg); + struct i915_request *rq; + int err; + + GEM_BUG_ON(ce->vm != ce->engine->gt->migrate.context->vm); + *out = NULL; + + GEM_BUG_ON(ce->ring->size < SZ_64K); + + do { + int len; + + rq = i915_request_create(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out_ce; + } + + if (await) { + err = i915_request_await_dma_fence(rq, await); + if (err) + goto out_rq; + + if (rq->engine->emit_init_breadcrumb) { + err = rq->engine->emit_init_breadcrumb(rq); + if (err) + goto out_rq; + } + + await = NULL; + } + + /* The PTE updates + clear must not be interrupted. */ + err = emit_no_arbitration(rq); + if (err) + goto out_rq; + + len = emit_pte(rq, &it, cache_level, is_lmem, 0, CHUNK_SZ); + if (len <= 0) { + err = len; + goto out_rq; + } + + err = rq->engine->emit_flush(rq, EMIT_INVALIDATE); + if (err) + goto out_rq; + + err = emit_clear(rq, len, value); + + /* Arbitration is re-enabled between requests. */ +out_rq: + if (*out) + i915_request_put(*out); + *out = i915_request_get(rq); + i915_request_add(rq); + if (err || !it.sg || !sg_dma_len(it.sg)) + break; + + cond_resched(); + } while (1); + +out_ce: + return err; +} + +int intel_migrate_copy(struct intel_migrate *m, + struct i915_gem_ww_ctx *ww, + struct dma_fence *await, + struct scatterlist *src, + enum i915_cache_level src_cache_level, + bool src_is_lmem, + struct scatterlist *dst, + enum i915_cache_level dst_cache_level, + bool dst_is_lmem, + struct i915_request **out) +{ + struct intel_context *ce; + int err; + + *out = NULL; + if (!m->context) + return -ENODEV; + + ce = intel_migrate_create_context(m); + if (IS_ERR(ce)) + ce = intel_context_get(m->context); + GEM_BUG_ON(IS_ERR(ce)); + + err = intel_context_pin_ww(ce, ww); + if (err) + goto out; + + err = intel_context_migrate_copy(ce, await, + src, src_cache_level, src_is_lmem, + dst, dst_cache_level, dst_is_lmem, + out); + + intel_context_unpin(ce); +out: + intel_context_put(ce); + return err; +} + +int +intel_migrate_clear(struct intel_migrate *m, + struct i915_gem_ww_ctx *ww, + struct dma_fence *await, + struct scatterlist *sg, + enum i915_cache_level cache_level, + bool is_lmem, + u32 value, + struct i915_request **out) +{ + struct intel_context *ce; + int err; + + *out = NULL; + if (!m->context) + return -ENODEV; + + ce = intel_migrate_create_context(m); + if (IS_ERR(ce)) + ce = intel_context_get(m->context); + GEM_BUG_ON(IS_ERR(ce)); + + err = intel_context_pin_ww(ce, ww); + if (err) + goto out; + + err = intel_context_migrate_clear(ce, await, sg, cache_level, + is_lmem, value, out); + + intel_context_unpin(ce); +out: + intel_context_put(ce); + return err; +} + +void intel_migrate_fini(struct intel_migrate *m) +{ + struct 
intel_context *ce; + + ce = fetch_and_zero(&m->context); + if (!ce) + return; + + intel_engine_destroy_pinned_context(ce); +} + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftest_migrate.c" +#endif diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.h b/drivers/gpu/drm/i915/gt/intel_migrate.h new file mode 100644 index 000000000000..4e18e755a00b --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_migrate.h @@ -0,0 +1,65 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2020 Intel Corporation + */ + +#ifndef __INTEL_MIGRATE__ +#define __INTEL_MIGRATE__ + +#include <linux/types.h> + +#include "intel_migrate_types.h" + +struct dma_fence; +struct i915_request; +struct i915_gem_ww_ctx; +struct intel_gt; +struct scatterlist; +enum i915_cache_level; + +int intel_migrate_init(struct intel_migrate *m, struct intel_gt *gt); + +struct intel_context *intel_migrate_create_context(struct intel_migrate *m); + +int intel_migrate_copy(struct intel_migrate *m, + struct i915_gem_ww_ctx *ww, + struct dma_fence *await, + struct scatterlist *src, + enum i915_cache_level src_cache_level, + bool src_is_lmem, + struct scatterlist *dst, + enum i915_cache_level dst_cache_level, + bool dst_is_lmem, + struct i915_request **out); + +int intel_context_migrate_copy(struct intel_context *ce, + struct dma_fence *await, + struct scatterlist *src, + enum i915_cache_level src_cache_level, + bool src_is_lmem, + struct scatterlist *dst, + enum i915_cache_level dst_cache_level, + bool dst_is_lmem, + struct i915_request **out); + +int +intel_migrate_clear(struct intel_migrate *m, + struct i915_gem_ww_ctx *ww, + struct dma_fence *await, + struct scatterlist *sg, + enum i915_cache_level cache_level, + bool is_lmem, + u32 value, + struct i915_request **out); +int +intel_context_migrate_clear(struct intel_context *ce, + struct dma_fence *await, + struct scatterlist *sg, + enum i915_cache_level cache_level, + bool is_lmem, + u32 value, + struct i915_request **out); + +void intel_migrate_fini(struct intel_migrate *m); + +#endif /* __INTEL_MIGRATE__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_migrate_types.h b/drivers/gpu/drm/i915/gt/intel_migrate_types.h new file mode 100644 index 000000000000..d98230597f42 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_migrate_types.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2020 Intel Corporation + */ + +#ifndef __INTEL_MIGRATE_TYPES__ +#define __INTEL_MIGRATE_TYPES__ + +struct intel_context; + +struct intel_migrate { + struct intel_context *context; +}; + +#endif /* __INTEL_MIGRATE_TYPES__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index 17848807f111..582c4423b95d 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -352,7 +352,7 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915, table->size = ARRAY_SIZE(icl_mocs_table); table->table = icl_mocs_table; table->n_entries = GEN9_NUM_MOCS_ENTRIES; - } else if (IS_GEN9_BC(i915) || IS_CANNONLAKE(i915)) { + } else if (IS_GEN9_BC(i915)) { table->size = ARRAY_SIZE(skl_mocs_table); table->n_entries = GEN9_NUM_MOCS_ENTRIES; table->table = skl_mocs_table; diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c index 259d7eb4e165..799d382eea79 100644 --- a/drivers/gpu/drm/i915/gt/intel_rc6.c +++ b/drivers/gpu/drm/i915/gt/intel_rc6.c @@ -62,20 +62,25 @@ static void gen11_rc6_enable(struct intel_rc6 *rc6) u32 pg_enable; int i; - /* 2b: Program RC6 thresholds.*/ - 
set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85); - set(uncore, GEN10_MEDIA_WAKE_RATE_LIMIT, 150); + /* + * With GuCRC, these parameters are set by GuC + */ + if (!intel_uc_uses_guc_rc(&gt->uc)) { + /* 2b: Program RC6 thresholds.*/ + set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85); + set(uncore, GEN10_MEDIA_WAKE_RATE_LIMIT, 150); - set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ - set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ - for_each_engine(engine, rc6_to_gt(rc6), id) - set(uncore, RING_MAX_IDLE(engine->mmio_base), 10); + set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ + set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ + for_each_engine(engine, rc6_to_gt(rc6), id) + set(uncore, RING_MAX_IDLE(engine->mmio_base), 10); - set(uncore, GUC_MAX_IDLE_COUNT, 0xA); + set(uncore, GUC_MAX_IDLE_COUNT, 0xA); - set(uncore, GEN6_RC_SLEEP, 0); + set(uncore, GEN6_RC_SLEEP, 0); - set(uncore, GEN6_RC6_THRESHOLD, 50000); /* 50/125ms per EI */ + set(uncore, GEN6_RC6_THRESHOLD, 50000); /* 50/125ms per EI */ + } /* * 2c: Program Coarse Power Gating Policies. @@ -98,11 +103,19 @@ static void gen11_rc6_enable(struct intel_rc6 *rc6) set(uncore, GEN9_MEDIA_PG_IDLE_HYSTERESIS, 60); set(uncore, GEN9_RENDER_PG_IDLE_HYSTERESIS, 60); - /* 3a: Enable RC6 */ - rc6->ctl_enable = - GEN6_RC_CTL_HW_ENABLE | - GEN6_RC_CTL_RC6_ENABLE | - GEN6_RC_CTL_EI_MODE(1); + /* 3a: Enable RC6 + * + * With GuCRC, we do not enable bit 31 of RC_CTL, + * thus allowing GuC to control RC6 entry/exit fully instead. + * We will not set the HW ENABLE and EI bits + */ + if (!intel_guc_rc_enable(&gt->uc.guc)) + rc6->ctl_enable = GEN6_RC_CTL_RC6_ENABLE; + else + rc6->ctl_enable = + GEN6_RC_CTL_HW_ENABLE | + GEN6_RC_CTL_RC6_ENABLE | + GEN6_RC_CTL_EI_MODE(1); pg_enable = GEN9_RENDER_PG_ENABLE | @@ -126,7 +139,7 @@ static void gen9_rc6_enable(struct intel_rc6 *rc6) enum intel_engine_id id; /* 2b: Program RC6 thresholds.*/ - if (GRAPHICS_VER(rc6_to_i915(rc6)) >= 10) { + if (GRAPHICS_VER(rc6_to_i915(rc6)) >= 11) { set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85); set(uncore, GEN10_MEDIA_WAKE_RATE_LIMIT, 150); } else if (IS_SKYLAKE(rc6_to_i915(rc6))) { @@ -513,6 +526,10 @@ static void __intel_rc6_disable(struct intel_rc6 *rc6) { struct drm_i915_private *i915 = rc6_to_i915(rc6); struct intel_uncore *uncore = rc6_to_uncore(rc6); + struct intel_gt *gt = rc6_to_gt(rc6); + + /* Take control of RC6 back from GuC */ + intel_guc_rc_disable(&gt->uc.guc); intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); if (GRAPHICS_VER(i915) >= 9) diff --git a/drivers/gpu/drm/i915/gt/intel_region_lmem.c b/drivers/gpu/drm/i915/gt/intel_region_lmem.c index f7366b054f8e..a74b72f50cc9 100644 --- a/drivers/gpu/drm/i915/gt/intel_region_lmem.c +++ b/drivers/gpu/drm/i915/gt/intel_region_lmem.c @@ -9,7 +9,8 @@ #include "intel_region_ttm.h" #include "gem/i915_gem_lmem.h" #include "gem/i915_gem_region.h" -#include "intel_region_lmem.h" +#include "gem/i915_gem_ttm.h" +#include "gt/intel_gt.h" static int init_fake_lmem_bar(struct intel_memory_region *mem) { @@ -107,7 +108,7 @@ out_no_io: static const struct intel_memory_region_ops intel_region_lmem_ops = { .init = region_lmem_init, .release = region_lmem_release, - .init_object = __i915_gem_lmem_object_init, + .init_object = __i915_gem_ttm_object_init, }; struct intel_memory_region * @@ -157,7 +158,7 @@ intel_gt_setup_fake_lmem(struct intel_gt *gt) static bool get_legacy_lowmem_region(struct intel_uncore *uncore, u64 *start, u32 *size) { - if 
(!IS_DG1_REVID(uncore->i915, DG1_REVID_A0, DG1_REVID_B0)) + if (!IS_DG1_GT_STEP(uncore->i915, STEP_A0, STEP_C0)) return false; *start = 0; diff --git a/drivers/gpu/drm/i915/gt/intel_renderstate.h b/drivers/gpu/drm/i915/gt/intel_renderstate.h index 48f009203917..4da4c5234ef0 100644 --- a/drivers/gpu/drm/i915/gt/intel_renderstate.h +++ b/drivers/gpu/drm/i915/gt/intel_renderstate.h @@ -8,6 +8,7 @@ #include <linux/types.h> #include "i915_gem.h" +#include "i915_gem_ww.h" struct i915_request; struct intel_context; diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index 72251638d4ea..91200c43951f 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -22,7 +22,6 @@ #include "intel_reset.h" #include "uc/intel_guc.h" -#include "uc/intel_guc_submission.h" #define RESET_MAX_RETRIES 3 @@ -39,21 +38,6 @@ static void rmw_clear_fw(struct intel_uncore *uncore, i915_reg_t reg, u32 clr) intel_uncore_rmw_fw(uncore, reg, clr, 0); } -static void skip_context(struct i915_request *rq) -{ - struct intel_context *hung_ctx = rq->context; - - list_for_each_entry_from_rcu(rq, &hung_ctx->timeline->requests, link) { - if (!i915_request_is_active(rq)) - return; - - if (rq->context == hung_ctx) { - i915_request_set_error_once(rq, -EIO); - __i915_request_skip(rq); - } - } -} - static void client_mark_guilty(struct i915_gem_context *ctx, bool banned) { struct drm_i915_file_private *file_priv = ctx->file_priv; @@ -88,10 +72,8 @@ static bool mark_guilty(struct i915_request *rq) bool banned; int i; - if (intel_context_is_closed(rq->context)) { - intel_context_set_banned(rq->context); + if (intel_context_is_closed(rq->context)) return true; - } rcu_read_lock(); ctx = rcu_dereference(rq->context->gem_context); @@ -123,11 +105,9 @@ static bool mark_guilty(struct i915_request *rq) banned = !i915_gem_context_is_recoverable(ctx); if (time_before(jiffies, prev_hang + CONTEXT_FAST_HANG_JIFFIES)) banned = true; - if (banned) { + if (banned) drm_dbg(&ctx->i915->drm, "context %s: guilty %d, banned\n", ctx->name, atomic_read(&ctx->guilty_count)); - intel_context_set_banned(rq->context); - } client_mark_guilty(ctx, banned); @@ -149,6 +129,8 @@ static void mark_innocent(struct i915_request *rq) void __i915_request_reset(struct i915_request *rq, bool guilty) { + bool banned = false; + RQ_TRACE(rq, "guilty? 
%s\n", yesno(guilty)); GEM_BUG_ON(__i915_request_is_complete(rq)); @@ -156,13 +138,15 @@ void __i915_request_reset(struct i915_request *rq, bool guilty) if (guilty) { i915_request_set_error_once(rq, -EIO); __i915_request_skip(rq); - if (mark_guilty(rq)) - skip_context(rq); + banned = mark_guilty(rq); } else { i915_request_set_error_once(rq, -EAGAIN); mark_innocent(rq); } rcu_read_unlock(); + + if (banned) + intel_context_ban(rq->context, rq); } static bool i915_in_reset(struct pci_dev *pdev) @@ -515,8 +499,14 @@ static int gen11_reset_engines(struct intel_gt *gt, [VCS1] = GEN11_GRDOM_MEDIA2, [VCS2] = GEN11_GRDOM_MEDIA3, [VCS3] = GEN11_GRDOM_MEDIA4, + [VCS4] = GEN11_GRDOM_MEDIA5, + [VCS5] = GEN11_GRDOM_MEDIA6, + [VCS6] = GEN11_GRDOM_MEDIA7, + [VCS7] = GEN11_GRDOM_MEDIA8, [VECS0] = GEN11_GRDOM_VECS, [VECS1] = GEN11_GRDOM_VECS2, + [VECS2] = GEN11_GRDOM_VECS3, + [VECS3] = GEN11_GRDOM_VECS4, }; struct intel_engine_cs *engine; intel_engine_mask_t tmp; @@ -826,6 +816,8 @@ static int gt_reset(struct intel_gt *gt, intel_engine_mask_t stalled_mask) __intel_engine_reset(engine, stalled_mask & engine->mask); local_bh_enable(); + intel_uc_reset(>->uc, true); + intel_ggtt_restore_fences(gt->ggtt); return err; @@ -850,6 +842,8 @@ static void reset_finish(struct intel_gt *gt, intel_engine_mask_t awake) if (awake & engine->mask) intel_engine_pm_put(engine); } + + intel_uc_reset_finish(>->uc); } static void nop_submit_request(struct i915_request *request) @@ -903,6 +897,7 @@ static void __intel_gt_set_wedged(struct intel_gt *gt) for_each_engine(engine, gt, id) if (engine->reset.cancel) engine->reset.cancel(engine); + intel_uc_cancel_requests(>->uc); local_bh_enable(); reset_finish(gt, awake); @@ -1191,6 +1186,9 @@ int __intel_engine_reset_bh(struct intel_engine_cs *engine, const char *msg) ENGINE_TRACE(engine, "flags=%lx\n", gt->reset.flags); GEM_BUG_ON(!test_bit(I915_RESET_ENGINE + engine->id, >->reset.flags)); + if (intel_engine_uses_guc(engine)) + return -ENODEV; + if (!intel_engine_pm_get_if_awake(engine)) return 0; @@ -1201,13 +1199,10 @@ int __intel_engine_reset_bh(struct intel_engine_cs *engine, const char *msg) "Resetting %s for %s\n", engine->name, msg); atomic_inc(&engine->i915->gpu_error.reset_engine_count[engine->uabi_class]); - if (intel_engine_uses_guc(engine)) - ret = intel_guc_reset_engine(&engine->gt->uc.guc, engine); - else - ret = intel_gt_reset_engine(engine); + ret = intel_gt_reset_engine(engine); if (ret) { /* If we fail here, we expect to fallback to a global reset */ - ENGINE_TRACE(engine, "Failed to reset, err: %d\n", ret); + ENGINE_TRACE(engine, "Failed to reset %s, err: %d\n", engine->name, ret); goto out; } @@ -1341,7 +1336,8 @@ void intel_gt_handle_error(struct intel_gt *gt, * Try engine reset when available. We fall back to full reset if * single reset fails. */ - if (intel_has_reset_engine(gt) && !intel_gt_is_wedged(gt)) { + if (!intel_uc_uses_guc_submission(>->uc) && + intel_has_reset_engine(gt) && !intel_gt_is_wedged(gt)) { local_bh_disable(); for_each_engine_masked(engine, gt, engine_mask, tmp) { BUILD_BUG_ON(I915_RESET_MODESET >= I915_RESET_ENGINE); diff --git a/drivers/gpu/drm/i915/gt/intel_ring.h b/drivers/gpu/drm/i915/gt/intel_ring.h index dbf5f14a136f..1b32dadfb8c3 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring.h +++ b/drivers/gpu/drm/i915/gt/intel_ring.h @@ -49,6 +49,7 @@ static inline void intel_ring_advance(struct i915_request *rq, u32 *cs) * intel_ring_begin()). 
*/ GEM_BUG_ON((rq->ring->vaddr + rq->ring->emit) != cs); + GEM_BUG_ON(!IS_ALIGNED(rq->ring->emit, 8)); /* RING_TAIL qword align */ } static inline u32 intel_ring_wrap(const struct intel_ring *ring, u32 pos) diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c index 37d74d4ed59b..2958e2fae380 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c @@ -16,6 +16,7 @@ #include "intel_reset.h" #include "intel_ring.h" #include "shmem_utils.h" +#include "intel_engine_heartbeat.h" /* Rough estimate of the typical request size, performing a flush, * set-context and then emitting the batch. @@ -342,9 +343,9 @@ static void reset_rewind(struct intel_engine_cs *engine, bool stalled) u32 head; rq = NULL; - spin_lock_irqsave(&engine->active.lock, flags); + spin_lock_irqsave(&engine->sched_engine->lock, flags); rcu_read_lock(); - list_for_each_entry(pos, &engine->active.requests, sched.link) { + list_for_each_entry(pos, &engine->sched_engine->requests, sched.link) { if (!__i915_request_is_complete(pos)) { rq = pos; break; @@ -399,7 +400,7 @@ static void reset_rewind(struct intel_engine_cs *engine, bool stalled) } engine->legacy.ring->head = intel_ring_wrap(engine->legacy.ring, head); - spin_unlock_irqrestore(&engine->active.lock, flags); + spin_unlock_irqrestore(&engine->sched_engine->lock, flags); } static void reset_finish(struct intel_engine_cs *engine) @@ -411,16 +412,16 @@ static void reset_cancel(struct intel_engine_cs *engine) struct i915_request *request; unsigned long flags; - spin_lock_irqsave(&engine->active.lock, flags); + spin_lock_irqsave(&engine->sched_engine->lock, flags); /* Mark all submitted requests as skipped. */ - list_for_each_entry(request, &engine->active.requests, sched.link) + list_for_each_entry(request, &engine->sched_engine->requests, sched.link) i915_request_put(i915_request_mark_eio(request)); intel_engine_signal_breadcrumbs(engine); /* Remaining _unready_ requests will be nop'ed when submitted */ - spin_unlock_irqrestore(&engine->active.lock, flags); + spin_unlock_irqrestore(&engine->sched_engine->lock, flags); } static void i9xx_submit_request(struct i915_request *request) @@ -586,9 +587,44 @@ static void ring_context_reset(struct intel_context *ce) clear_bit(CONTEXT_VALID_BIT, &ce->flags); } +static void ring_context_ban(struct intel_context *ce, + struct i915_request *rq) +{ + struct intel_engine_cs *engine; + + if (!rq || !i915_request_is_active(rq)) + return; + + engine = rq->engine; + lockdep_assert_held(&engine->sched_engine->lock); + list_for_each_entry_continue(rq, &engine->sched_engine->requests, + sched.link) + if (rq->context == ce) { + i915_request_set_error_once(rq, -EIO); + __i915_request_skip(rq); + } +} + +static void ring_context_cancel_request(struct intel_context *ce, + struct i915_request *rq) +{ + struct intel_engine_cs *engine = NULL; + + i915_request_active_engine(rq, &engine); + + if (engine && intel_engine_pulse(engine)) + intel_gt_handle_error(engine->gt, engine->mask, 0, + "request cancellation by %s", + current->comm); +} + static const struct intel_context_ops ring_context_ops = { .alloc = ring_context_alloc, + .cancel_request = ring_context_cancel_request, + + .ban = ring_context_ban, + .pre_pin = ring_context_pre_pin, .pin = ring_context_pin, .unpin = ring_context_unpin, @@ -1047,6 +1083,25 @@ static void setup_irq(struct intel_engine_cs *engine) } } +static void add_to_engine(struct i915_request *rq) +{ + 
lockdep_assert_held(&rq->engine->sched_engine->lock); + list_move_tail(&rq->sched.link, &rq->engine->sched_engine->requests); +} + +static void remove_from_engine(struct i915_request *rq) +{ + spin_lock_irq(&rq->engine->sched_engine->lock); + list_del_init(&rq->sched.link); + + /* Prevent further __await_execution() registering a cb, then flush */ + set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags); + + spin_unlock_irq(&rq->engine->sched_engine->lock); + + i915_request_notify_execute_cb_imm(rq); +} + static void setup_common(struct intel_engine_cs *engine) { struct drm_i915_private *i915 = engine->i915; @@ -1064,6 +1119,9 @@ static void setup_common(struct intel_engine_cs *engine) engine->reset.cancel = reset_cancel; engine->reset.finish = reset_finish; + engine->add_active_request = add_to_engine; + engine->remove_active_request = remove_from_engine; + engine->cops = &ring_context_ops; engine->request_alloc = ring_request_alloc; diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index 06e9a8ed4e03..d812b27835f8 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -37,6 +37,20 @@ static struct intel_uncore *rps_to_uncore(struct intel_rps *rps) return rps_to_gt(rps)->uncore; } +static struct intel_guc_slpc *rps_to_slpc(struct intel_rps *rps) +{ + struct intel_gt *gt = rps_to_gt(rps); + + return >->uc.guc.slpc; +} + +static bool rps_uses_slpc(struct intel_rps *rps) +{ + struct intel_gt *gt = rps_to_gt(rps); + + return intel_uc_uses_guc_slpc(>->uc); +} + static u32 rps_pm_sanitize_mask(struct intel_rps *rps, u32 mask) { return mask & ~rps->pm_intrmsk_mbz; @@ -167,6 +181,8 @@ static void rps_enable_interrupts(struct intel_rps *rps) { struct intel_gt *gt = rps_to_gt(rps); + GEM_BUG_ON(rps_uses_slpc(rps)); + GT_TRACE(gt, "interrupts:on rps->pm_events: %x, rps_pm_mask:%x\n", rps->pm_events, rps_pm_mask(rps, rps->last_freq)); @@ -771,6 +787,8 @@ static int gen6_rps_set(struct intel_rps *rps, u8 val) struct drm_i915_private *i915 = rps_to_i915(rps); u32 swreq; + GEM_BUG_ON(rps_uses_slpc(rps)); + if (GRAPHICS_VER(i915) >= 9) swreq = GEN9_FREQUENCY(val); else if (IS_HASWELL(i915) || IS_BROADWELL(i915)) @@ -861,6 +879,9 @@ void intel_rps_park(struct intel_rps *rps) { int adj; + if (!intel_rps_is_enabled(rps)) + return; + GEM_BUG_ON(atomic_read(&rps->num_waiters)); if (!intel_rps_clear_active(rps)) @@ -999,7 +1020,7 @@ static void gen6_rps_init(struct intel_rps *rps) rps->efficient_freq = rps->rp1_freq; if (IS_HASWELL(i915) || IS_BROADWELL(i915) || - IS_GEN9_BC(i915) || GRAPHICS_VER(i915) >= 10) { + IS_GEN9_BC(i915) || GRAPHICS_VER(i915) >= 11) { u32 ddcc_status = 0; if (sandybridge_pcode_read(i915, @@ -1012,7 +1033,7 @@ static void gen6_rps_init(struct intel_rps *rps) rps->max_freq); } - if (IS_GEN9_BC(i915) || GRAPHICS_VER(i915) >= 10) { + if (IS_GEN9_BC(i915) || GRAPHICS_VER(i915) >= 11) { /* Store the frequency values in 16.66 MHZ units, which is * the natural hardware unit for SKL */ @@ -1356,6 +1377,9 @@ void intel_rps_enable(struct intel_rps *rps) if (!HAS_RPS(i915)) return; + if (rps_uses_slpc(rps)) + return; + intel_gt_check_clock_frequency(rps_to_gt(rps)); intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); @@ -1829,6 +1853,9 @@ void intel_rps_init(struct intel_rps *rps) { struct drm_i915_private *i915 = rps_to_i915(rps); + if (rps_uses_slpc(rps)) + return; + if (IS_CHERRYVIEW(i915)) chv_rps_init(rps); else if (IS_VALLEYVIEW(i915)) @@ -1877,10 +1904,17 @@ void intel_rps_init(struct intel_rps *rps) if 
(GRAPHICS_VER(i915) >= 8 && GRAPHICS_VER(i915) < 11) rps->pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC; + + /* GuC needs ARAT expired interrupt unmasked */ + if (intel_uc_uses_guc_submission(&rps_to_gt(rps)->uc)) + rps->pm_intrmsk_mbz |= ARAT_EXPIRED_INTRMSK; } void intel_rps_sanitize(struct intel_rps *rps) { + if (rps_uses_slpc(rps)) + return; + if (GRAPHICS_VER(rps_to_i915(rps)) >= 6) rps_disable_interrupts(rps); } @@ -1936,6 +1970,176 @@ u32 intel_rps_read_actual_frequency(struct intel_rps *rps) return freq; } +u32 intel_rps_read_punit_req(struct intel_rps *rps) +{ + struct intel_uncore *uncore = rps_to_uncore(rps); + + return intel_uncore_read(uncore, GEN6_RPNSWREQ); +} + +static u32 intel_rps_get_req(u32 pureq) +{ + u32 req = pureq >> GEN9_SW_REQ_UNSLICE_RATIO_SHIFT; + + return req; +} + +u32 intel_rps_read_punit_req_frequency(struct intel_rps *rps) +{ + u32 freq = intel_rps_get_req(intel_rps_read_punit_req(rps)); + + return intel_gpu_freq(rps, freq); +} + +u32 intel_rps_get_requested_frequency(struct intel_rps *rps) +{ + if (rps_uses_slpc(rps)) + return intel_rps_read_punit_req_frequency(rps); + else + return intel_gpu_freq(rps, rps->cur_freq); +} + +u32 intel_rps_get_max_frequency(struct intel_rps *rps) +{ + struct intel_guc_slpc *slpc = rps_to_slpc(rps); + + if (rps_uses_slpc(rps)) + return slpc->max_freq_softlimit; + else + return intel_gpu_freq(rps, rps->max_freq_softlimit); +} + +u32 intel_rps_get_rp0_frequency(struct intel_rps *rps) +{ + struct intel_guc_slpc *slpc = rps_to_slpc(rps); + + if (rps_uses_slpc(rps)) + return slpc->rp0_freq; + else + return intel_gpu_freq(rps, rps->rp0_freq); +} + +u32 intel_rps_get_rp1_frequency(struct intel_rps *rps) +{ + struct intel_guc_slpc *slpc = rps_to_slpc(rps); + + if (rps_uses_slpc(rps)) + return slpc->rp1_freq; + else + return intel_gpu_freq(rps, rps->rp1_freq); +} + +u32 intel_rps_get_rpn_frequency(struct intel_rps *rps) +{ + struct intel_guc_slpc *slpc = rps_to_slpc(rps); + + if (rps_uses_slpc(rps)) + return slpc->min_freq; + else + return intel_gpu_freq(rps, rps->min_freq); +} + +static int set_max_freq(struct intel_rps *rps, u32 val) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + int ret = 0; + + mutex_lock(&rps->lock); + + val = intel_freq_opcode(rps, val); + if (val < rps->min_freq || + val > rps->max_freq || + val < rps->min_freq_softlimit) { + ret = -EINVAL; + goto unlock; + } + + if (val > rps->rp0_freq) + drm_dbg(&i915->drm, "User requested overclocking to %d\n", + intel_gpu_freq(rps, val)); + + rps->max_freq_softlimit = val; + + val = clamp_t(int, rps->cur_freq, + rps->min_freq_softlimit, + rps->max_freq_softlimit); + + /* + * We still need *_set_rps to process the new max_delay and + * update the interrupt limits and PMINTRMSK even though + * frequency request may be unchanged. 
+ */ + intel_rps_set(rps, val); + +unlock: + mutex_unlock(&rps->lock); + + return ret; +} + +int intel_rps_set_max_frequency(struct intel_rps *rps, u32 val) +{ + struct intel_guc_slpc *slpc = rps_to_slpc(rps); + + if (rps_uses_slpc(rps)) + return intel_guc_slpc_set_max_freq(slpc, val); + else + return set_max_freq(rps, val); +} + +u32 intel_rps_get_min_frequency(struct intel_rps *rps) +{ + struct intel_guc_slpc *slpc = rps_to_slpc(rps); + + if (rps_uses_slpc(rps)) + return slpc->min_freq_softlimit; + else + return intel_gpu_freq(rps, rps->min_freq_softlimit); +} + +static int set_min_freq(struct intel_rps *rps, u32 val) +{ + int ret = 0; + + mutex_lock(&rps->lock); + + val = intel_freq_opcode(rps, val); + if (val < rps->min_freq || + val > rps->max_freq || + val > rps->max_freq_softlimit) { + ret = -EINVAL; + goto unlock; + } + + rps->min_freq_softlimit = val; + + val = clamp_t(int, rps->cur_freq, + rps->min_freq_softlimit, + rps->max_freq_softlimit); + + /* + * We still need *_set_rps to process the new min_delay and + * update the interrupt limits and PMINTRMSK even though + * frequency request may be unchanged. + */ + intel_rps_set(rps, val); + +unlock: + mutex_unlock(&rps->lock); + + return ret; +} + +int intel_rps_set_min_frequency(struct intel_rps *rps, u32 val) +{ + struct intel_guc_slpc *slpc = rps_to_slpc(rps); + + if (rps_uses_slpc(rps)) + return intel_guc_slpc_set_min_freq(slpc, val); + else + return set_min_freq(rps, val); +} + /* External interface for intel_ips.ko */ static struct drm_i915_private __rcu *ips_mchdev; @@ -2129,4 +2333,5 @@ EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable); #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftest_rps.c" +#include "selftest_slpc.c" #endif diff --git a/drivers/gpu/drm/i915/gt/intel_rps.h b/drivers/gpu/drm/i915/gt/intel_rps.h index 1d2cfc98b510..4213bcce1667 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.h +++ b/drivers/gpu/drm/i915/gt/intel_rps.h @@ -31,6 +31,16 @@ int intel_gpu_freq(struct intel_rps *rps, int val); int intel_freq_opcode(struct intel_rps *rps, int val); u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat1); u32 intel_rps_read_actual_frequency(struct intel_rps *rps); +u32 intel_rps_get_requested_frequency(struct intel_rps *rps); +u32 intel_rps_get_min_frequency(struct intel_rps *rps); +int intel_rps_set_min_frequency(struct intel_rps *rps, u32 val); +u32 intel_rps_get_max_frequency(struct intel_rps *rps); +int intel_rps_set_max_frequency(struct intel_rps *rps, u32 val); +u32 intel_rps_get_rp0_frequency(struct intel_rps *rps); +u32 intel_rps_get_rp1_frequency(struct intel_rps *rps); +u32 intel_rps_get_rpn_frequency(struct intel_rps *rps); +u32 intel_rps_read_punit_req(struct intel_rps *rps); +u32 intel_rps_read_punit_req_frequency(struct intel_rps *rps); void gen5_rps_irq_handler(struct intel_rps *rps); void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir); diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c index 367fd44b81c8..bbd272943c3f 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.c +++ b/drivers/gpu/drm/i915/gt/intel_sseu.c @@ -139,17 +139,36 @@ static void gen12_sseu_info_init(struct intel_gt *gt) * Gen12 has Dual-Subslices, which behave similarly to 2 gen11 SS. * Instead of splitting these, provide userspace with an array * of DSS to more closely represent the hardware resource. + * + * In addition, the concept of slice has been removed in Xe_HP. + * To be compatible with prior generations, assume a single slice + * across the entire device. 
Then calculate out the DSS for each + * workload type within that software slice. */ - intel_sseu_set_info(sseu, 1, 6, 16); + if (IS_DG2(gt->i915) || IS_XEHPSDV(gt->i915)) + intel_sseu_set_info(sseu, 1, 32, 16); + else + intel_sseu_set_info(sseu, 1, 6, 16); - s_en = intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE) & - GEN11_GT_S_ENA_MASK; + /* + * As mentioned above, Xe_HP does not have the concept of a slice. + * Enable one for software backwards compatibility. + */ + if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50)) + s_en = 0x1; + else + s_en = intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE) & + GEN11_GT_S_ENA_MASK; dss_en = intel_uncore_read(uncore, GEN12_GT_DSS_ENABLE); /* one bit per pair of EUs */ - eu_en_fuse = ~(intel_uncore_read(uncore, GEN11_EU_DISABLE) & - GEN11_EU_DIS_MASK); + if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50)) + eu_en_fuse = intel_uncore_read(uncore, XEHP_EU_ENABLE) & XEHP_EU_ENA_MASK; + else + eu_en_fuse = ~(intel_uncore_read(uncore, GEN11_EU_DISABLE) & + GEN11_EU_DIS_MASK); + for (eu = 0; eu < sseu->max_eus_per_subslice / 2; eu++) if (eu_en_fuse & BIT(eu)) eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1); @@ -188,83 +207,6 @@ static void gen11_sseu_info_init(struct intel_gt *gt) sseu->has_eu_pg = 1; } -static void gen10_sseu_info_init(struct intel_gt *gt) -{ - struct intel_uncore *uncore = gt->uncore; - struct sseu_dev_info *sseu = >->info.sseu; - const u32 fuse2 = intel_uncore_read(uncore, GEN8_FUSE2); - const int eu_mask = 0xff; - u32 subslice_mask, eu_en; - int s, ss; - - intel_sseu_set_info(sseu, 6, 4, 8); - - sseu->slice_mask = (fuse2 & GEN10_F2_S_ENA_MASK) >> - GEN10_F2_S_ENA_SHIFT; - - /* Slice0 */ - eu_en = ~intel_uncore_read(uncore, GEN8_EU_DISABLE0); - for (ss = 0; ss < sseu->max_subslices; ss++) - sseu_set_eus(sseu, 0, ss, (eu_en >> (8 * ss)) & eu_mask); - /* Slice1 */ - sseu_set_eus(sseu, 1, 0, (eu_en >> 24) & eu_mask); - eu_en = ~intel_uncore_read(uncore, GEN8_EU_DISABLE1); - sseu_set_eus(sseu, 1, 1, eu_en & eu_mask); - /* Slice2 */ - sseu_set_eus(sseu, 2, 0, (eu_en >> 8) & eu_mask); - sseu_set_eus(sseu, 2, 1, (eu_en >> 16) & eu_mask); - /* Slice3 */ - sseu_set_eus(sseu, 3, 0, (eu_en >> 24) & eu_mask); - eu_en = ~intel_uncore_read(uncore, GEN8_EU_DISABLE2); - sseu_set_eus(sseu, 3, 1, eu_en & eu_mask); - /* Slice4 */ - sseu_set_eus(sseu, 4, 0, (eu_en >> 8) & eu_mask); - sseu_set_eus(sseu, 4, 1, (eu_en >> 16) & eu_mask); - /* Slice5 */ - sseu_set_eus(sseu, 5, 0, (eu_en >> 24) & eu_mask); - eu_en = ~intel_uncore_read(uncore, GEN10_EU_DISABLE3); - sseu_set_eus(sseu, 5, 1, eu_en & eu_mask); - - subslice_mask = (1 << 4) - 1; - subslice_mask &= ~((fuse2 & GEN10_F2_SS_DIS_MASK) >> - GEN10_F2_SS_DIS_SHIFT); - - for (s = 0; s < sseu->max_slices; s++) { - u32 subslice_mask_with_eus = subslice_mask; - - for (ss = 0; ss < sseu->max_subslices; ss++) { - if (sseu_get_eus(sseu, s, ss) == 0) - subslice_mask_with_eus &= ~BIT(ss); - } - - /* - * Slice0 can have up to 3 subslices, but there are only 2 in - * slice1/2. - */ - intel_sseu_set_subslices(sseu, s, s == 0 ? - subslice_mask_with_eus : - subslice_mask_with_eus & 0x3); - } - - sseu->eu_total = compute_eu_total(sseu); - - /* - * CNL is expected to always have a uniform distribution - * of EU across subslices with the exception that any one - * EU in any one subslice may be fused off for die - * recovery. - */ - sseu->eu_per_subslice = - intel_sseu_subslice_total(sseu) ? 
- DIV_ROUND_UP(sseu->eu_total, intel_sseu_subslice_total(sseu)) : - 0; - - /* No restrictions on Power Gating */ - sseu->has_slice_pg = 1; - sseu->has_subslice_pg = 1; - sseu->has_eu_pg = 1; -} - static void cherryview_sseu_info_init(struct intel_gt *gt) { struct sseu_dev_info *sseu = >->info.sseu; @@ -592,8 +534,6 @@ void intel_sseu_info_init(struct intel_gt *gt) bdw_sseu_info_init(gt); else if (GRAPHICS_VER(i915) == 9) gen9_sseu_info_init(gt); - else if (GRAPHICS_VER(i915) == 10) - gen10_sseu_info_init(gt); else if (GRAPHICS_VER(i915) == 11) gen11_sseu_info_init(gt); else if (GRAPHICS_VER(i915) >= 12) @@ -759,3 +699,21 @@ void intel_sseu_print_topology(const struct sseu_dev_info *sseu, } } } + +u16 intel_slicemask_from_dssmask(u64 dss_mask, int dss_per_slice) +{ + u16 slice_mask = 0; + int i; + + WARN_ON(sizeof(dss_mask) * 8 / dss_per_slice > 8 * sizeof(slice_mask)); + + for (i = 0; dss_mask; i++) { + if (dss_mask & GENMASK(dss_per_slice - 1, 0)) + slice_mask |= BIT(i); + + dss_mask >>= dss_per_slice; + } + + return slice_mask; +} + diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h index 4cd1a8a7298a..22fef98887c0 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.h +++ b/drivers/gpu/drm/i915/gt/intel_sseu.h @@ -15,13 +15,17 @@ struct drm_i915_private; struct intel_gt; struct drm_printer; -#define GEN_MAX_SLICES (6) /* CNL upper bound */ -#define GEN_MAX_SUBSLICES (8) /* ICL upper bound */ +#define GEN_MAX_SLICES (3) /* SKL upper bound */ +#define GEN_MAX_SUBSLICES (32) /* XEHPSDV upper bound */ #define GEN_SSEU_STRIDE(max_entries) DIV_ROUND_UP(max_entries, BITS_PER_BYTE) #define GEN_MAX_SUBSLICE_STRIDE GEN_SSEU_STRIDE(GEN_MAX_SUBSLICES) #define GEN_MAX_EUS (16) /* TGL upper bound */ #define GEN_MAX_EU_STRIDE GEN_SSEU_STRIDE(GEN_MAX_EUS) +#define GEN_DSS_PER_GSLICE 4 +#define GEN_DSS_PER_CSLICE 8 +#define GEN_DSS_PER_MSLICE 8 + struct sseu_dev_info { u8 slice_mask; u8 subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE]; @@ -104,4 +108,6 @@ void intel_sseu_dump(const struct sseu_dev_info *sseu, struct drm_printer *p); void intel_sseu_print_topology(const struct sseu_dev_info *sseu, struct drm_printer *p); +u16 intel_slicemask_from_dssmask(u64 dss_mask, int dss_per_slice); + #endif /* __INTEL_SSEU_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c b/drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c index 714fe8495775..1ba8b7da9d37 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c +++ b/drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c @@ -50,10 +50,10 @@ static void cherryview_sseu_device_status(struct intel_gt *gt, #undef SS_MAX } -static void gen10_sseu_device_status(struct intel_gt *gt, +static void gen11_sseu_device_status(struct intel_gt *gt, struct sseu_dev_info *sseu) { -#define SS_MAX 6 +#define SS_MAX 8 struct intel_uncore *uncore = gt->uncore; const struct intel_gt_info *info = >->info; u32 s_reg[SS_MAX], eu_reg[2 * SS_MAX], eu_mask[2]; @@ -267,8 +267,8 @@ int intel_sseu_status(struct seq_file *m, struct intel_gt *gt) bdw_sseu_device_status(gt, &sseu); else if (GRAPHICS_VER(i915) == 9) gen9_sseu_device_status(gt, &sseu); - else if (GRAPHICS_VER(i915) >= 10) - gen10_sseu_device_status(gt, &sseu); + else if (GRAPHICS_VER(i915) >= 11) + gen11_sseu_device_status(gt, &sseu); } i915_print_sseu_info(m, false, HAS_POOLED_EU(i915), &sseu); diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index b62d1e31a645..aae609d7d85d 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ 
b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -150,13 +150,14 @@ static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa) } static void wa_add(struct i915_wa_list *wal, i915_reg_t reg, - u32 clear, u32 set, u32 read_mask) + u32 clear, u32 set, u32 read_mask, bool masked_reg) { struct i915_wa wa = { .reg = reg, .clr = clear, .set = set, .read = read_mask, + .masked_reg = masked_reg, }; _wa_add(wal, &wa); @@ -165,7 +166,7 @@ static void wa_add(struct i915_wa_list *wal, i915_reg_t reg, static void wa_write_clr_set(struct i915_wa_list *wal, i915_reg_t reg, u32 clear, u32 set) { - wa_add(wal, reg, clear, set, clear); + wa_add(wal, reg, clear, set, clear, false); } static void @@ -200,20 +201,20 @@ wa_write_clr(struct i915_wa_list *wal, i915_reg_t reg, u32 clr) static void wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val) { - wa_add(wal, reg, 0, _MASKED_BIT_ENABLE(val), val); + wa_add(wal, reg, 0, _MASKED_BIT_ENABLE(val), val, true); } static void wa_masked_dis(struct i915_wa_list *wal, i915_reg_t reg, u32 val) { - wa_add(wal, reg, 0, _MASKED_BIT_DISABLE(val), val); + wa_add(wal, reg, 0, _MASKED_BIT_DISABLE(val), val, true); } static void wa_masked_field_set(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, u32 val) { - wa_add(wal, reg, 0, _MASKED_FIELD(mask, val), mask); + wa_add(wal, reg, 0, _MASKED_FIELD(mask, val), mask, true); } static void gen6_ctx_workarounds_init(struct intel_engine_cs *engine, @@ -514,53 +515,15 @@ static void cfl_ctx_workarounds_init(struct intel_engine_cs *engine, GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); } -static void cnl_ctx_workarounds_init(struct intel_engine_cs *engine, - struct i915_wa_list *wal) -{ - /* WaForceContextSaveRestoreNonCoherent:cnl */ - wa_masked_en(wal, CNL_HDC_CHICKEN0, - HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT); - - /* WaDisableReplayBufferBankArbitrationOptimization:cnl */ - wa_masked_en(wal, COMMON_SLICE_CHICKEN2, - GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); - - /* WaPushConstantDereferenceHoldDisable:cnl */ - wa_masked_en(wal, GEN7_ROW_CHICKEN2, PUSH_CONSTANT_DEREF_DISABLE); - - /* FtrEnableFastAnisoL1BankingFix:cnl */ - wa_masked_en(wal, HALF_SLICE_CHICKEN3, CNL_FAST_ANISO_L1_BANKING_FIX); - - /* WaDisable3DMidCmdPreemption:cnl */ - wa_masked_dis(wal, GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL); - - /* WaDisableGPGPUMidCmdPreemption:cnl */ - wa_masked_field_set(wal, GEN8_CS_CHICKEN1, - GEN9_PREEMPT_GPGPU_LEVEL_MASK, - GEN9_PREEMPT_GPGPU_COMMAND_LEVEL); - - /* WaDisableEarlyEOT:cnl */ - wa_masked_en(wal, GEN8_ROW_CHICKEN, DISABLE_EARLY_EOT); -} - static void icl_ctx_workarounds_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { - struct drm_i915_private *i915 = engine->i915; - - /* WaDisableBankHangMode:icl */ + /* Wa_1406697149 (WaDisableBankHangMode:icl) */ wa_write(wal, GEN8_L3CNTLREG, intel_uncore_read(engine->uncore, GEN8_L3CNTLREG) | GEN8_ERRDETBCTRL); - /* Wa_1604370585:icl (pre-prod) - * Formerly known as WaPushConstantDereferenceHoldDisable - */ - if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0)) - wa_masked_en(wal, GEN7_ROW_CHICKEN2, - PUSH_CONSTANT_DEREF_DISABLE); - /* WaForceEnableNonCoherent:icl * This is not the same workaround as in early Gen9 platforms, where * lacking this could cause system hangs, but coherency performance @@ -570,23 +533,11 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine, */ wa_masked_en(wal, ICL_HDC_MODE, HDC_FORCE_NON_COHERENT); - /* Wa_2006611047:icl (pre-prod) - * Formerly known as WaDisableImprovedTdlClkGating - */ 
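For readers less familiar with the masked_reg flag added to struct i915_wa above: these are registers whose upper 16 bits act as a per-bit write enable, which is why wa_masked_en()/wa_masked_dis()/wa_masked_field_set() now pass true and why such entries need no read-modify-write. A minimal standalone sketch of that encoding follows; the macro shapes mirror the usual i915 _MASKED_FIELD()/_MASKED_BIT_ENABLE() helpers, and the names here are illustrative, not taken from this patch.

#include <stdint.h>

/* assumed layout for masked registers: write-enable mask in [31:16], data in [15:0] */
#define MASKED_FIELD(mask, value)  ((uint32_t)(((mask) << 16) | (value)))
#define MASKED_BIT_ENABLE(bit)     MASKED_FIELD((bit), (bit))
#define MASKED_BIT_DISABLE(bit)    MASKED_FIELD((bit), 0)

static uint32_t example_wa_set(uint32_t bit)
{
	return MASKED_BIT_ENABLE(bit);	/* bit = 1u << 3 -> 0x00080008: touch bit 3, set it */
}

static uint32_t example_wa_clear(uint32_t bit)
{
	return MASKED_BIT_DISABLE(bit);	/* bit = 1u << 3 -> 0x00080000: touch bit 3, clear it */
}

int main(void)
{
	/* only bit 3 is written in either case; all other bits are left alone by the HW */
	return (example_wa_set(1u << 3) == 0x00080008 &&
		example_wa_clear(1u << 3) == 0x00080000) ? 0 : 1;
}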
- if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0)) - wa_masked_en(wal, GEN7_ROW_CHICKEN2, - GEN11_TDL_CLOCK_GATING_FIX_DISABLE); - - /* Wa_2006665173:icl (pre-prod) */ - if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0)) - wa_masked_en(wal, GEN11_COMMON_SLICE_CHICKEN3, - GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC); - /* WaEnableFloatBlendOptimization:icl */ - wa_write_clr_set(wal, - GEN10_CACHE_MODE_SS, - 0, /* write-only, so skip validation */ - _MASKED_BIT_ENABLE(FLOAT_BLEND_OPTIMIZATION_ENABLE)); + wa_add(wal, GEN10_CACHE_MODE_SS, 0, + _MASKED_BIT_ENABLE(FLOAT_BLEND_OPTIMIZATION_ENABLE), + 0 /* write-only, so skip validation */, + true); /* WaDisableGPGPUMidThreadPreemption:icl */ wa_masked_field_set(wal, GEN8_CS_CHICKEN1, @@ -631,7 +582,7 @@ static void gen12_ctx_gt_tuning_init(struct intel_engine_cs *engine, FF_MODE2, FF_MODE2_TDS_TIMER_MASK, FF_MODE2_TDS_TIMER_128, - 0); + 0, false); } static void gen12_ctx_workarounds_init(struct intel_engine_cs *engine, @@ -640,15 +591,16 @@ static void gen12_ctx_workarounds_init(struct intel_engine_cs *engine, gen12_ctx_gt_tuning_init(engine, wal); /* - * Wa_1409142259:tgl - * Wa_1409347922:tgl - * Wa_1409252684:tgl - * Wa_1409217633:tgl - * Wa_1409207793:tgl - * Wa_1409178076:tgl - * Wa_1408979724:tgl - * Wa_14010443199:rkl - * Wa_14010698770:rkl + * Wa_1409142259:tgl,dg1,adl-p + * Wa_1409347922:tgl,dg1,adl-p + * Wa_1409252684:tgl,dg1,adl-p + * Wa_1409217633:tgl,dg1,adl-p + * Wa_1409207793:tgl,dg1,adl-p + * Wa_1409178076:tgl,dg1,adl-p + * Wa_1408979724:tgl,dg1,adl-p + * Wa_14010443199:tgl,rkl,dg1,adl-p + * Wa_14010698770:tgl,rkl,dg1,adl-s,adl-p + * Wa_1409342910:tgl,rkl,dg1,adl-s,adl-p */ wa_masked_en(wal, GEN11_COMMON_SLICE_CHICKEN3, GEN12_DISABLE_CPS_AWARE_COLOR_PIPE); @@ -668,7 +620,14 @@ static void gen12_ctx_workarounds_init(struct intel_engine_cs *engine, FF_MODE2, FF_MODE2_GS_TIMER_MASK, FF_MODE2_GS_TIMER_224, - 0); + 0, false); + + /* + * Wa_14012131227:dg1 + * Wa_1508744258:tgl,rkl,dg1,adl-s,adl-p + */ + wa_masked_en(wal, GEN7_COMMON_SLICE_CHICKEN1, + GEN9_RHWO_OPTIMIZATION_DISABLE); } static void dg1_ctx_workarounds_init(struct intel_engine_cs *engine, @@ -703,8 +662,6 @@ __intel_engine_init_ctx_wa(struct intel_engine_cs *engine, gen12_ctx_workarounds_init(engine, wal); else if (GRAPHICS_VER(i915) == 11) icl_ctx_workarounds_init(engine, wal); - else if (IS_CANNONLAKE(i915)) - cnl_ctx_workarounds_init(engine, wal); else if (IS_COFFEELAKE(i915) || IS_COMETLAKE(i915)) cfl_ctx_workarounds_init(engine, wal); else if (IS_GEMINILAKE(i915)) @@ -839,7 +796,7 @@ hsw_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) wa_add(wal, HSW_ROW_CHICKEN3, 0, _MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE), - 0 /* XXX does this reg exist? */); + 0 /* XXX does this reg exist? 
*/, true); /* WaVSRefCountFullforceMissDisable:hsw */ wa_write_clr(wal, GEN7_FF_THREAD_MODE, GEN7_FF_VS_REF_CNT_FFME); @@ -882,30 +839,19 @@ skl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE); /* WaInPlaceDecompressionHang:skl */ - if (IS_SKL_REVID(i915, SKL_REVID_H0, REVID_FOREVER)) + if (IS_SKL_GT_STEP(i915, STEP_A0, STEP_H0)) wa_write_or(wal, GEN9_GAMT_ECO_REG_RW_IA, GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); } static void -bxt_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) -{ - gen9_gt_workarounds_init(i915, wal); - - /* WaInPlaceDecompressionHang:bxt */ - wa_write_or(wal, - GEN9_GAMT_ECO_REG_RW_IA, - GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); -} - -static void kbl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) { gen9_gt_workarounds_init(i915, wal); /* WaDisableDynamicCreditSharing:kbl */ - if (IS_KBL_GT_STEP(i915, 0, STEP_B0)) + if (IS_KBL_GT_STEP(i915, 0, STEP_C0)) wa_write_or(wal, GAMT_CHKN_BIT_REG, GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING); @@ -943,98 +889,144 @@ cfl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); } +static void __set_mcr_steering(struct i915_wa_list *wal, + i915_reg_t steering_reg, + unsigned int slice, unsigned int subslice) +{ + u32 mcr, mcr_mask; + + mcr = GEN11_MCR_SLICE(slice) | GEN11_MCR_SUBSLICE(subslice); + mcr_mask = GEN11_MCR_SLICE_MASK | GEN11_MCR_SUBSLICE_MASK; + + wa_write_clr_set(wal, steering_reg, mcr_mask, mcr); +} + +static void __add_mcr_wa(struct drm_i915_private *i915, struct i915_wa_list *wal, + unsigned int slice, unsigned int subslice) +{ + drm_dbg(&i915->drm, "MCR slice=0x%x, subslice=0x%x\n", slice, subslice); + + __set_mcr_steering(wal, GEN8_MCR_SELECTOR, slice, subslice); +} + static void -wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal) +icl_wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal) { const struct sseu_dev_info *sseu = &i915->gt.info.sseu; unsigned int slice, subslice; - u32 l3_en, mcr, mcr_mask; - GEM_BUG_ON(GRAPHICS_VER(i915) < 10); + GEM_BUG_ON(GRAPHICS_VER(i915) < 11); + GEM_BUG_ON(hweight8(sseu->slice_mask) > 1); + slice = 0; /* - * WaProgramMgsrForL3BankSpecificMmioReads: cnl,icl - * L3Banks could be fused off in single slice scenario. If that is - * the case, we might need to program MCR select to a valid L3Bank - * by default, to make sure we correctly read certain registers - * later on (in the range 0xB100 - 0xB3FF). + * Although a platform may have subslices, we need to always steer + * reads to the lowest instance that isn't fused off. When Render + * Power Gating is enabled, grabbing forcewake will only power up a + * single subslice (the "minconfig") if there isn't a real workload + * that needs to be run; this means that if we steer register reads to + * one of the higher subslices, we run the risk of reading back 0's or + * random garbage. + */ + subslice = __ffs(intel_sseu_get_subslices(sseu, slice)); + + /* + * If the subslice we picked above also steers us to a valid L3 bank, + * then we can just rely on the default steering and won't need to + * worry about explicitly re-steering L3BANK reads later. 
+ */ + if (i915->gt.info.l3bank_mask & BIT(subslice)) + i915->gt.steering_table[L3BANK] = NULL; + + __add_mcr_wa(i915, wal, slice, subslice); +} + +static void +xehp_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal) +{ + struct drm_i915_private *i915 = gt->i915; + const struct sseu_dev_info *sseu = >->info.sseu; + unsigned long slice, subslice = 0, slice_mask = 0; + u64 dss_mask = 0; + u32 lncf_mask = 0; + int i; + + /* + * On Xe_HP the steering increases in complexity. There are now several + * more units that require steering and we're not guaranteed to be able + * to find a common setting for all of them. These are: + * - GSLICE (fusable) + * - DSS (sub-unit within gslice; fusable) + * - L3 Bank (fusable) + * - MSLICE (fusable) + * - LNCF (sub-unit within mslice; always present if mslice is present) * - * WaProgramMgsrForCorrectSliceSpecificMmioReads:cnl,icl - * Before any MMIO read into slice/subslice specific registers, MCR - * packet control register needs to be programmed to point to any - * enabled s/ss pair. Otherwise, incorrect values will be returned. - * This means each subsequent MMIO read will be forwarded to an - * specific s/ss combination, but this is OK since these registers - * are consistent across s/ss in almost all cases. In the rare - * occasions, such as INSTDONE, where this value is dependent - * on s/ss combo, the read should be done with read_subslice_reg. + * We'll do our default/implicit steering based on GSLICE (in the + * sliceid field) and DSS (in the subsliceid field). If we can + * find overlap between the valid MSLICE and/or LNCF values with + * a suitable GSLICE, then we can just re-use the default value and + * skip and explicit steering at runtime. * - * Since GEN8_MCR_SELECTOR contains dual-purpose bits which select both - * to which subslice, or to which L3 bank, the respective mmio reads - * will go, we have to find a common index which works for both - * accesses. + * We only need to look for overlap between GSLICE/MSLICE/LNCF to find + * a valid sliceid value. DSS steering is the only type of steering + * that utilizes the 'subsliceid' bits. * - * Case where we cannot find a common index fortunately should not - * happen in production hardware, so we only emit a warning instead of - * implementing something more complex that requires checking the range - * of every MMIO read. + * Also note that, even though the steering domain is called "GSlice" + * and it is encoded in the register using the gslice format, the spec + * says that the combined (geometry | compute) fuse should be used to + * select the steering. */ - if (GRAPHICS_VER(i915) >= 10 && is_power_of_2(sseu->slice_mask)) { - u32 l3_fuse = - intel_uncore_read(&i915->uncore, GEN10_MIRROR_FUSE3) & - GEN10_L3BANK_MASK; + /* Find the potential gslice candidates */ + dss_mask = intel_sseu_get_subslices(sseu, 0); + slice_mask = intel_slicemask_from_dssmask(dss_mask, GEN_DSS_PER_GSLICE); - drm_dbg(&i915->drm, "L3 fuse = %x\n", l3_fuse); - l3_en = ~(l3_fuse << GEN10_L3BANK_PAIR_COUNT | l3_fuse); - } else { - l3_en = ~0; - } + /* + * Find the potential LNCF candidates. Either LNCF within a valid + * mslice is fine. 
+ */ + for_each_set_bit(i, >->info.mslice_mask, GEN12_MAX_MSLICES) + lncf_mask |= (0x3 << (i * 2)); - slice = fls(sseu->slice_mask) - 1; - subslice = fls(l3_en & intel_sseu_get_subslices(sseu, slice)); - if (!subslice) { - drm_warn(&i915->drm, - "No common index found between subslice mask %x and L3 bank mask %x!\n", - intel_sseu_get_subslices(sseu, slice), l3_en); - subslice = fls(l3_en); - drm_WARN_ON(&i915->drm, !subslice); - } - subslice--; - - if (GRAPHICS_VER(i915) >= 11) { - mcr = GEN11_MCR_SLICE(slice) | GEN11_MCR_SUBSLICE(subslice); - mcr_mask = GEN11_MCR_SLICE_MASK | GEN11_MCR_SUBSLICE_MASK; - } else { - mcr = GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice); - mcr_mask = GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK; + /* + * Are there any sliceid values that work for both GSLICE and LNCF + * steering? + */ + if (slice_mask & lncf_mask) { + slice_mask &= lncf_mask; + gt->steering_table[LNCF] = NULL; } - drm_dbg(&i915->drm, "MCR slice/subslice = %x\n", mcr); + /* How about sliceid values that also work for MSLICE steering? */ + if (slice_mask & gt->info.mslice_mask) { + slice_mask &= gt->info.mslice_mask; + gt->steering_table[MSLICE] = NULL; + } - wa_write_clr_set(wal, GEN8_MCR_SELECTOR, mcr_mask, mcr); -} + slice = __ffs(slice_mask); + subslice = __ffs(dss_mask >> (slice * GEN_DSS_PER_GSLICE)); + WARN_ON(subslice > GEN_DSS_PER_GSLICE); + WARN_ON(dss_mask >> (slice * GEN_DSS_PER_GSLICE) == 0); -static void -cnl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) -{ - wa_init_mcr(i915, wal); + __add_mcr_wa(i915, wal, slice, subslice); - /* WaInPlaceDecompressionHang:cnl */ - wa_write_or(wal, - GEN9_GAMT_ECO_REG_RW_IA, - GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); + /* + * SQIDI ranges are special because they use different steering + * registers than everything else we work with. On XeHP SDV and + * DG2-G10, any value in the steering registers will work fine since + * all instances are present, but DG2-G11 only has SQIDI instances at + * ID's 2 and 3, so we need to steer to one of those. For simplicity + * we'll just steer to a hardcoded "2" since that value will work + * everywhere. 
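As a worked illustration of the steering-candidate narrowing done in xehp_init_mcr() above, the standalone sketch below repeats the same mask arithmetic with made-up fuse values: each gslice spans GEN_DSS_PER_GSLICE (4) DSS, every present mslice contributes two LNCF instances, and a usable sliceid must fall in the intersection of the gslice, LNCF and mslice masks. The helper mirrors intel_slicemask_from_dssmask() from the intel_sseu.c hunk; the single intersection is a simplification of the driver's step-by-step narrowing, and all fuse values are hypothetical.

#include <stdint.h>
#include <stdio.h>

/* same loop as intel_slicemask_from_dssmask(), spelled out with stdint */
static uint16_t slicemask_from_dssmask(uint64_t dss_mask, int dss_per_slice)
{
	const uint64_t per_slice = (1ull << dss_per_slice) - 1;	/* GENMASK(n - 1, 0) */
	uint16_t slice_mask = 0;

	for (int i = 0; dss_mask; i++, dss_mask >>= dss_per_slice)
		if (dss_mask & per_slice)
			slice_mask |= (uint16_t)(1u << i);

	return slice_mask;
}

int main(void)
{
	const int dss_per_gslice = 4;
	uint64_t dss_mask = 0xf3;	/* hypothetical fuse: DSS 0,1 and 4-7 present */
	uint16_t mslice_mask = 0x1;	/* hypothetical fuse: only mslice 0 present */
	uint16_t lncf_mask = 0, gslice_mask;

	/* two LNCF instances per present mslice, as in the hunk above */
	for (int i = 0; i < 16; i++)
		if (mslice_mask & (1u << i))
			lncf_mask |= (uint16_t)(0x3u << (i * 2));

	gslice_mask = slicemask_from_dssmask(dss_mask, dss_per_gslice);	/* -> 0x3 */

	/* sliceids usable for GSLICE, LNCF and MSLICE steering at the same time */
	printf("gslice=%#x lncf=%#x common=%#x\n",
	       gslice_mask, lncf_mask, gslice_mask & lncf_mask & mslice_mask);
	return 0;
}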
+ */ + __set_mcr_steering(wal, MCFG_MCR_SELECTOR, 0, 2); + __set_mcr_steering(wal, SF_MCR_SELECTOR, 0, 2); } static void icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) { - wa_init_mcr(i915, wal); - - /* WaInPlaceDecompressionHang:icl */ - wa_write_or(wal, - GEN9_GAMT_ECO_REG_RW_IA, - GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); + icl_wa_init_mcr(i915, wal); /* WaModifyGamTlbPartitioning:icl */ wa_write_clr_set(wal, @@ -1057,18 +1049,6 @@ icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) GEN8_GAMW_ECO_DEV_RW_IA, GAMW_ECO_DEV_CTX_RELOAD_DISABLE); - /* Wa_1405779004:icl (pre-prod) */ - if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0)) - wa_write_or(wal, - SLICE_UNIT_LEVEL_CLKGATE, - MSCUNIT_CLKGATE_DIS); - - /* Wa_1406838659:icl (pre-prod) */ - if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0)) - wa_write_or(wal, - INF_UNIT_LEVEL_CLKGATE, - CGPSF_CLKGATE_DIS); - /* Wa_1406463099:icl * Formerly known as WaGamTlbPendError */ @@ -1078,10 +1058,16 @@ icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) /* Wa_1607087056:icl,ehl,jsl */ if (IS_ICELAKE(i915) || - IS_JSL_EHL_REVID(i915, EHL_REVID_A0, EHL_REVID_A0)) + IS_JSL_EHL_GT_STEP(i915, STEP_A0, STEP_B0)) wa_write_or(wal, SLICE_UNIT_LEVEL_CLKGATE, L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS); + + /* + * This is not a documented workaround, but rather an optimization + * to reduce sampler power. + */ + wa_write_clr(wal, GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE); } /* @@ -1111,10 +1097,13 @@ static void gen12_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) { - wa_init_mcr(i915, wal); + icl_wa_init_mcr(i915, wal); - /* Wa_14011060649:tgl,rkl,dg1,adls */ + /* Wa_14011060649:tgl,rkl,dg1,adl-s,adl-p */ wa_14011060649(i915, wal); + + /* Wa_14011059788:tgl,rkl,adl-s,dg1,adl-p */ + wa_write_or(wal, GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE); } static void @@ -1123,19 +1112,19 @@ tgl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) gen12_gt_workarounds_init(i915, wal); /* Wa_1409420604:tgl */ - if (IS_TGL_UY_GT_STEP(i915, STEP_A0, STEP_A0)) + if (IS_TGL_UY_GT_STEP(i915, STEP_A0, STEP_B0)) wa_write_or(wal, SUBSLICE_UNIT_LEVEL_CLKGATE2, CPSSUNIT_CLKGATE_DIS); /* Wa_1607087056:tgl also know as BUG:1409180338 */ - if (IS_TGL_UY_GT_STEP(i915, STEP_A0, STEP_A0)) + if (IS_TGL_UY_GT_STEP(i915, STEP_A0, STEP_B0)) wa_write_or(wal, SLICE_UNIT_LEVEL_CLKGATE, L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS); /* Wa_1408615072:tgl[a0] */ - if (IS_TGL_UY_GT_STEP(i915, STEP_A0, STEP_A0)) + if (IS_TGL_UY_GT_STEP(i915, STEP_A0, STEP_B0)) wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2, VSUNIT_CLKGATE_DIS_TGL); } @@ -1146,7 +1135,7 @@ dg1_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) gen12_gt_workarounds_init(i915, wal); /* Wa_1607087056:dg1 */ - if (IS_DG1_REVID(i915, DG1_REVID_A0, DG1_REVID_A0)) + if (IS_DG1_GT_STEP(i915, STEP_A0, STEP_B0)) wa_write_or(wal, SLICE_UNIT_LEVEL_CLKGATE, L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS); @@ -1165,9 +1154,17 @@ dg1_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) } static void +xehpsdv_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) +{ + xehp_init_mcr(&i915->gt, wal); +} + +static void gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal) { - if (IS_DG1(i915)) + if (IS_XEHPSDV(i915)) + xehpsdv_gt_workarounds_init(i915, wal); + else if (IS_DG1(i915)) dg1_gt_workarounds_init(i915, wal); else if 
(IS_TIGERLAKE(i915)) tgl_gt_workarounds_init(i915, wal); @@ -1175,8 +1172,6 @@ gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal) gen12_gt_workarounds_init(i915, wal); else if (GRAPHICS_VER(i915) == 11) icl_gt_workarounds_init(i915, wal); - else if (IS_CANNONLAKE(i915)) - cnl_gt_workarounds_init(i915, wal); else if (IS_COFFEELAKE(i915) || IS_COMETLAKE(i915)) cfl_gt_workarounds_init(i915, wal); else if (IS_GEMINILAKE(i915)) @@ -1184,7 +1179,7 @@ gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal) else if (IS_KABYLAKE(i915)) kbl_gt_workarounds_init(i915, wal); else if (IS_BROXTON(i915)) - bxt_gt_workarounds_init(i915, wal); + gen9_gt_workarounds_init(i915, wal); else if (IS_SKYLAKE(i915)) skl_gt_workarounds_init(i915, wal); else if (IS_HASWELL(i915)) @@ -1247,8 +1242,9 @@ wa_verify(const struct i915_wa *wa, u32 cur, const char *name, const char *from) } static void -wa_list_apply(struct intel_uncore *uncore, const struct i915_wa_list *wal) +wa_list_apply(struct intel_gt *gt, const struct i915_wa_list *wal) { + struct intel_uncore *uncore = gt->uncore; enum forcewake_domains fw; unsigned long flags; struct i915_wa *wa; @@ -1263,13 +1259,16 @@ wa_list_apply(struct intel_uncore *uncore, const struct i915_wa_list *wal) intel_uncore_forcewake_get__locked(uncore, fw); for (i = 0, wa = wal->list; i < wal->count; i++, wa++) { - if (wa->clr) - intel_uncore_rmw_fw(uncore, wa->reg, wa->clr, wa->set); - else - intel_uncore_write_fw(uncore, wa->reg, wa->set); + u32 val, old = 0; + + /* open-coded rmw due to steering */ + old = wa->clr ? intel_gt_read_register_fw(gt, wa->reg) : 0; + val = (old & ~wa->clr) | wa->set; + if (val != old || !wa->clr) + intel_uncore_write_fw(uncore, wa->reg, val); + if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) - wa_verify(wa, - intel_uncore_read_fw(uncore, wa->reg), + wa_verify(wa, intel_gt_read_register_fw(gt, wa->reg), wal->name, "application"); } @@ -1279,28 +1278,39 @@ wa_list_apply(struct intel_uncore *uncore, const struct i915_wa_list *wal) void intel_gt_apply_workarounds(struct intel_gt *gt) { - wa_list_apply(gt->uncore, >->i915->gt_wa_list); + wa_list_apply(gt, >->i915->gt_wa_list); } -static bool wa_list_verify(struct intel_uncore *uncore, +static bool wa_list_verify(struct intel_gt *gt, const struct i915_wa_list *wal, const char *from) { + struct intel_uncore *uncore = gt->uncore; struct i915_wa *wa; + enum forcewake_domains fw; + unsigned long flags; unsigned int i; bool ok = true; + fw = wal_get_fw_for_rmw(uncore, wal); + + spin_lock_irqsave(&uncore->lock, flags); + intel_uncore_forcewake_get__locked(uncore, fw); + for (i = 0, wa = wal->list; i < wal->count; i++, wa++) ok &= wa_verify(wa, - intel_uncore_read(uncore, wa->reg), + intel_gt_read_register_fw(gt, wa->reg), wal->name, from); + intel_uncore_forcewake_put__locked(uncore, fw); + spin_unlock_irqrestore(&uncore->lock, flags); + return ok; } bool intel_gt_verify_workarounds(struct intel_gt *gt, const char *from) { - return wa_list_verify(gt->uncore, >->i915->gt_wa_list, from); + return wa_list_verify(gt, >->i915->gt_wa_list, from); } __maybe_unused @@ -1438,17 +1448,6 @@ static void cml_whitelist_build(struct intel_engine_cs *engine) cfl_whitelist_build(engine); } -static void cnl_whitelist_build(struct intel_engine_cs *engine) -{ - struct i915_wa_list *w = &engine->whitelist; - - if (engine->class != RENDER_CLASS) - return; - - /* WaEnablePreemptionGranularityControlByUMD:cnl */ - whitelist_reg(w, GEN8_CS_CHICKEN1); -} - static void icl_whitelist_build(struct 
intel_engine_cs *engine) { struct i915_wa_list *w = &engine->whitelist; @@ -1542,7 +1541,7 @@ static void dg1_whitelist_build(struct intel_engine_cs *engine) tgl_whitelist_build(engine); /* GEN:BUG:1409280441:dg1 */ - if (IS_DG1_REVID(engine->i915, DG1_REVID_A0, DG1_REVID_A0) && + if (IS_DG1_GT_STEP(engine->i915, STEP_A0, STEP_B0) && (engine->class == RENDER_CLASS || engine->class == COPY_ENGINE_CLASS)) whitelist_reg_ext(w, RING_ID(engine->mmio_base), @@ -1562,8 +1561,6 @@ void intel_engine_init_whitelist(struct intel_engine_cs *engine) tgl_whitelist_build(engine); else if (GRAPHICS_VER(i915) == 11) icl_whitelist_build(engine); - else if (IS_CANNONLAKE(i915)) - cnl_whitelist_build(engine); else if (IS_COMETLAKE(i915)) cml_whitelist_build(engine); else if (IS_COFFEELAKE(i915)) @@ -1612,8 +1609,8 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { struct drm_i915_private *i915 = engine->i915; - if (IS_DG1_REVID(i915, DG1_REVID_A0, DG1_REVID_A0) || - IS_TGL_UY_GT_STEP(i915, STEP_A0, STEP_A0)) { + if (IS_DG1_GT_STEP(i915, STEP_A0, STEP_B0) || + IS_TGL_UY_GT_STEP(i915, STEP_A0, STEP_B0)) { /* * Wa_1607138336:tgl[a0],dg1[a0] * Wa_1607063988:tgl[a0],dg1[a0] @@ -1623,7 +1620,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) GEN12_DISABLE_POSH_BUSY_FF_DOP_CG); } - if (IS_TGL_UY_GT_STEP(i915, STEP_A0, STEP_A0)) { + if (IS_TGL_UY_GT_STEP(i915, STEP_A0, STEP_B0)) { /* * Wa_1606679103:tgl * (see also Wa_1606682166:icl) @@ -1633,44 +1630,46 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) GEN7_DISABLE_SAMPLER_PREFETCH); } - if (IS_ALDERLAKE_S(i915) || IS_DG1(i915) || + if (IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) || IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) { - /* Wa_1606931601:tgl,rkl,dg1,adl-s */ + /* Wa_1606931601:tgl,rkl,dg1,adl-s,adl-p */ wa_masked_en(wal, GEN7_ROW_CHICKEN2, GEN12_DISABLE_EARLY_READ); /* * Wa_1407928979:tgl A* * Wa_18011464164:tgl[B0+],dg1[B0+] * Wa_22010931296:tgl[B0+],dg1[B0+] - * Wa_14010919138:rkl,dg1,adl-s + * Wa_14010919138:rkl,dg1,adl-s,adl-p */ wa_write_or(wal, GEN7_FF_THREAD_MODE, GEN12_FF_TESSELATION_DOP_GATE_DISABLE); /* - * Wa_1606700617:tgl,dg1 - * Wa_22010271021:tgl,rkl,dg1, adl-s + * Wa_1606700617:tgl,dg1,adl-p + * Wa_22010271021:tgl,rkl,dg1,adl-s,adl-p + * Wa_14010826681:tgl,dg1,rkl,adl-p */ wa_masked_en(wal, GEN9_CS_DEBUG_MODE1, FF_DOP_CLOCK_GATE_DISABLE); } - if (IS_ALDERLAKE_S(i915) || IS_DG1_REVID(i915, DG1_REVID_A0, DG1_REVID_A0) || + if (IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) || + IS_DG1_GT_STEP(i915, STEP_A0, STEP_B0) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) { - /* Wa_1409804808:tgl,rkl,dg1[a0],adl-s */ + /* Wa_1409804808:tgl,rkl,dg1[a0],adl-s,adl-p */ wa_masked_en(wal, GEN7_ROW_CHICKEN2, GEN12_PUSH_CONST_DEREF_HOLD_DIS); /* * Wa_1409085225:tgl - * Wa_14010229206:tgl,rkl,dg1[a0],adl-s + * Wa_14010229206:tgl,rkl,dg1[a0],adl-s,adl-p */ wa_masked_en(wal, GEN9_ROW_CHICKEN4, GEN12_DISABLE_TDL_PUSH); } - if (IS_DG1_REVID(i915, DG1_REVID_A0, DG1_REVID_A0) || + if (IS_DG1_GT_STEP(i915, STEP_A0, STEP_B0) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) { /* * Wa_1607030317:tgl @@ -1688,8 +1687,9 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) GEN8_RC_SEMA_IDLE_MSG_DISABLE); } - if (IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) { - /* Wa_1406941453:tgl,rkl,dg1 */ + if (IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915) || + IS_ALDERLAKE_S(i915) || IS_ALDERLAKE_P(i915)) { + /* 
Wa_1406941453:tgl,rkl,dg1,adl-s,adl-p */ wa_masked_en(wal, GEN10_SAMPLER_MODE, ENABLE_SMALLPL); @@ -1701,11 +1701,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) _3D_CHICKEN3, _3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE); - /* WaPipelineFlushCoherentLines:icl */ - wa_write_or(wal, - GEN8_L3SQCREG4, - GEN8_LQSC_FLUSH_COHERENT_LINES); - /* * Wa_1405543622:icl * Formerly known as WaGAPZPriorityScheme @@ -1735,19 +1730,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) GEN8_L3SQCREG4, GEN11_LQSC_CLEAN_EVICT_DISABLE); - /* WaForwardProgressSoftReset:icl */ - wa_write_or(wal, - GEN10_SCRATCH_LNCF2, - PMFLUSHDONE_LNICRSDROP | - PMFLUSH_GAPL3UNBLOCK | - PMFLUSHDONE_LNEBLK); - - /* Wa_1406609255:icl (pre-prod) */ - if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0)) - wa_write_or(wal, - GEN7_SARCHKMD, - GEN7_DISABLE_DEMAND_PREFETCH); - /* Wa_1606682166:icl */ wa_write_or(wal, GEN7_SARCHKMD, @@ -1947,10 +1929,10 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) * disable bit, which we don't touch here, but it's good * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). */ - wa_add(wal, GEN7_GT_MODE, 0, - _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, - GEN6_WIZ_HASHING_16x4), - GEN6_WIZ_HASHING_16x4); + wa_masked_field_set(wal, + GEN7_GT_MODE, + GEN6_WIZ_HASHING_MASK, + GEN6_WIZ_HASHING_16x4); } if (IS_GRAPHICS_VER(i915, 6, 7)) @@ -2000,10 +1982,10 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) * disable bit, which we don't touch here, but it's good * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). */ - wa_add(wal, - GEN6_GT_MODE, 0, - _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4), - GEN6_WIZ_HASHING_16x4); + wa_masked_field_set(wal, + GEN6_GT_MODE, + GEN6_WIZ_HASHING_MASK, + GEN6_WIZ_HASHING_16x4); /* WaDisable_RenderCache_OperationalFlush:snb */ wa_masked_dis(wal, CACHE_MODE_0, RC_OP_FLUSH_ENABLE); @@ -2024,7 +2006,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) wa_add(wal, MI_MODE, 0, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH), /* XXX bit doesn't stick on Broadwater */ - IS_I965G(i915) ? 0 : VS_TIMER_DISPATCH); + IS_I965G(i915) ? 
0 : VS_TIMER_DISPATCH, true); if (GRAPHICS_VER(i915) == 4) /* @@ -2039,7 +2021,8 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) */ wa_add(wal, ECOSKPD, 0, _MASKED_BIT_ENABLE(ECO_CONSTANT_BUFFER_SR_DISABLE), - 0 /* XXX bit doesn't stick on Broadwater */); + 0 /* XXX bit doesn't stick on Broadwater */, + true); } static void @@ -2048,7 +2031,7 @@ xcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) struct drm_i915_private *i915 = engine->i915; /* WaKBLVECSSemaphoreWaitPoll:kbl */ - if (IS_KBL_GT_STEP(i915, STEP_A0, STEP_E0)) { + if (IS_KBL_GT_STEP(i915, STEP_A0, STEP_F0)) { wa_write(wal, RING_SEMA_WAIT_POLL(engine->mmio_base), 1); @@ -2081,7 +2064,7 @@ void intel_engine_init_workarounds(struct intel_engine_cs *engine) void intel_engine_apply_workarounds(struct intel_engine_cs *engine) { - wa_list_apply(engine->uncore, &engine->wa_list); + wa_list_apply(engine->gt, &engine->wa_list); } struct mcr_range { @@ -2107,12 +2090,31 @@ static const struct mcr_range mcr_ranges_gen12[] = { {}, }; +static const struct mcr_range mcr_ranges_xehp[] = { + { .start = 0x4000, .end = 0x4aff }, + { .start = 0x5200, .end = 0x52ff }, + { .start = 0x5400, .end = 0x7fff }, + { .start = 0x8140, .end = 0x815f }, + { .start = 0x8c80, .end = 0x8dff }, + { .start = 0x94d0, .end = 0x955f }, + { .start = 0x9680, .end = 0x96ff }, + { .start = 0xb000, .end = 0xb3ff }, + { .start = 0xc800, .end = 0xcfff }, + { .start = 0xd800, .end = 0xd8ff }, + { .start = 0xdc00, .end = 0xffff }, + { .start = 0x17000, .end = 0x17fff }, + { .start = 0x24a00, .end = 0x24a7f }, + {}, +}; + static bool mcr_range(struct drm_i915_private *i915, u32 offset) { const struct mcr_range *mcr_ranges; int i; - if (GRAPHICS_VER(i915) >= 12) + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) + mcr_ranges = mcr_ranges_xehp; + else if (GRAPHICS_VER(i915) >= 12) mcr_ranges = mcr_ranges_gen12; else if (GRAPHICS_VER(i915) >= 8) mcr_ranges = mcr_ranges_gen8; diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds_types.h b/drivers/gpu/drm/i915/gt/intel_workarounds_types.h index c214111ea367..1e873681795d 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds_types.h +++ b/drivers/gpu/drm/i915/gt/intel_workarounds_types.h @@ -15,6 +15,7 @@ struct i915_wa { u32 clr; u32 set; u32 read; + bool masked_reg; }; struct i915_wa_list { diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c index 32589c6625e1..2c1af030310c 100644 --- a/drivers/gpu/drm/i915/gt/mock_engine.c +++ b/drivers/gpu/drm/i915/gt/mock_engine.c @@ -235,6 +235,34 @@ static void mock_submit_request(struct i915_request *request) spin_unlock_irqrestore(&engine->hw_lock, flags); } +static void mock_add_to_engine(struct i915_request *rq) +{ + lockdep_assert_held(&rq->engine->sched_engine->lock); + list_move_tail(&rq->sched.link, &rq->engine->sched_engine->requests); +} + +static void mock_remove_from_engine(struct i915_request *rq) +{ + struct intel_engine_cs *engine, *locked; + + /* + * Virtual engines complicate acquiring the engine timeline lock, + * as their rq->engine pointer is not stable until under that + * engine lock. The simple ploy we use is to take the lock then + * check that the rq still belongs to the newly locked engine. 
+ */ + + locked = READ_ONCE(rq->engine); + spin_lock_irq(&locked->sched_engine->lock); + while (unlikely(locked != (engine = READ_ONCE(rq->engine)))) { + spin_unlock(&locked->sched_engine->lock); + spin_lock(&engine->sched_engine->lock); + locked = engine; + } + list_del_init(&rq->sched.link); + spin_unlock_irq(&locked->sched_engine->lock); +} + static void mock_reset_prepare(struct intel_engine_cs *engine) { } @@ -253,10 +281,10 @@ static void mock_reset_cancel(struct intel_engine_cs *engine) del_timer_sync(&mock->hw_delay); - spin_lock_irqsave(&engine->active.lock, flags); + spin_lock_irqsave(&engine->sched_engine->lock, flags); /* Mark all submitted requests as skipped. */ - list_for_each_entry(rq, &engine->active.requests, sched.link) + list_for_each_entry(rq, &engine->sched_engine->requests, sched.link) i915_request_put(i915_request_mark_eio(rq)); intel_engine_signal_breadcrumbs(engine); @@ -269,7 +297,7 @@ static void mock_reset_cancel(struct intel_engine_cs *engine) } INIT_LIST_HEAD(&mock->hw_queue); - spin_unlock_irqrestore(&engine->active.lock, flags); + spin_unlock_irqrestore(&engine->sched_engine->lock, flags); } static void mock_reset_finish(struct intel_engine_cs *engine) @@ -283,7 +311,8 @@ static void mock_engine_release(struct intel_engine_cs *engine) GEM_BUG_ON(timer_pending(&mock->hw_delay)); - intel_breadcrumbs_free(engine->breadcrumbs); + i915_sched_engine_put(engine->sched_engine); + intel_breadcrumbs_put(engine->breadcrumbs); intel_context_unpin(engine->kernel_context); intel_context_put(engine->kernel_context); @@ -320,6 +349,8 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, engine->base.emit_flush = mock_emit_flush; engine->base.emit_fini_breadcrumb = mock_emit_breadcrumb; engine->base.submit_request = mock_submit_request; + engine->base.add_active_request = mock_add_to_engine; + engine->base.remove_active_request = mock_remove_from_engine; engine->base.reset.prepare = mock_reset_prepare; engine->base.reset.rewind = mock_reset_rewind; @@ -345,14 +376,18 @@ int mock_engine_init(struct intel_engine_cs *engine) { struct intel_context *ce; - intel_engine_init_active(engine, ENGINE_MOCK); + engine->sched_engine = i915_sched_engine_create(ENGINE_MOCK); + if (!engine->sched_engine) + return -ENOMEM; + engine->sched_engine->private_data = engine; + intel_engine_init_execlists(engine); intel_engine_init__pm(engine); intel_engine_init_retire(engine); engine->breadcrumbs = intel_breadcrumbs_create(NULL); if (!engine->breadcrumbs) - return -ENOMEM; + goto err_schedule; ce = create_kernel_context(engine); if (IS_ERR(ce)) @@ -365,7 +400,9 @@ int mock_engine_init(struct intel_engine_cs *engine) return 0; err_breadcrumbs: - intel_breadcrumbs_free(engine->breadcrumbs); + intel_breadcrumbs_put(engine->breadcrumbs); +err_schedule: + i915_sched_engine_put(engine->sched_engine); return -ENOMEM; } diff --git a/drivers/gpu/drm/i915/gt/selftest_context.c b/drivers/gpu/drm/i915/gt/selftest_context.c index 26685b927169..fa7b99a671dd 100644 --- a/drivers/gpu/drm/i915/gt/selftest_context.c +++ b/drivers/gpu/drm/i915/gt/selftest_context.c @@ -209,7 +209,13 @@ static int __live_active_context(struct intel_engine_cs *engine) * This test makes sure that the context is kept alive until a * subsequent idle-barrier (emitted when the engine wakeref hits 0 * with no more outstanding requests). + * + * In GuC submission mode we don't use idle barriers and we instead + * get a message from the GuC to signal that it is safe to unpin the + * context from memory. 
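+ * Hence the early return below: under GuC submission the idle-barrier
+ * path this test exercises is never taken, so there is nothing to verify.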
*/ + if (intel_engine_uses_guc(engine)) + return 0; if (intel_engine_pm_is_awake(engine)) { pr_err("%s is awake before starting %s!\n", @@ -357,7 +363,11 @@ static int __live_remote_context(struct intel_engine_cs *engine) * on the context image remotely (intel_context_prepare_remote_request), * which inserts foreign fences into intel_context.active, does not * clobber the idle-barrier. + * + * In GuC submission mode we don't use idle barriers. */ + if (intel_engine_uses_guc(engine)) + return 0; if (intel_engine_pm_is_awake(engine)) { pr_err("%s is awake before starting %s!\n", diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c index 4896e4ccad50..317eebf086c3 100644 --- a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c +++ b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c @@ -405,3 +405,25 @@ void st_engine_heartbeat_enable(struct intel_engine_cs *engine) engine->props.heartbeat_interval_ms = engine->defaults.heartbeat_interval_ms; } + +void st_engine_heartbeat_disable_no_pm(struct intel_engine_cs *engine) +{ + engine->props.heartbeat_interval_ms = 0; + + /* + * Park the heartbeat but without holding the PM lock as that + * makes the engines appear not-idle. Note that if/when unpark + * is called due to the PM lock being acquired later the + * heartbeat still won't be enabled because of the above = 0. + */ + if (intel_engine_pm_get_if_awake(engine)) { + intel_engine_park_heartbeat(engine); + intel_engine_pm_put(engine); + } +} + +void st_engine_heartbeat_enable_no_pm(struct intel_engine_cs *engine) +{ + engine->props.heartbeat_interval_ms = + engine->defaults.heartbeat_interval_ms; +} diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.h b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.h index cd27113d5400..81da2cd8e406 100644 --- a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.h +++ b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.h @@ -9,6 +9,8 @@ struct intel_engine_cs; void st_engine_heartbeat_disable(struct intel_engine_cs *engine); +void st_engine_heartbeat_disable_no_pm(struct intel_engine_cs *engine); void st_engine_heartbeat_enable(struct intel_engine_cs *engine); +void st_engine_heartbeat_enable_no_pm(struct intel_engine_cs *engine); #endif /* SELFTEST_ENGINE_HEARTBEAT_H */ diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c index 72cca3f0da21..75569666105d 100644 --- a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c @@ -173,8 +173,8 @@ static int __live_engine_timestamps(struct intel_engine_cs *engine) d_ctx = trifilter(s_ctx); d_ctx *= engine->gt->clock_frequency; - if (IS_ICELAKE(engine->i915)) - d_ring *= 12500000; /* Fixed 80ns for icl ctx timestamp? */ + if (GRAPHICS_VER(engine->i915) == 11) + d_ring *= 12500000; /* Fixed 80ns for GEN11 ctx timestamp? 
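(12500000 is 1 s / 80 ns, i.e. a fixed 12.5 MHz tick assumed here in place of the variable GT clock frequency used on other platforms below.)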
*/ else d_ring *= engine->gt->clock_frequency; diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c index 1c8108d30b85..f12ffe797639 100644 --- a/drivers/gpu/drm/i915/gt/selftest_execlists.c +++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c @@ -43,7 +43,7 @@ static int wait_for_submit(struct intel_engine_cs *engine, unsigned long timeout) { /* Ignore our own attempts to suppress excess tasklets */ - tasklet_hi_schedule(&engine->execlists.tasklet); + tasklet_hi_schedule(&engine->sched_engine->tasklet); timeout += jiffies; do { @@ -273,7 +273,7 @@ static int live_unlite_restore(struct intel_gt *gt, int prio) }; /* Alternatively preempt the spinner with ce[1] */ - engine->schedule(rq[1], &attr); + engine->sched_engine->schedule(rq[1], &attr); } /* And switch back to ce[0] for good measure */ @@ -553,13 +553,13 @@ static int live_pin_rewind(void *arg) static int engine_lock_reset_tasklet(struct intel_engine_cs *engine) { - tasklet_disable(&engine->execlists.tasklet); + tasklet_disable(&engine->sched_engine->tasklet); local_bh_disable(); if (test_and_set_bit(I915_RESET_ENGINE + engine->id, &engine->gt->reset.flags)) { local_bh_enable(); - tasklet_enable(&engine->execlists.tasklet); + tasklet_enable(&engine->sched_engine->tasklet); intel_gt_set_wedged(engine->gt); return -EBUSY; @@ -574,7 +574,7 @@ static void engine_unlock_reset_tasklet(struct intel_engine_cs *engine) &engine->gt->reset.flags); local_bh_enable(); - tasklet_enable(&engine->execlists.tasklet); + tasklet_enable(&engine->sched_engine->tasklet); } static int live_hold_reset(void *arg) @@ -628,7 +628,7 @@ static int live_hold_reset(void *arg) if (err) goto out; - engine->execlists.tasklet.callback(&engine->execlists.tasklet); + engine->sched_engine->tasklet.callback(&engine->sched_engine->tasklet); GEM_BUG_ON(execlists_active(&engine->execlists) != rq); i915_request_get(rq); @@ -917,7 +917,7 @@ release_queue(struct intel_engine_cs *engine, i915_request_add(rq); local_bh_disable(); - engine->schedule(rq, &attr); + engine->sched_engine->schedule(rq, &attr); local_bh_enable(); /* kick tasklet */ i915_request_put(rq); @@ -1200,7 +1200,7 @@ static int live_timeslice_rewind(void *arg) while (i915_request_is_active(rq[A2])) { /* semaphore yield! */ /* Wait for the timeslice to kick in */ del_timer(&engine->execlists.timer); - tasklet_hi_schedule(&engine->execlists.tasklet); + tasklet_hi_schedule(&engine->sched_engine->tasklet); intel_engine_flush_submission(engine); } /* -> ELSP[] = { { A:rq1 }, { B:rq1 } } */ @@ -1342,7 +1342,7 @@ static int live_timeslice_queue(void *arg) err = PTR_ERR(rq); goto err_heartbeat; } - engine->schedule(rq, &attr); + engine->sched_engine->schedule(rq, &attr); err = wait_for_submit(engine, rq, HZ / 2); if (err) { pr_err("%s: Timed out trying to submit semaphores\n", @@ -1539,12 +1539,12 @@ static int live_busywait_preempt(void *arg) * preempt the busywaits used to synchronise between rings. 
*/ - ctx_hi = kernel_context(gt->i915); + ctx_hi = kernel_context(gt->i915, NULL); if (!ctx_hi) return -ENOMEM; ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY; - ctx_lo = kernel_context(gt->i915); + ctx_lo = kernel_context(gt->i915, NULL); if (!ctx_lo) goto err_ctx_hi; ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY; @@ -1741,12 +1741,12 @@ static int live_preempt(void *arg) if (igt_spinner_init(&spin_lo, gt)) goto err_spin_hi; - ctx_hi = kernel_context(gt->i915); + ctx_hi = kernel_context(gt->i915, NULL); if (!ctx_hi) goto err_spin_lo; ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY; - ctx_lo = kernel_context(gt->i915); + ctx_lo = kernel_context(gt->i915, NULL); if (!ctx_lo) goto err_ctx_hi; ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY; @@ -1833,11 +1833,11 @@ static int live_late_preempt(void *arg) if (igt_spinner_init(&spin_lo, gt)) goto err_spin_hi; - ctx_hi = kernel_context(gt->i915); + ctx_hi = kernel_context(gt->i915, NULL); if (!ctx_hi) goto err_spin_lo; - ctx_lo = kernel_context(gt->i915); + ctx_lo = kernel_context(gt->i915, NULL); if (!ctx_lo) goto err_ctx_hi; @@ -1884,7 +1884,7 @@ static int live_late_preempt(void *arg) } attr.priority = I915_PRIORITY_MAX; - engine->schedule(rq, &attr); + engine->sched_engine->schedule(rq, &attr); if (!igt_wait_for_spinner(&spin_hi, rq)) { pr_err("High priority context failed to preempt the low priority context\n"); @@ -1927,7 +1927,7 @@ struct preempt_client { static int preempt_client_init(struct intel_gt *gt, struct preempt_client *c) { - c->ctx = kernel_context(gt->i915); + c->ctx = kernel_context(gt->i915, NULL); if (!c->ctx) return -ENOMEM; @@ -2497,7 +2497,7 @@ static int live_suppress_self_preempt(void *arg) i915_request_add(rq_b); GEM_BUG_ON(i915_request_completed(rq_a)); - engine->schedule(rq_a, &attr); + engine->sched_engine->schedule(rq_a, &attr); igt_spinner_end(&a.spin); if (!igt_wait_for_spinner(&b.spin, rq_b)) { @@ -2629,7 +2629,7 @@ static int live_chain_preempt(void *arg) i915_request_get(rq); i915_request_add(rq); - engine->schedule(rq, &attr); + engine->sched_engine->schedule(rq, &attr); igt_spinner_end(&hi.spin); if (i915_request_wait(rq, 0, HZ / 5) < 0) { @@ -2810,7 +2810,7 @@ static int __live_preempt_ring(struct intel_engine_cs *engine, goto err_ce; } - tmp->ring = __intel_context_ring_size(ring_sz); + tmp->ring_size = ring_sz; err = intel_context_pin(tmp); if (err) { @@ -2988,7 +2988,7 @@ static int live_preempt_gang(void *arg) break; /* Submit each spinner at increasing priority */ - engine->schedule(rq, &attr); + engine->sched_engine->schedule(rq, &attr); } while (prio <= I915_PRIORITY_MAX && !__igt_timeout(end_time, NULL)); pr_debug("%s: Preempt chain of %d requests\n", @@ -3236,7 +3236,7 @@ static int preempt_user(struct intel_engine_cs *engine, i915_request_get(rq); i915_request_add(rq); - engine->schedule(rq, &attr); + engine->sched_engine->schedule(rq, &attr); if (i915_request_wait(rq, 0, HZ / 2) < 0) err = -ETIME; @@ -3384,12 +3384,12 @@ static int live_preempt_timeout(void *arg) if (igt_spinner_init(&spin_lo, gt)) return -ENOMEM; - ctx_hi = kernel_context(gt->i915); + ctx_hi = kernel_context(gt->i915, NULL); if (!ctx_hi) goto err_spin_lo; ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY; - ctx_lo = kernel_context(gt->i915); + ctx_lo = kernel_context(gt->i915, NULL); if (!ctx_lo) goto err_ctx_hi; ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY; @@ -3561,12 +3561,16 @@ static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags) #define BATCH BIT(0) 
{ struct task_struct *tsk[I915_NUM_ENGINES] = {}; - struct preempt_smoke arg[I915_NUM_ENGINES]; + struct preempt_smoke *arg; struct intel_engine_cs *engine; enum intel_engine_id id; unsigned long count; int err = 0; + arg = kmalloc_array(I915_NUM_ENGINES, sizeof(*arg), GFP_KERNEL); + if (!arg) + return -ENOMEM; + for_each_engine(engine, smoke->gt, id) { arg[id] = *smoke; arg[id].engine = engine; @@ -3574,7 +3578,7 @@ static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags) arg[id].batch = NULL; arg[id].count = 0; - tsk[id] = kthread_run(smoke_crescendo_thread, &arg, + tsk[id] = kthread_run(smoke_crescendo_thread, arg, "igt/smoke:%d", id); if (IS_ERR(tsk[id])) { err = PTR_ERR(tsk[id]); @@ -3603,6 +3607,8 @@ static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags) pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n", count, flags, smoke->gt->info.num_engines, smoke->ncontext); + + kfree(arg); return 0; } @@ -3676,7 +3682,7 @@ static int live_preempt_smoke(void *arg) } for (n = 0; n < smoke.ncontext; n++) { - smoke.contexts[n] = kernel_context(smoke.gt->i915); + smoke.contexts[n] = kernel_context(smoke.gt->i915, NULL); if (!smoke.contexts[n]) goto err_ctx; } @@ -3727,7 +3733,7 @@ static int nop_virtual_engine(struct intel_gt *gt, GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ve)); for (n = 0; n < nctx; n++) { - ve[n] = intel_execlists_create_virtual(siblings, nsibling); + ve[n] = intel_engine_create_virtual(siblings, nsibling); if (IS_ERR(ve[n])) { err = PTR_ERR(ve[n]); nctx = n; @@ -3923,7 +3929,7 @@ static int mask_virtual_engine(struct intel_gt *gt, * restrict it to our desired engine within the virtual engine. */ - ve = intel_execlists_create_virtual(siblings, nsibling); + ve = intel_engine_create_virtual(siblings, nsibling); if (IS_ERR(ve)) { err = PTR_ERR(ve); goto out_close; @@ -4054,7 +4060,7 @@ static int slicein_virtual_engine(struct intel_gt *gt, i915_request_add(rq); } - ce = intel_execlists_create_virtual(siblings, nsibling); + ce = intel_engine_create_virtual(siblings, nsibling); if (IS_ERR(ce)) { err = PTR_ERR(ce); goto out; @@ -4106,7 +4112,7 @@ static int sliceout_virtual_engine(struct intel_gt *gt, /* XXX We do not handle oversubscription and fairness with normal rq */ for (n = 0; n < nsibling; n++) { - ce = intel_execlists_create_virtual(siblings, nsibling); + ce = intel_engine_create_virtual(siblings, nsibling); if (IS_ERR(ce)) { err = PTR_ERR(ce); goto out; @@ -4208,7 +4214,7 @@ static int preserved_virtual_engine(struct intel_gt *gt, if (err) goto out_scratch; - ve = intel_execlists_create_virtual(siblings, nsibling); + ve = intel_engine_create_virtual(siblings, nsibling); if (IS_ERR(ve)) { err = PTR_ERR(ve); goto out_scratch; @@ -4328,234 +4334,6 @@ static int live_virtual_preserved(void *arg) return 0; } -static int bond_virtual_engine(struct intel_gt *gt, - unsigned int class, - struct intel_engine_cs **siblings, - unsigned int nsibling, - unsigned int flags) -#define BOND_SCHEDULE BIT(0) -{ - struct intel_engine_cs *master; - struct i915_request *rq[16]; - enum intel_engine_id id; - struct igt_spinner spin; - unsigned long n; - int err; - - /* - * A set of bonded requests is intended to be run concurrently - * across a number of engines. We use one request per-engine - * and a magic fence to schedule each of the bonded requests - * at the same time. 
A consequence of our current scheduler is that - * we only move requests to the HW ready queue when the request - * becomes ready, that is when all of its prerequisite fences have - * been signaled. As one of those fences is the master submit fence, - * there is a delay on all secondary fences as the HW may be - * currently busy. Equally, as all the requests are independent, - * they may have other fences that delay individual request - * submission to HW. Ergo, we do not guarantee that all requests are - * immediately submitted to HW at the same time, just that if the - * rules are abided by, they are ready at the same time as the - * first is submitted. Userspace can embed semaphores in its batch - * to ensure parallel execution of its phases as it requires. - * Though naturally it gets requested that perhaps the scheduler should - * take care of parallel execution, even across preemption events on - * different HW. (The proper answer is of course "lalalala".) - * - * With the submit-fence, we have identified three possible phases - * of synchronisation depending on the master fence: queued (not - * ready), executing, and signaled. The first two are quite simple - * and checked below. However, the signaled master fence handling is - * contentious. Currently we do not distinguish between a signaled - * fence and an expired fence, as once signaled it does not convey - * any information about the previous execution. It may even be freed - * and hence checking later it may not exist at all. Ergo we currently - * do not apply the bonding constraint for an already signaled fence, - * as our expectation is that it should not constrain the secondaries - * and is outside of the scope of the bonded request API (i.e. all - * userspace requests are meant to be running in parallel). As - * it imposes no constraint, and is effectively a no-op, we do not - * check below as normal execution flows are checked extensively above. - * - * XXX Is the degenerate handling of signaled submit fences the - * expected behaviour for userpace? 
- */ - - GEM_BUG_ON(nsibling >= ARRAY_SIZE(rq) - 1); - - if (igt_spinner_init(&spin, gt)) - return -ENOMEM; - - err = 0; - rq[0] = ERR_PTR(-ENOMEM); - for_each_engine(master, gt, id) { - struct i915_sw_fence fence = {}; - struct intel_context *ce; - - if (master->class == class) - continue; - - ce = intel_context_create(master); - if (IS_ERR(ce)) { - err = PTR_ERR(ce); - goto out; - } - - memset_p((void *)rq, ERR_PTR(-EINVAL), ARRAY_SIZE(rq)); - - rq[0] = igt_spinner_create_request(&spin, ce, MI_NOOP); - intel_context_put(ce); - if (IS_ERR(rq[0])) { - err = PTR_ERR(rq[0]); - goto out; - } - i915_request_get(rq[0]); - - if (flags & BOND_SCHEDULE) { - onstack_fence_init(&fence); - err = i915_sw_fence_await_sw_fence_gfp(&rq[0]->submit, - &fence, - GFP_KERNEL); - } - - i915_request_add(rq[0]); - if (err < 0) - goto out; - - if (!(flags & BOND_SCHEDULE) && - !igt_wait_for_spinner(&spin, rq[0])) { - err = -EIO; - goto out; - } - - for (n = 0; n < nsibling; n++) { - struct intel_context *ve; - - ve = intel_execlists_create_virtual(siblings, nsibling); - if (IS_ERR(ve)) { - err = PTR_ERR(ve); - onstack_fence_fini(&fence); - goto out; - } - - err = intel_virtual_engine_attach_bond(ve->engine, - master, - siblings[n]); - if (err) { - intel_context_put(ve); - onstack_fence_fini(&fence); - goto out; - } - - err = intel_context_pin(ve); - intel_context_put(ve); - if (err) { - onstack_fence_fini(&fence); - goto out; - } - - rq[n + 1] = i915_request_create(ve); - intel_context_unpin(ve); - if (IS_ERR(rq[n + 1])) { - err = PTR_ERR(rq[n + 1]); - onstack_fence_fini(&fence); - goto out; - } - i915_request_get(rq[n + 1]); - - err = i915_request_await_execution(rq[n + 1], - &rq[0]->fence, - ve->engine->bond_execute); - i915_request_add(rq[n + 1]); - if (err < 0) { - onstack_fence_fini(&fence); - goto out; - } - } - onstack_fence_fini(&fence); - intel_engine_flush_submission(master); - igt_spinner_end(&spin); - - if (i915_request_wait(rq[0], 0, HZ / 10) < 0) { - pr_err("Master request did not execute (on %s)!\n", - rq[0]->engine->name); - err = -EIO; - goto out; - } - - for (n = 0; n < nsibling; n++) { - if (i915_request_wait(rq[n + 1], 0, - MAX_SCHEDULE_TIMEOUT) < 0) { - err = -EIO; - goto out; - } - - if (rq[n + 1]->engine != siblings[n]) { - pr_err("Bonded request did not execute on target engine: expected %s, used %s; master was %s\n", - siblings[n]->name, - rq[n + 1]->engine->name, - rq[0]->engine->name); - err = -EINVAL; - goto out; - } - } - - for (n = 0; !IS_ERR(rq[n]); n++) - i915_request_put(rq[n]); - rq[0] = ERR_PTR(-ENOMEM); - } - -out: - for (n = 0; !IS_ERR(rq[n]); n++) - i915_request_put(rq[n]); - if (igt_flush_test(gt->i915)) - err = -EIO; - - igt_spinner_fini(&spin); - return err; -} - -static int live_virtual_bond(void *arg) -{ - static const struct phase { - const char *name; - unsigned int flags; - } phases[] = { - { "", 0 }, - { "schedule", BOND_SCHEDULE }, - { }, - }; - struct intel_gt *gt = arg; - struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; - unsigned int class; - int err; - - if (intel_uc_uses_guc_submission(>->uc)) - return 0; - - for (class = 0; class <= MAX_ENGINE_CLASS; class++) { - const struct phase *p; - int nsibling; - - nsibling = select_siblings(gt, class, siblings); - if (nsibling < 2) - continue; - - for (p = phases; p->name; p++) { - err = bond_virtual_engine(gt, - class, siblings, nsibling, - p->flags); - if (err) { - pr_err("%s(%s): failed class=%d, nsibling=%d, err=%d\n", - __func__, p->name, class, nsibling, err); - return err; - } - } - } - - return 0; -} 
- static int reset_virtual_engine(struct intel_gt *gt, struct intel_engine_cs **siblings, unsigned int nsibling) @@ -4576,7 +4354,7 @@ static int reset_virtual_engine(struct intel_gt *gt, if (igt_spinner_init(&spin, gt)) return -ENOMEM; - ve = intel_execlists_create_virtual(siblings, nsibling); + ve = intel_engine_create_virtual(siblings, nsibling); if (IS_ERR(ve)) { err = PTR_ERR(ve); goto out_spin; @@ -4606,13 +4384,13 @@ static int reset_virtual_engine(struct intel_gt *gt, if (err) goto out_heartbeat; - engine->execlists.tasklet.callback(&engine->execlists.tasklet); + engine->sched_engine->tasklet.callback(&engine->sched_engine->tasklet); GEM_BUG_ON(execlists_active(&engine->execlists) != rq); /* Fake a preemption event; failed of course */ - spin_lock_irq(&engine->active.lock); + spin_lock_irq(&engine->sched_engine->lock); __unwind_incomplete_requests(engine); - spin_unlock_irq(&engine->active.lock); + spin_unlock_irq(&engine->sched_engine->lock); GEM_BUG_ON(rq->engine != engine); /* Reset the engine while keeping our active request on hold */ @@ -4721,7 +4499,6 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915) SUBTEST(live_virtual_mask), SUBTEST(live_virtual_preserved), SUBTEST(live_virtual_slice), - SUBTEST(live_virtual_bond), SUBTEST(live_virtual_reset), }; diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index 853246fad05f..2c1ed32ca5ac 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -17,6 +17,8 @@ #include "selftests/igt_flush_test.h" #include "selftests/igt_reset.h" #include "selftests/igt_atomic.h" +#include "selftests/igt_spinner.h" +#include "selftests/intel_scheduler_helpers.h" #include "selftests/mock_drm.h" @@ -42,7 +44,7 @@ static int hang_init(struct hang *h, struct intel_gt *gt) memset(h, 0, sizeof(*h)); h->gt = gt; - h->ctx = kernel_context(gt->i915); + h->ctx = kernel_context(gt->i915, NULL); if (IS_ERR(h->ctx)) return PTR_ERR(h->ctx); @@ -378,6 +380,7 @@ static int igt_reset_nop(void *arg) ce = intel_context_create(engine); if (IS_ERR(ce)) { err = PTR_ERR(ce); + pr_err("[%s] Create context failed: %d!\n", engine->name, err); break; } @@ -387,6 +390,8 @@ static int igt_reset_nop(void *arg) rq = intel_context_create_request(ce); if (IS_ERR(rq)) { err = PTR_ERR(rq); + pr_err("[%s] Create request failed: %d!\n", + engine->name, err); break; } @@ -401,24 +406,31 @@ static int igt_reset_nop(void *arg) igt_global_reset_unlock(gt); if (intel_gt_is_wedged(gt)) { + pr_err("[%s] GT is wedged!\n", engine->name); err = -EIO; break; } if (i915_reset_count(global) != reset_count + ++count) { - pr_err("Full GPU reset not recorded!\n"); + pr_err("[%s] Reset not recorded: %d vs %d + %d!\n", + engine->name, i915_reset_count(global), reset_count, count); err = -EINVAL; break; } err = igt_flush_test(gt->i915); - if (err) + if (err) { + pr_err("[%s] Flush failed: %d!\n", engine->name, err); break; + } } while (time_before(jiffies, end_time)); pr_info("%s: %d resets\n", __func__, count); - if (igt_flush_test(gt->i915)) + if (igt_flush_test(gt->i915)) { + pr_err("Post flush failed: %d!\n", err); err = -EIO; + } + return err; } @@ -440,9 +452,19 @@ static int igt_reset_nop_engine(void *arg) IGT_TIMEOUT(end_time); int err; + if (intel_engine_uses_guc(engine)) { + /* Engine level resets are triggered by GuC when a hang + * is detected. They can't be triggered by the KMD any + * more. 
Thus a nop batch cannot be used as a reset test + */ + continue; + } + ce = intel_context_create(engine); - if (IS_ERR(ce)) + if (IS_ERR(ce)) { + pr_err("[%s] Create context failed: %pe!\n", engine->name, ce); return PTR_ERR(ce); + } reset_count = i915_reset_count(global); reset_engine_count = i915_reset_engine_count(global, engine); @@ -549,9 +571,15 @@ static int igt_reset_fail_engine(void *arg) IGT_TIMEOUT(end_time); int err; + /* Can't manually break the reset if i915 doesn't perform it */ + if (intel_engine_uses_guc(engine)) + continue; + ce = intel_context_create(engine); - if (IS_ERR(ce)) + if (IS_ERR(ce)) { + pr_err("[%s] Create context failed: %pe!\n", engine->name, ce); return PTR_ERR(ce); + } st_engine_heartbeat_disable(engine); set_bit(I915_RESET_ENGINE + id, >->reset.flags); @@ -686,8 +714,12 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active) for_each_engine(engine, gt, id) { unsigned int reset_count, reset_engine_count; unsigned long count; + bool using_guc = intel_engine_uses_guc(engine); IGT_TIMEOUT(end_time); + if (using_guc && !active) + continue; + if (active && !intel_engine_can_store_dword(engine)) continue; @@ -705,13 +737,24 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active) set_bit(I915_RESET_ENGINE + id, >->reset.flags); count = 0; do { - if (active) { - struct i915_request *rq; + struct i915_request *rq = NULL; + struct intel_selftest_saved_policy saved; + int err2; + + err = intel_selftest_modify_policy(engine, &saved, + SELFTEST_SCHEDULER_MODIFY_FAST_RESET); + if (err) { + pr_err("[%s] Modify policy failed: %d!\n", engine->name, err); + break; + } + if (active) { rq = hang_create_request(&h, engine); if (IS_ERR(rq)) { err = PTR_ERR(rq); - break; + pr_err("[%s] Create hang request failed: %d!\n", + engine->name, err); + goto restore; } i915_request_get(rq); @@ -727,34 +770,59 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active) i915_request_put(rq); err = -EIO; - break; + goto restore; } + } - i915_request_put(rq); + if (!using_guc) { + err = intel_engine_reset(engine, NULL); + if (err) { + pr_err("intel_engine_reset(%s) failed, err:%d\n", + engine->name, err); + goto skip; + } } - err = intel_engine_reset(engine, NULL); - if (err) { - pr_err("intel_engine_reset(%s) failed, err:%d\n", - engine->name, err); - break; + if (rq) { + /* Ensure the reset happens and kills the engine */ + err = intel_selftest_wait_for_rq(rq); + if (err) + pr_err("[%s] Wait for request %lld:%lld [0x%04X] failed: %d!\n", + engine->name, rq->fence.context, + rq->fence.seqno, rq->context->guc_id, err); } +skip: + if (rq) + i915_request_put(rq); + if (i915_reset_count(global) != reset_count) { pr_err("Full GPU reset recorded! 
(engine reset expected)\n"); err = -EINVAL; - break; + goto restore; } - if (i915_reset_engine_count(global, engine) != - ++reset_engine_count) { - pr_err("%s engine reset not recorded!\n", - engine->name); - err = -EINVAL; - break; + /* GuC based resets are not logged per engine */ + if (!using_guc) { + if (i915_reset_engine_count(global, engine) != + ++reset_engine_count) { + pr_err("%s engine reset not recorded!\n", + engine->name); + err = -EINVAL; + goto restore; + } } count++; + +restore: + err2 = intel_selftest_restore_policy(engine, &saved); + if (err2) + pr_err("[%s] Restore policy failed: %d!\n", engine->name, err); + if (err == 0) + err = err2; + if (err) + break; } while (time_before(jiffies, end_time)); clear_bit(I915_RESET_ENGINE + id, >->reset.flags); st_engine_heartbeat_enable(engine); @@ -765,12 +833,16 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active) break; err = igt_flush_test(gt->i915); - if (err) + if (err) { + pr_err("[%s] Flush failed: %d!\n", engine->name, err); break; + } } - if (intel_gt_is_wedged(gt)) + if (intel_gt_is_wedged(gt)) { + pr_err("GT is wedged!\n"); err = -EIO; + } if (active) hang_fini(&h); @@ -807,7 +879,7 @@ static int active_request_put(struct i915_request *rq) if (!rq) return 0; - if (i915_request_wait(rq, 0, 5 * HZ) < 0) { + if (i915_request_wait(rq, 0, 10 * HZ) < 0) { GEM_TRACE("%s timed out waiting for completion of fence %llx:%lld\n", rq->engine->name, rq->fence.context, @@ -837,6 +909,7 @@ static int active_engine(void *data) ce[count] = intel_context_create(engine); if (IS_ERR(ce[count])) { err = PTR_ERR(ce[count]); + pr_err("[%s] Create context #%ld failed: %d!\n", engine->name, count, err); while (--count) intel_context_put(ce[count]); return err; @@ -852,23 +925,26 @@ static int active_engine(void *data) new = intel_context_create_request(ce[idx]); if (IS_ERR(new)) { err = PTR_ERR(new); + pr_err("[%s] Create request #%d failed: %d!\n", engine->name, idx, err); break; } rq[idx] = i915_request_get(new); i915_request_add(new); - if (engine->schedule && arg->flags & TEST_PRIORITY) { + if (engine->sched_engine->schedule && arg->flags & TEST_PRIORITY) { struct i915_sched_attr attr = { .priority = i915_prandom_u32_max_state(512, &prng), }; - engine->schedule(rq[idx], &attr); + engine->sched_engine->schedule(rq[idx], &attr); } err = active_request_put(old); - if (err) + if (err) { + pr_err("[%s] Request put failed: %d!\n", engine->name, err); break; + } cond_resched(); } @@ -876,6 +952,9 @@ static int active_engine(void *data) for (count = 0; count < ARRAY_SIZE(rq); count++) { int err__ = active_request_put(rq[count]); + if (err) + pr_err("[%s] Request put #%ld failed: %d!\n", engine->name, count, err); + /* Keep the first error */ if (!err) err = err__; @@ -916,10 +995,13 @@ static int __igt_reset_engines(struct intel_gt *gt, struct active_engine threads[I915_NUM_ENGINES] = {}; unsigned long device = i915_reset_count(global); unsigned long count = 0, reported; + bool using_guc = intel_engine_uses_guc(engine); IGT_TIMEOUT(end_time); - if (flags & TEST_ACTIVE && - !intel_engine_can_store_dword(engine)) + if (flags & TEST_ACTIVE) { + if (!intel_engine_can_store_dword(engine)) + continue; + } else if (using_guc) continue; if (!wait_for_idle(engine)) { @@ -949,6 +1031,7 @@ static int __igt_reset_engines(struct intel_gt *gt, "igt/%s", other->name); if (IS_ERR(tsk)) { err = PTR_ERR(tsk); + pr_err("[%s] Thread spawn failed: %d!\n", engine->name, err); goto unwind; } @@ -958,16 +1041,27 @@ static int __igt_reset_engines(struct intel_gt 
*gt, yield(); /* start all threads before we begin */ - st_engine_heartbeat_disable(engine); + st_engine_heartbeat_disable_no_pm(engine); set_bit(I915_RESET_ENGINE + id, >->reset.flags); do { struct i915_request *rq = NULL; + struct intel_selftest_saved_policy saved; + int err2; + + err = intel_selftest_modify_policy(engine, &saved, + SELFTEST_SCHEDULER_MODIFY_FAST_RESET); + if (err) { + pr_err("[%s] Modify policy failed: %d!\n", engine->name, err); + break; + } if (flags & TEST_ACTIVE) { rq = hang_create_request(&h, engine); if (IS_ERR(rq)) { err = PTR_ERR(rq); - break; + pr_err("[%s] Create hang request failed: %d!\n", + engine->name, err); + goto restore; } i915_request_get(rq); @@ -983,32 +1077,44 @@ static int __igt_reset_engines(struct intel_gt *gt, i915_request_put(rq); err = -EIO; - break; + goto restore; } + } else { + intel_engine_pm_get(engine); } - err = intel_engine_reset(engine, NULL); - if (err) { - pr_err("i915_reset_engine(%s:%s): failed, err=%d\n", - engine->name, test_name, err); - break; + if (!using_guc) { + err = intel_engine_reset(engine, NULL); + if (err) { + pr_err("i915_reset_engine(%s:%s): failed, err=%d\n", + engine->name, test_name, err); + goto restore; + } + } + + if (rq) { + /* Ensure the reset happens and kills the engine */ + err = intel_selftest_wait_for_rq(rq); + if (err) + pr_err("[%s] Wait for request %lld:%lld [0x%04X] failed: %d!\n", + engine->name, rq->fence.context, + rq->fence.seqno, rq->context->guc_id, err); } count++; if (rq) { if (rq->fence.error != -EIO) { - pr_err("i915_reset_engine(%s:%s):" - " failed to reset request %llx:%lld\n", + pr_err("i915_reset_engine(%s:%s): failed to reset request %lld:%lld [0x%04X]\n", engine->name, test_name, rq->fence.context, - rq->fence.seqno); + rq->fence.seqno, rq->context->guc_id); i915_request_put(rq); GEM_TRACE_DUMP(); intel_gt_set_wedged(gt); err = -EIO; - break; + goto restore; } if (i915_request_wait(rq, 0, HZ / 5) < 0) { @@ -1027,12 +1133,15 @@ static int __igt_reset_engines(struct intel_gt *gt, GEM_TRACE_DUMP(); intel_gt_set_wedged(gt); err = -EIO; - break; + goto restore; } i915_request_put(rq); } + if (!(flags & TEST_ACTIVE)) + intel_engine_pm_put(engine); + if (!(flags & TEST_SELF) && !wait_for_idle(engine)) { struct drm_printer p = drm_info_printer(gt->i915->drm.dev); @@ -1044,22 +1153,34 @@ static int __igt_reset_engines(struct intel_gt *gt, "%s\n", engine->name); err = -EIO; - break; + goto restore; } + +restore: + err2 = intel_selftest_restore_policy(engine, &saved); + if (err2) + pr_err("[%s] Restore policy failed: %d!\n", engine->name, err2); + if (err == 0) + err = err2; + if (err) + break; } while (time_before(jiffies, end_time)); clear_bit(I915_RESET_ENGINE + id, >->reset.flags); - st_engine_heartbeat_enable(engine); + st_engine_heartbeat_enable_no_pm(engine); pr_info("i915_reset_engine(%s:%s): %lu resets\n", engine->name, test_name, count); - reported = i915_reset_engine_count(global, engine); - reported -= threads[engine->id].resets; - if (reported != count) { - pr_err("i915_reset_engine(%s:%s): reset %lu times, but reported %lu\n", - engine->name, test_name, count, reported); - if (!err) - err = -EINVAL; + /* GuC based resets are not logged per engine */ + if (!using_guc) { + reported = i915_reset_engine_count(global, engine); + reported -= threads[engine->id].resets; + if (reported != count) { + pr_err("i915_reset_engine(%s:%s): reset %lu times, but reported %lu\n", + engine->name, test_name, count, reported); + if (!err) + err = -EINVAL; + } } unwind: @@ -1078,15 +1199,18 @@ unwind: 
} put_task_struct(threads[tmp].task); - if (other->uabi_class != engine->uabi_class && - threads[tmp].resets != - i915_reset_engine_count(global, other)) { - pr_err("Innocent engine %s was reset (count=%ld)\n", - other->name, - i915_reset_engine_count(global, other) - - threads[tmp].resets); - if (!err) - err = -EINVAL; + /* GuC based resets are not logged per engine */ + if (!using_guc) { + if (other->uabi_class != engine->uabi_class && + threads[tmp].resets != + i915_reset_engine_count(global, other)) { + pr_err("Innocent engine %s was reset (count=%ld)\n", + other->name, + i915_reset_engine_count(global, other) - + threads[tmp].resets); + if (!err) + err = -EINVAL; + } } } @@ -1101,8 +1225,10 @@ unwind: break; err = igt_flush_test(gt->i915); - if (err) + if (err) { + pr_err("[%s] Flush failed: %d!\n", engine->name, err); break; + } } if (intel_gt_is_wedged(gt)) @@ -1180,12 +1306,15 @@ static int igt_reset_wait(void *arg) igt_global_reset_lock(gt); err = hang_init(&h, gt); - if (err) + if (err) { + pr_err("[%s] Hang init failed: %d!\n", engine->name, err); goto unlock; + } rq = hang_create_request(&h, engine); if (IS_ERR(rq)) { err = PTR_ERR(rq); + pr_err("[%s] Create hang request failed: %d!\n", engine->name, err); goto fini; } @@ -1310,12 +1439,15 @@ static int __igt_reset_evict_vma(struct intel_gt *gt, /* Check that we can recover an unbind stuck on a hanging request */ err = hang_init(&h, gt); - if (err) + if (err) { + pr_err("[%s] Hang init failed: %d!\n", engine->name, err); return err; + } obj = i915_gem_object_create_internal(gt->i915, SZ_1M); if (IS_ERR(obj)) { err = PTR_ERR(obj); + pr_err("[%s] Create object failed: %d!\n", engine->name, err); goto fini; } @@ -1330,12 +1462,14 @@ static int __igt_reset_evict_vma(struct intel_gt *gt, arg.vma = i915_vma_instance(obj, vm, NULL); if (IS_ERR(arg.vma)) { err = PTR_ERR(arg.vma); + pr_err("[%s] VMA instance failed: %d!\n", engine->name, err); goto out_obj; } rq = hang_create_request(&h, engine); if (IS_ERR(rq)) { err = PTR_ERR(rq); + pr_err("[%s] Create hang request failed: %d!\n", engine->name, err); goto out_obj; } @@ -1347,6 +1481,7 @@ static int __igt_reset_evict_vma(struct intel_gt *gt, err = i915_vma_pin(arg.vma, 0, 0, pin_flags); if (err) { i915_request_add(rq); + pr_err("[%s] VMA pin failed: %d!\n", engine->name, err); goto out_obj; } @@ -1363,8 +1498,14 @@ static int __igt_reset_evict_vma(struct intel_gt *gt, i915_vma_lock(arg.vma); err = i915_request_await_object(rq, arg.vma->obj, flags & EXEC_OBJECT_WRITE); - if (err == 0) + if (err == 0) { err = i915_vma_move_to_active(arg.vma, rq, flags); + if (err) + pr_err("[%s] Move to active failed: %d!\n", engine->name, err); + } else { + pr_err("[%s] Request await failed: %d!\n", engine->name, err); + } + i915_vma_unlock(arg.vma); if (flags & EXEC_OBJECT_NEEDS_FENCE) @@ -1392,6 +1533,7 @@ static int __igt_reset_evict_vma(struct intel_gt *gt, tsk = kthread_run(fn, &arg, "igt/evict_vma"); if (IS_ERR(tsk)) { err = PTR_ERR(tsk); + pr_err("[%s] Thread spawn failed: %d!\n", engine->name, err); tsk = NULL; goto out_reset; } @@ -1508,17 +1650,29 @@ static int igt_reset_queue(void *arg) goto unlock; for_each_engine(engine, gt, id) { + struct intel_selftest_saved_policy saved; struct i915_request *prev; IGT_TIMEOUT(end_time); unsigned int count; + bool using_guc = intel_engine_uses_guc(engine); if (!intel_engine_can_store_dword(engine)) continue; + if (using_guc) { + err = intel_selftest_modify_policy(engine, &saved, + SELFTEST_SCHEDULER_MODIFY_NO_HANGCHECK); + if (err) { + pr_err("[%s] Modify 
policy failed: %d!\n", engine->name, err); + goto fini; + } + } + prev = hang_create_request(&h, engine); if (IS_ERR(prev)) { err = PTR_ERR(prev); - goto fini; + pr_err("[%s] Create 'prev' hang request failed: %d!\n", engine->name, err); + goto restore; } i915_request_get(prev); @@ -1532,7 +1686,8 @@ static int igt_reset_queue(void *arg) rq = hang_create_request(&h, engine); if (IS_ERR(rq)) { err = PTR_ERR(rq); - goto fini; + pr_err("[%s] Create hang request failed: %d!\n", engine->name, err); + goto restore; } i915_request_get(rq); @@ -1557,7 +1712,7 @@ static int igt_reset_queue(void *arg) GEM_TRACE_DUMP(); intel_gt_set_wedged(gt); - goto fini; + goto restore; } if (!wait_until_running(&h, prev)) { @@ -1575,7 +1730,7 @@ static int igt_reset_queue(void *arg) intel_gt_set_wedged(gt); err = -EIO; - goto fini; + goto restore; } reset_count = fake_hangcheck(gt, BIT(id)); @@ -1586,7 +1741,7 @@ static int igt_reset_queue(void *arg) i915_request_put(rq); i915_request_put(prev); err = -EINVAL; - goto fini; + goto restore; } if (rq->fence.error) { @@ -1595,7 +1750,7 @@ static int igt_reset_queue(void *arg) i915_request_put(rq); i915_request_put(prev); err = -EINVAL; - goto fini; + goto restore; } if (i915_reset_count(global) == reset_count) { @@ -1603,7 +1758,7 @@ static int igt_reset_queue(void *arg) i915_request_put(rq); i915_request_put(prev); err = -EINVAL; - goto fini; + goto restore; } i915_request_put(prev); @@ -1618,9 +1773,24 @@ static int igt_reset_queue(void *arg) i915_request_put(prev); - err = igt_flush_test(gt->i915); +restore: + if (using_guc) { + int err2 = intel_selftest_restore_policy(engine, &saved); + + if (err2) + pr_err("%s:%d> [%s] Restore policy failed: %d!\n", + __func__, __LINE__, engine->name, err2); + if (err == 0) + err = err2; + } if (err) + goto fini; + + err = igt_flush_test(gt->i915); + if (err) { + pr_err("[%s] Flush failed: %d!\n", engine->name, err); break; + } } fini: @@ -1653,12 +1823,15 @@ static int igt_handle_error(void *arg) return 0; err = hang_init(&h, gt); - if (err) + if (err) { + pr_err("[%s] Hang init failed: %d!\n", engine->name, err); return err; + } rq = hang_create_request(&h, engine); if (IS_ERR(rq)) { err = PTR_ERR(rq); + pr_err("[%s] Create hang request failed: %d!\n", engine->name, err); goto err_fini; } @@ -1702,7 +1875,7 @@ static int __igt_atomic_reset_engine(struct intel_engine_cs *engine, const struct igt_atomic_section *p, const char *mode) { - struct tasklet_struct * const t = &engine->execlists.tasklet; + struct tasklet_struct * const t = &engine->sched_engine->tasklet; int err; GEM_TRACE("i915_reset_engine(%s:%s) under %s\n", @@ -1743,12 +1916,15 @@ static int igt_atomic_reset_engine(struct intel_engine_cs *engine, return err; err = hang_init(&h, engine->gt); - if (err) + if (err) { + pr_err("[%s] Hang init failed: %d!\n", engine->name, err); return err; + } rq = hang_create_request(&h, engine); if (IS_ERR(rq)) { err = PTR_ERR(rq); + pr_err("[%s] Create hang request failed: %d!\n", engine->name, err); goto out; } diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 3119016d9910..b0977a3b699b 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -49,7 +49,7 @@ static int wait_for_submit(struct intel_engine_cs *engine, unsigned long timeout) { /* Ignore our own attempts to suppress excess tasklets */ - tasklet_hi_schedule(&engine->execlists.tasklet); + tasklet_hi_schedule(&engine->sched_engine->tasklet); timeout += jiffies; do { @@ -1613,12 
+1613,12 @@ static void garbage_reset(struct intel_engine_cs *engine, local_bh_disable(); if (!test_and_set_bit(bit, lock)) { - tasklet_disable(&engine->execlists.tasklet); + tasklet_disable(&engine->sched_engine->tasklet); if (!rq->fence.error) __intel_engine_reset_bh(engine, NULL); - tasklet_enable(&engine->execlists.tasklet); + tasklet_enable(&engine->sched_engine->tasklet); clear_and_wake_up_bit(bit, lock); } local_bh_enable(); diff --git a/drivers/gpu/drm/i915/gt/selftest_migrate.c b/drivers/gpu/drm/i915/gt/selftest_migrate.c new file mode 100644 index 000000000000..12ef2837c89b --- /dev/null +++ b/drivers/gpu/drm/i915/gt/selftest_migrate.c @@ -0,0 +1,669 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2020 Intel Corporation + */ + +#include <linux/sort.h> + +#include "selftests/i915_random.h" + +static const unsigned int sizes[] = { + SZ_4K, + SZ_64K, + SZ_2M, + CHUNK_SZ - SZ_4K, + CHUNK_SZ, + CHUNK_SZ + SZ_4K, + SZ_64M, +}; + +static struct drm_i915_gem_object * +create_lmem_or_internal(struct drm_i915_private *i915, size_t size) +{ + struct drm_i915_gem_object *obj; + + obj = i915_gem_object_create_lmem(i915, size, 0); + if (!IS_ERR(obj)) + return obj; + + return i915_gem_object_create_internal(i915, size); +} + +static int copy(struct intel_migrate *migrate, + int (*fn)(struct intel_migrate *migrate, + struct i915_gem_ww_ctx *ww, + struct drm_i915_gem_object *src, + struct drm_i915_gem_object *dst, + struct i915_request **out), + u32 sz, struct rnd_state *prng) +{ + struct drm_i915_private *i915 = migrate->context->engine->i915; + struct drm_i915_gem_object *src, *dst; + struct i915_request *rq; + struct i915_gem_ww_ctx ww; + u32 *vaddr; + int err = 0; + int i; + + src = create_lmem_or_internal(i915, sz); + if (IS_ERR(src)) + return 0; + + dst = i915_gem_object_create_internal(i915, sz); + if (IS_ERR(dst)) + goto err_free_src; + + for_i915_gem_ww(&ww, err, true) { + err = i915_gem_object_lock(src, &ww); + if (err) + continue; + + err = i915_gem_object_lock(dst, &ww); + if (err) + continue; + + vaddr = i915_gem_object_pin_map(src, I915_MAP_WC); + if (IS_ERR(vaddr)) { + err = PTR_ERR(vaddr); + continue; + } + + for (i = 0; i < sz / sizeof(u32); i++) + vaddr[i] = i; + i915_gem_object_flush_map(src); + + vaddr = i915_gem_object_pin_map(dst, I915_MAP_WC); + if (IS_ERR(vaddr)) { + err = PTR_ERR(vaddr); + goto unpin_src; + } + + for (i = 0; i < sz / sizeof(u32); i++) + vaddr[i] = ~i; + i915_gem_object_flush_map(dst); + + err = fn(migrate, &ww, src, dst, &rq); + if (!err) + continue; + + if (err != -EDEADLK && err != -EINTR && err != -ERESTARTSYS) + pr_err("%ps failed, size: %u\n", fn, sz); + if (rq) { + i915_request_wait(rq, 0, HZ); + i915_request_put(rq); + } + i915_gem_object_unpin_map(dst); +unpin_src: + i915_gem_object_unpin_map(src); + } + if (err) + goto err_out; + + if (rq) { + if (i915_request_wait(rq, 0, HZ) < 0) { + pr_err("%ps timed out, size: %u\n", fn, sz); + err = -ETIME; + } + i915_request_put(rq); + } + + for (i = 0; !err && i < sz / PAGE_SIZE; i++) { + int x = i * 1024 + i915_prandom_u32_max_state(1024, prng); + + if (vaddr[x] != x) { + pr_err("%ps failed, size: %u, offset: %zu\n", + fn, sz, x * sizeof(u32)); + igt_hexdump(vaddr + i * 1024, 4096); + err = -EINVAL; + } + } + + i915_gem_object_unpin_map(dst); + i915_gem_object_unpin_map(src); + +err_out: + i915_gem_object_put(dst); +err_free_src: + i915_gem_object_put(src); + + return err; +} + +static int clear(struct intel_migrate *migrate, + int (*fn)(struct intel_migrate *migrate, + struct i915_gem_ww_ctx *ww, 
+ struct drm_i915_gem_object *obj, + u32 value, + struct i915_request **out), + u32 sz, struct rnd_state *prng) +{ + struct drm_i915_private *i915 = migrate->context->engine->i915; + struct drm_i915_gem_object *obj; + struct i915_request *rq; + struct i915_gem_ww_ctx ww; + u32 *vaddr; + int err = 0; + int i; + + obj = create_lmem_or_internal(i915, sz); + if (IS_ERR(obj)) + return 0; + + for_i915_gem_ww(&ww, err, true) { + err = i915_gem_object_lock(obj, &ww); + if (err) + continue; + + vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC); + if (IS_ERR(vaddr)) { + err = PTR_ERR(vaddr); + continue; + } + + for (i = 0; i < sz / sizeof(u32); i++) + vaddr[i] = ~i; + i915_gem_object_flush_map(obj); + + err = fn(migrate, &ww, obj, sz, &rq); + if (!err) + continue; + + if (err != -EDEADLK && err != -EINTR && err != -ERESTARTSYS) + pr_err("%ps failed, size: %u\n", fn, sz); + if (rq) { + i915_request_wait(rq, 0, HZ); + i915_request_put(rq); + } + i915_gem_object_unpin_map(obj); + } + if (err) + goto err_out; + + if (rq) { + if (i915_request_wait(rq, 0, HZ) < 0) { + pr_err("%ps timed out, size: %u\n", fn, sz); + err = -ETIME; + } + i915_request_put(rq); + } + + for (i = 0; !err && i < sz / PAGE_SIZE; i++) { + int x = i * 1024 + i915_prandom_u32_max_state(1024, prng); + + if (vaddr[x] != sz) { + pr_err("%ps failed, size: %u, offset: %zu\n", + fn, sz, x * sizeof(u32)); + igt_hexdump(vaddr + i * 1024, 4096); + err = -EINVAL; + } + } + + i915_gem_object_unpin_map(obj); +err_out: + i915_gem_object_put(obj); + + return err; +} + +static int __migrate_copy(struct intel_migrate *migrate, + struct i915_gem_ww_ctx *ww, + struct drm_i915_gem_object *src, + struct drm_i915_gem_object *dst, + struct i915_request **out) +{ + return intel_migrate_copy(migrate, ww, NULL, + src->mm.pages->sgl, src->cache_level, + i915_gem_object_is_lmem(src), + dst->mm.pages->sgl, dst->cache_level, + i915_gem_object_is_lmem(dst), + out); +} + +static int __global_copy(struct intel_migrate *migrate, + struct i915_gem_ww_ctx *ww, + struct drm_i915_gem_object *src, + struct drm_i915_gem_object *dst, + struct i915_request **out) +{ + return intel_context_migrate_copy(migrate->context, NULL, + src->mm.pages->sgl, src->cache_level, + i915_gem_object_is_lmem(src), + dst->mm.pages->sgl, dst->cache_level, + i915_gem_object_is_lmem(dst), + out); +} + +static int +migrate_copy(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng) +{ + return copy(migrate, __migrate_copy, sz, prng); +} + +static int +global_copy(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng) +{ + return copy(migrate, __global_copy, sz, prng); +} + +static int __migrate_clear(struct intel_migrate *migrate, + struct i915_gem_ww_ctx *ww, + struct drm_i915_gem_object *obj, + u32 value, + struct i915_request **out) +{ + return intel_migrate_clear(migrate, ww, NULL, + obj->mm.pages->sgl, + obj->cache_level, + i915_gem_object_is_lmem(obj), + value, out); +} + +static int __global_clear(struct intel_migrate *migrate, + struct i915_gem_ww_ctx *ww, + struct drm_i915_gem_object *obj, + u32 value, + struct i915_request **out) +{ + return intel_context_migrate_clear(migrate->context, NULL, + obj->mm.pages->sgl, + obj->cache_level, + i915_gem_object_is_lmem(obj), + value, out); +} + +static int +migrate_clear(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng) +{ + return clear(migrate, __migrate_clear, sz, prng); +} + +static int +global_clear(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng) +{ + return clear(migrate, __global_clear, sz, prng); 
+} + +static int live_migrate_copy(void *arg) +{ + struct intel_migrate *migrate = arg; + struct drm_i915_private *i915 = migrate->context->engine->i915; + I915_RND_STATE(prng); + int i; + + for (i = 0; i < ARRAY_SIZE(sizes); i++) { + int err; + + err = migrate_copy(migrate, sizes[i], &prng); + if (err == 0) + err = global_copy(migrate, sizes[i], &prng); + i915_gem_drain_freed_objects(i915); + if (err) + return err; + } + + return 0; +} + +static int live_migrate_clear(void *arg) +{ + struct intel_migrate *migrate = arg; + struct drm_i915_private *i915 = migrate->context->engine->i915; + I915_RND_STATE(prng); + int i; + + for (i = 0; i < ARRAY_SIZE(sizes); i++) { + int err; + + err = migrate_clear(migrate, sizes[i], &prng); + if (err == 0) + err = global_clear(migrate, sizes[i], &prng); + + i915_gem_drain_freed_objects(i915); + if (err) + return err; + } + + return 0; +} + +struct threaded_migrate { + struct intel_migrate *migrate; + struct task_struct *tsk; + struct rnd_state prng; +}; + +static int threaded_migrate(struct intel_migrate *migrate, + int (*fn)(void *arg), + unsigned int flags) +{ + const unsigned int n_cpus = num_online_cpus() + 1; + struct threaded_migrate *thread; + I915_RND_STATE(prng); + unsigned int i; + int err = 0; + + thread = kcalloc(n_cpus, sizeof(*thread), GFP_KERNEL); + if (!thread) + return 0; + + for (i = 0; i < n_cpus; ++i) { + struct task_struct *tsk; + + thread[i].migrate = migrate; + thread[i].prng = + I915_RND_STATE_INITIALIZER(prandom_u32_state(&prng)); + + tsk = kthread_run(fn, &thread[i], "igt-%d", i); + if (IS_ERR(tsk)) { + err = PTR_ERR(tsk); + break; + } + + get_task_struct(tsk); + thread[i].tsk = tsk; + } + + msleep(10); /* start all threads before we kthread_stop() */ + + for (i = 0; i < n_cpus; ++i) { + struct task_struct *tsk = thread[i].tsk; + int status; + + if (IS_ERR_OR_NULL(tsk)) + continue; + + status = kthread_stop(tsk); + if (status && !err) + err = status; + + put_task_struct(tsk); + } + + kfree(thread); + return err; +} + +static int __thread_migrate_copy(void *arg) +{ + struct threaded_migrate *tm = arg; + + return migrate_copy(tm->migrate, 2 * CHUNK_SZ, &tm->prng); +} + +static int thread_migrate_copy(void *arg) +{ + return threaded_migrate(arg, __thread_migrate_copy, 0); +} + +static int __thread_global_copy(void *arg) +{ + struct threaded_migrate *tm = arg; + + return global_copy(tm->migrate, 2 * CHUNK_SZ, &tm->prng); +} + +static int thread_global_copy(void *arg) +{ + return threaded_migrate(arg, __thread_global_copy, 0); +} + +static int __thread_migrate_clear(void *arg) +{ + struct threaded_migrate *tm = arg; + + return migrate_clear(tm->migrate, 2 * CHUNK_SZ, &tm->prng); +} + +static int __thread_global_clear(void *arg) +{ + struct threaded_migrate *tm = arg; + + return global_clear(tm->migrate, 2 * CHUNK_SZ, &tm->prng); +} + +static int thread_migrate_clear(void *arg) +{ + return threaded_migrate(arg, __thread_migrate_clear, 0); +} + +static int thread_global_clear(void *arg) +{ + return threaded_migrate(arg, __thread_global_clear, 0); +} + +int intel_migrate_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(live_migrate_copy), + SUBTEST(live_migrate_clear), + SUBTEST(thread_migrate_copy), + SUBTEST(thread_migrate_clear), + SUBTEST(thread_global_copy), + SUBTEST(thread_global_clear), + }; + struct intel_gt *gt = &i915->gt; + + if (!gt->migrate.context) + return 0; + + return i915_subtests(tests, >->migrate); +} + +static struct drm_i915_gem_object * 
+create_init_lmem_internal(struct intel_gt *gt, size_t sz, bool try_lmem) +{ + struct drm_i915_gem_object *obj = NULL; + int err; + + if (try_lmem) + obj = i915_gem_object_create_lmem(gt->i915, sz, 0); + + if (IS_ERR_OR_NULL(obj)) { + obj = i915_gem_object_create_internal(gt->i915, sz); + if (IS_ERR(obj)) + return obj; + } + + i915_gem_object_trylock(obj); + err = i915_gem_object_pin_pages(obj); + if (err) { + i915_gem_object_unlock(obj); + i915_gem_object_put(obj); + return ERR_PTR(err); + } + + return obj; +} + +static int wrap_ktime_compare(const void *A, const void *B) +{ + const ktime_t *a = A, *b = B; + + return ktime_compare(*a, *b); +} + +static int __perf_clear_blt(struct intel_context *ce, + struct scatterlist *sg, + enum i915_cache_level cache_level, + bool is_lmem, + size_t sz) +{ + ktime_t t[5]; + int pass; + int err = 0; + + for (pass = 0; pass < ARRAY_SIZE(t); pass++) { + struct i915_request *rq; + ktime_t t0, t1; + + t0 = ktime_get(); + + err = intel_context_migrate_clear(ce, NULL, sg, cache_level, + is_lmem, 0, &rq); + if (rq) { + if (i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT) < 0) + err = -EIO; + i915_request_put(rq); + } + if (err) + break; + + t1 = ktime_get(); + t[pass] = ktime_sub(t1, t0); + } + if (err) + return err; + + sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL); + pr_info("%s: %zd KiB fill: %lld MiB/s\n", + ce->engine->name, sz >> 10, + div64_u64(mul_u32_u32(4 * sz, + 1000 * 1000 * 1000), + t[1] + 2 * t[2] + t[3]) >> 20); + return 0; +} + +static int perf_clear_blt(void *arg) +{ + struct intel_gt *gt = arg; + static const unsigned long sizes[] = { + SZ_4K, + SZ_64K, + SZ_2M, + SZ_64M + }; + int i; + + for (i = 0; i < ARRAY_SIZE(sizes); i++) { + struct drm_i915_gem_object *dst; + int err; + + dst = create_init_lmem_internal(gt, sizes[i], true); + if (IS_ERR(dst)) + return PTR_ERR(dst); + + err = __perf_clear_blt(gt->migrate.context, + dst->mm.pages->sgl, + I915_CACHE_NONE, + i915_gem_object_is_lmem(dst), + sizes[i]); + + i915_gem_object_unlock(dst); + i915_gem_object_put(dst); + if (err) + return err; + } + + return 0; +} + +static int __perf_copy_blt(struct intel_context *ce, + struct scatterlist *src, + enum i915_cache_level src_cache_level, + bool src_is_lmem, + struct scatterlist *dst, + enum i915_cache_level dst_cache_level, + bool dst_is_lmem, + size_t sz) +{ + ktime_t t[5]; + int pass; + int err = 0; + + for (pass = 0; pass < ARRAY_SIZE(t); pass++) { + struct i915_request *rq; + ktime_t t0, t1; + + t0 = ktime_get(); + + err = intel_context_migrate_copy(ce, NULL, + src, src_cache_level, + src_is_lmem, + dst, dst_cache_level, + dst_is_lmem, + &rq); + if (rq) { + if (i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT) < 0) + err = -EIO; + i915_request_put(rq); + } + if (err) + break; + + t1 = ktime_get(); + t[pass] = ktime_sub(t1, t0); + } + if (err) + return err; + + sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL); + pr_info("%s: %zd KiB copy: %lld MiB/s\n", + ce->engine->name, sz >> 10, + div64_u64(mul_u32_u32(4 * sz, + 1000 * 1000 * 1000), + t[1] + 2 * t[2] + t[3]) >> 20); + return 0; +} + +static int perf_copy_blt(void *arg) +{ + struct intel_gt *gt = arg; + static const unsigned long sizes[] = { + SZ_4K, + SZ_64K, + SZ_2M, + SZ_64M + }; + int i; + + for (i = 0; i < ARRAY_SIZE(sizes); i++) { + struct drm_i915_gem_object *src, *dst; + int err; + + src = create_init_lmem_internal(gt, sizes[i], true); + if (IS_ERR(src)) + return PTR_ERR(src); + + dst = create_init_lmem_internal(gt, sizes[i], false); + if (IS_ERR(dst)) { + err = 
PTR_ERR(dst); + goto err_src; + } + + err = __perf_copy_blt(gt->migrate.context, + src->mm.pages->sgl, + I915_CACHE_NONE, + i915_gem_object_is_lmem(src), + dst->mm.pages->sgl, + I915_CACHE_NONE, + i915_gem_object_is_lmem(dst), + sizes[i]); + + i915_gem_object_unlock(dst); + i915_gem_object_put(dst); +err_src: + i915_gem_object_unlock(src); + i915_gem_object_put(src); + if (err) + return err; + } + + return 0; +} + +int intel_migrate_perf_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(perf_clear_blt), + SUBTEST(perf_copy_blt), + }; + struct intel_gt *gt = &i915->gt; + + if (intel_gt_is_wedged(gt)) + return 0; + + if (!gt->migrate.context) + return 0; + + return intel_gt_live_subtests(tests, gt); +} diff --git a/drivers/gpu/drm/i915/gt/selftest_mocs.c b/drivers/gpu/drm/i915/gt/selftest_mocs.c index b9bb0e6e97f7..13d25bf2a94a 100644 --- a/drivers/gpu/drm/i915/gt/selftest_mocs.c +++ b/drivers/gpu/drm/i915/gt/selftest_mocs.c @@ -10,6 +10,7 @@ #include "gem/selftests/mock_context.h" #include "selftests/igt_reset.h" #include "selftests/igt_spinner.h" +#include "selftests/intel_scheduler_helpers.h" struct live_mocs { struct drm_i915_mocs_table table; @@ -28,7 +29,7 @@ static struct intel_context *mocs_context_create(struct intel_engine_cs *engine) return ce; /* We build large requests to read the registers from the ring */ - ce->ring = __intel_context_ring_size(SZ_16K); + ce->ring_size = SZ_16K; return ce; } @@ -318,7 +319,8 @@ static int live_mocs_clean(void *arg) } static int active_engine_reset(struct intel_context *ce, - const char *reason) + const char *reason, + bool using_guc) { struct igt_spinner spin; struct i915_request *rq; @@ -335,9 +337,13 @@ static int active_engine_reset(struct intel_context *ce, } err = request_add_spin(rq, &spin); - if (err == 0) + if (err == 0 && !using_guc) err = intel_engine_reset(ce->engine, reason); + /* Ensure the reset happens and kills the engine */ + if (err == 0) + err = intel_selftest_wait_for_rq(rq); + igt_spinner_end(&spin); igt_spinner_fini(&spin); @@ -345,21 +351,23 @@ static int active_engine_reset(struct intel_context *ce, } static int __live_mocs_reset(struct live_mocs *mocs, - struct intel_context *ce) + struct intel_context *ce, bool using_guc) { struct intel_gt *gt = ce->engine->gt; int err; if (intel_has_reset_engine(gt)) { - err = intel_engine_reset(ce->engine, "mocs"); - if (err) - return err; - - err = check_mocs_engine(mocs, ce); - if (err) - return err; + if (!using_guc) { + err = intel_engine_reset(ce->engine, "mocs"); + if (err) + return err; + + err = check_mocs_engine(mocs, ce); + if (err) + return err; + } - err = active_engine_reset(ce, "mocs"); + err = active_engine_reset(ce, "mocs", using_guc); if (err) return err; @@ -395,19 +403,33 @@ static int live_mocs_reset(void *arg) igt_global_reset_lock(gt); for_each_engine(engine, gt, id) { + bool using_guc = intel_engine_uses_guc(engine); + struct intel_selftest_saved_policy saved; struct intel_context *ce; + int err2; + + err = intel_selftest_modify_policy(engine, &saved, + SELFTEST_SCHEDULER_MODIFY_FAST_RESET); + if (err) + break; ce = mocs_context_create(engine); if (IS_ERR(ce)) { err = PTR_ERR(ce); - break; + goto restore; } intel_engine_pm_get(engine); - err = __live_mocs_reset(&mocs, ce); - intel_engine_pm_put(engine); + err = __live_mocs_reset(&mocs, ce, using_guc); + + intel_engine_pm_put(engine); intel_context_put(ce); + +restore: + err2 = intel_selftest_restore_policy(engine, &saved); + if (err == 0) + err = err2; if (err) 
break; } diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c b/drivers/gpu/drm/i915/gt/selftest_reset.c index 8784257ec808..7a50c9f4071b 100644 --- a/drivers/gpu/drm/i915/gt/selftest_reset.c +++ b/drivers/gpu/drm/i915/gt/selftest_reset.c @@ -321,7 +321,7 @@ static int igt_atomic_engine_reset(void *arg) goto out_unlock; for_each_engine(engine, gt, id) { - struct tasklet_struct *t = &engine->execlists.tasklet; + struct tasklet_struct *t = &engine->sched_engine->tasklet; if (t->func) tasklet_disable(t); diff --git a/drivers/gpu/drm/i915/gt/selftest_slpc.c b/drivers/gpu/drm/i915/gt/selftest_slpc.c new file mode 100644 index 000000000000..9334bad131a2 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/selftest_slpc.c @@ -0,0 +1,311 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +#define NUM_STEPS 5 +#define H2G_DELAY 50000 +#define delay_for_h2g() usleep_range(H2G_DELAY, H2G_DELAY + 10000) +#define FREQUENCY_REQ_UNIT DIV_ROUND_CLOSEST(GT_FREQUENCY_MULTIPLIER, \ + GEN9_FREQ_SCALER) + +static int slpc_set_min_freq(struct intel_guc_slpc *slpc, u32 freq) +{ + int ret; + + ret = intel_guc_slpc_set_min_freq(slpc, freq); + if (ret) + pr_err("Could not set min frequency to [%u]\n", freq); + else /* Delay to ensure h2g completes */ + delay_for_h2g(); + + return ret; +} + +static int slpc_set_max_freq(struct intel_guc_slpc *slpc, u32 freq) +{ + int ret; + + ret = intel_guc_slpc_set_max_freq(slpc, freq); + if (ret) + pr_err("Could not set maximum frequency [%u]\n", + freq); + else /* Delay to ensure h2g completes */ + delay_for_h2g(); + + return ret; +} + +static int live_slpc_clamp_min(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_gt *gt = &i915->gt; + struct intel_guc_slpc *slpc = >->uc.guc.slpc; + struct intel_rps *rps = >->rps; + struct intel_engine_cs *engine; + enum intel_engine_id id; + struct igt_spinner spin; + u32 slpc_min_freq, slpc_max_freq; + int err = 0; + + if (!intel_uc_uses_guc_slpc(>->uc)) + return 0; + + if (igt_spinner_init(&spin, gt)) + return -ENOMEM; + + if (intel_guc_slpc_get_max_freq(slpc, &slpc_max_freq)) { + pr_err("Could not get SLPC max freq\n"); + return -EIO; + } + + if (intel_guc_slpc_get_min_freq(slpc, &slpc_min_freq)) { + pr_err("Could not get SLPC min freq\n"); + return -EIO; + } + + if (slpc_min_freq == slpc_max_freq) { + pr_err("Min/Max are fused to the same value\n"); + return -EINVAL; + } + + intel_gt_pm_wait_for_idle(gt); + intel_gt_pm_get(gt); + for_each_engine(engine, gt, id) { + struct i915_request *rq; + u32 step, min_freq, req_freq; + u32 act_freq, max_act_freq; + + if (!intel_engine_can_store_dword(engine)) + continue; + + /* Go from min to max in 5 steps */ + step = (slpc_max_freq - slpc_min_freq) / NUM_STEPS; + max_act_freq = slpc_min_freq; + for (min_freq = slpc_min_freq; min_freq < slpc_max_freq; + min_freq += step) { + err = slpc_set_min_freq(slpc, min_freq); + if (err) + break; + + st_engine_heartbeat_disable(engine); + + rq = igt_spinner_create_request(&spin, + engine->kernel_context, + MI_NOOP); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + st_engine_heartbeat_enable(engine); + break; + } + + i915_request_add(rq); + + if (!igt_wait_for_spinner(&spin, rq)) { + pr_err("%s: Spinner did not start\n", + engine->name); + igt_spinner_end(&spin); + st_engine_heartbeat_enable(engine); + intel_gt_set_wedged(engine->gt); + err = -EIO; + break; + } + + /* Wait for GuC to detect business and raise + * requested frequency if necessary. 
+ */ + delay_for_h2g(); + + req_freq = intel_rps_read_punit_req_frequency(rps); + + /* GuC requests freq in multiples of 50/3 MHz */ + if (req_freq < (min_freq - FREQUENCY_REQ_UNIT)) { + pr_err("SWReq is %d, should be at least %d\n", req_freq, + min_freq - FREQUENCY_REQ_UNIT); + igt_spinner_end(&spin); + st_engine_heartbeat_enable(engine); + err = -EINVAL; + break; + } + + act_freq = intel_rps_read_actual_frequency(rps); + if (act_freq > max_act_freq) + max_act_freq = act_freq; + + igt_spinner_end(&spin); + st_engine_heartbeat_enable(engine); + } + + pr_info("Max actual frequency for %s was %d\n", + engine->name, max_act_freq); + + /* Actual frequency should rise above min */ + if (max_act_freq == slpc_min_freq) { + pr_err("Actual freq did not rise above min\n"); + err = -EINVAL; + } + + if (err) + break; + } + + /* Restore min/max frequencies */ + slpc_set_max_freq(slpc, slpc_max_freq); + slpc_set_min_freq(slpc, slpc_min_freq); + + if (igt_flush_test(gt->i915)) + err = -EIO; + + intel_gt_pm_put(gt); + igt_spinner_fini(&spin); + intel_gt_pm_wait_for_idle(gt); + + return err; +} + +static int live_slpc_clamp_max(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_gt *gt = &i915->gt; + struct intel_guc_slpc *slpc; + struct intel_rps *rps; + struct intel_engine_cs *engine; + enum intel_engine_id id; + struct igt_spinner spin; + int err = 0; + u32 slpc_min_freq, slpc_max_freq; + + slpc = >->uc.guc.slpc; + rps = >->rps; + + if (!intel_uc_uses_guc_slpc(>->uc)) + return 0; + + if (igt_spinner_init(&spin, gt)) + return -ENOMEM; + + if (intel_guc_slpc_get_max_freq(slpc, &slpc_max_freq)) { + pr_err("Could not get SLPC max freq\n"); + return -EIO; + } + + if (intel_guc_slpc_get_min_freq(slpc, &slpc_min_freq)) { + pr_err("Could not get SLPC min freq\n"); + return -EIO; + } + + if (slpc_min_freq == slpc_max_freq) { + pr_err("Min/Max are fused to the same value\n"); + return -EINVAL; + } + + intel_gt_pm_wait_for_idle(gt); + intel_gt_pm_get(gt); + for_each_engine(engine, gt, id) { + struct i915_request *rq; + u32 max_freq, req_freq; + u32 act_freq, max_act_freq; + u32 step; + + if (!intel_engine_can_store_dword(engine)) + continue; + + /* Go from max to min in 5 steps */ + step = (slpc_max_freq - slpc_min_freq) / NUM_STEPS; + max_act_freq = slpc_min_freq; + for (max_freq = slpc_max_freq; max_freq > slpc_min_freq; + max_freq -= step) { + err = slpc_set_max_freq(slpc, max_freq); + if (err) + break; + + st_engine_heartbeat_disable(engine); + + rq = igt_spinner_create_request(&spin, + engine->kernel_context, + MI_NOOP); + if (IS_ERR(rq)) { + st_engine_heartbeat_enable(engine); + err = PTR_ERR(rq); + break; + } + + i915_request_add(rq); + + if (!igt_wait_for_spinner(&spin, rq)) { + pr_err("%s: SLPC spinner did not start\n", + engine->name); + igt_spinner_end(&spin); + st_engine_heartbeat_enable(engine); + intel_gt_set_wedged(engine->gt); + err = -EIO; + break; + } + + delay_for_h2g(); + + /* Verify that SWREQ indeed was set to specific value */ + req_freq = intel_rps_read_punit_req_frequency(rps); + + /* GuC requests freq in multiples of 50/3 MHz */ + if (req_freq > (max_freq + FREQUENCY_REQ_UNIT)) { + pr_err("SWReq is %d, should be at most %d\n", req_freq, + max_freq + FREQUENCY_REQ_UNIT); + igt_spinner_end(&spin); + st_engine_heartbeat_enable(engine); + err = -EINVAL; + break; + } + + act_freq = intel_rps_read_actual_frequency(rps); + if (act_freq > max_act_freq) + max_act_freq = act_freq; + + st_engine_heartbeat_enable(engine); + igt_spinner_end(&spin); + + if (err) + break; + } + + 
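The clamp tests above sweep the fused frequency range in NUM_STEPS slices and then allow the requested frequency (SWREQ) to deviate from the clamp by one request unit, because GuC programs frequencies in multiples of roughly 50/3 MHz. A minimal, self-contained sketch of that arithmetic follows; the 300 MHz and 1100 MHz fused limits are assumed example values, and the 50 and 3 operands stand in for GT_FREQUENCY_MULTIPLIER and GEN9_FREQ_SCALER, whose actual values are not part of this patch.

/*
 * Illustrative sketch only: mirrors the stepping and tolerance arithmetic of
 * the clamp selftests in plain userspace C.  The fused limits and the
 * multiplier/scaler operands are assumptions for the example.
 */
#include <stdio.h>

#define NUM_STEPS 5
#define DIV_ROUND_CLOSEST(x, d) (((x) + (d) / 2) / (d))
/* GuC requests frequency in multiples of ~50/3 MHz */
#define FREQUENCY_REQ_UNIT DIV_ROUND_CLOSEST(50, 3)

int main(void)
{
        unsigned int slpc_min_freq = 300;   /* assumed fused minimum, MHz */
        unsigned int slpc_max_freq = 1100;  /* assumed fused maximum, MHz */
        unsigned int step = (slpc_max_freq - slpc_min_freq) / NUM_STEPS;
        unsigned int min_freq;

        for (min_freq = slpc_min_freq; min_freq < slpc_max_freq; min_freq += step) {
                /* the selftest tolerates one request unit of rounding below the clamp */
                unsigned int lowest_ok_req = min_freq - FREQUENCY_REQ_UNIT;

                printf("min clamp %4u MHz -> SWREQ must be at least %4u MHz\n",
                       min_freq, lowest_ok_req);
        }
        return 0;
}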
pr_info("Max actual frequency for %s was %d\n", + engine->name, max_act_freq); + + /* Actual frequency should rise above min */ + if (max_act_freq == slpc_min_freq) { + pr_err("Actual freq did not rise above min\n"); + err = -EINVAL; + } + + if (igt_flush_test(gt->i915)) { + err = -EIO; + break; + } + + if (err) + break; + } + + /* Restore min/max freq */ + slpc_set_max_freq(slpc, slpc_max_freq); + slpc_set_min_freq(slpc, slpc_min_freq); + + intel_gt_pm_put(gt); + igt_spinner_fini(&spin); + intel_gt_pm_wait_for_idle(gt); + + return err; +} + +int intel_slpc_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(live_slpc_clamp_max), + SUBTEST(live_slpc_clamp_min), + }; + + if (intel_gt_is_wedged(&i915->gt)) + return 0; + + return i915_live_subtests(tests, i915); +} diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c index 64da0c91dec1..d0b6a3afcf44 100644 --- a/drivers/gpu/drm/i915/gt/selftest_timeline.c +++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c @@ -874,7 +874,7 @@ static int create_watcher(struct hwsp_watcher *w, if (IS_ERR(ce)) return PTR_ERR(ce); - ce->ring = __intel_context_ring_size(ringsz); + ce->ring_size = ringsz; w->rq = intel_context_create_request(ce); intel_context_put(ce); if (IS_ERR(w->rq)) diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c index c30754daf4b1..e623ac45f4aa 100644 --- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c +++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c @@ -12,6 +12,7 @@ #include "selftests/igt_flush_test.h" #include "selftests/igt_reset.h" #include "selftests/igt_spinner.h" +#include "selftests/intel_scheduler_helpers.h" #include "selftests/mock_drm.h" #include "gem/selftests/igt_gem_utils.h" @@ -261,28 +262,34 @@ static int do_engine_reset(struct intel_engine_cs *engine) return intel_engine_reset(engine, "live_workarounds"); } +static int do_guc_reset(struct intel_engine_cs *engine) +{ + /* Currently a no-op as the reset is handled by GuC */ + return 0; +} + static int switch_to_scratch_context(struct intel_engine_cs *engine, - struct igt_spinner *spin) + struct igt_spinner *spin, + struct i915_request **rq) { struct intel_context *ce; - struct i915_request *rq; int err = 0; ce = intel_context_create(engine); if (IS_ERR(ce)) return PTR_ERR(ce); - rq = igt_spinner_create_request(spin, ce, MI_NOOP); + *rq = igt_spinner_create_request(spin, ce, MI_NOOP); intel_context_put(ce); - if (IS_ERR(rq)) { + if (IS_ERR(*rq)) { spin = NULL; - err = PTR_ERR(rq); + err = PTR_ERR(*rq); goto err; } - err = request_add_spin(rq, spin); + err = request_add_spin(*rq, spin); err: if (err && spin) igt_spinner_end(spin); @@ -296,6 +303,7 @@ static int check_whitelist_across_reset(struct intel_engine_cs *engine, { struct intel_context *ce, *tmp; struct igt_spinner spin; + struct i915_request *rq; intel_wakeref_t wakeref; int err; @@ -316,13 +324,24 @@ static int check_whitelist_across_reset(struct intel_engine_cs *engine, goto out_spin; } - err = switch_to_scratch_context(engine, &spin); + err = switch_to_scratch_context(engine, &spin, &rq); if (err) goto out_spin; + /* Ensure the spinner hasn't aborted */ + if (i915_request_completed(rq)) { + pr_err("%s spinner failed to start\n", name); + err = -ETIMEDOUT; + goto out_spin; + } + with_intel_runtime_pm(engine->uncore->rpm, wakeref) err = reset(engine); + /* Ensure the reset happens and kills the engine */ + if (err == 0) + err = 
intel_selftest_wait_for_rq(rq); + igt_spinner_end(&spin); if (err) { @@ -787,9 +806,28 @@ static int live_reset_whitelist(void *arg) continue; if (intel_has_reset_engine(gt)) { - err = check_whitelist_across_reset(engine, - do_engine_reset, - "engine"); + if (intel_engine_uses_guc(engine)) { + struct intel_selftest_saved_policy saved; + int err2; + + err = intel_selftest_modify_policy(engine, &saved, + SELFTEST_SCHEDULER_MODIFY_FAST_RESET); + if (err) + goto out; + + err = check_whitelist_across_reset(engine, + do_guc_reset, + "guc"); + + err2 = intel_selftest_restore_policy(engine, &saved); + if (err == 0) + err = err2; + } else { + err = check_whitelist_across_reset(engine, + do_engine_reset, + "engine"); + } + if (err) goto out; } @@ -1147,7 +1185,7 @@ verify_wa_lists(struct intel_gt *gt, struct wa_lists *lists, enum intel_engine_id id; bool ok = true; - ok &= wa_list_verify(gt->uncore, &lists->gt_wa_list, str); + ok &= wa_list_verify(gt, &lists->gt_wa_list, str); for_each_engine(engine, gt, id) { struct intel_context *ce; @@ -1175,31 +1213,36 @@ live_gpu_reset_workarounds(void *arg) { struct intel_gt *gt = arg; intel_wakeref_t wakeref; - struct wa_lists lists; + struct wa_lists *lists; bool ok; if (!intel_has_gpu_reset(gt)) return 0; + lists = kzalloc(sizeof(*lists), GFP_KERNEL); + if (!lists) + return -ENOMEM; + pr_info("Verifying after GPU reset...\n"); igt_global_reset_lock(gt); wakeref = intel_runtime_pm_get(gt->uncore->rpm); - reference_lists_init(gt, &lists); + reference_lists_init(gt, lists); - ok = verify_wa_lists(gt, &lists, "before reset"); + ok = verify_wa_lists(gt, lists, "before reset"); if (!ok) goto out; intel_gt_reset(gt, ALL_ENGINES, "live_workarounds"); - ok = verify_wa_lists(gt, &lists, "after reset"); + ok = verify_wa_lists(gt, lists, "after reset"); out: - reference_lists_fini(gt, &lists); + reference_lists_fini(gt, lists); intel_runtime_pm_put(gt->uncore->rpm, wakeref); igt_global_reset_unlock(gt); + kfree(lists); return ok ? 
0 : -ESRCH; } @@ -1214,43 +1257,57 @@ live_engine_reset_workarounds(void *arg) struct igt_spinner spin; struct i915_request *rq; intel_wakeref_t wakeref; - struct wa_lists lists; + struct wa_lists *lists; int ret = 0; if (!intel_has_reset_engine(gt)) return 0; + lists = kzalloc(sizeof(*lists), GFP_KERNEL); + if (!lists) + return -ENOMEM; + igt_global_reset_lock(gt); wakeref = intel_runtime_pm_get(gt->uncore->rpm); - reference_lists_init(gt, &lists); + reference_lists_init(gt, lists); for_each_engine(engine, gt, id) { + struct intel_selftest_saved_policy saved; + bool using_guc = intel_engine_uses_guc(engine); bool ok; + int ret2; pr_info("Verifying after %s reset...\n", engine->name); + ret = intel_selftest_modify_policy(engine, &saved, + SELFTEST_SCHEDULER_MODIFY_FAST_RESET); + if (ret) + break; + ce = intel_context_create(engine); if (IS_ERR(ce)) { ret = PTR_ERR(ce); - break; + goto restore; } - ok = verify_wa_lists(gt, &lists, "before reset"); - if (!ok) { - ret = -ESRCH; - goto err; - } + if (!using_guc) { + ok = verify_wa_lists(gt, lists, "before reset"); + if (!ok) { + ret = -ESRCH; + goto err; + } - ret = intel_engine_reset(engine, "live_workarounds:idle"); - if (ret) { - pr_err("%s: Reset failed while idle\n", engine->name); - goto err; - } + ret = intel_engine_reset(engine, "live_workarounds:idle"); + if (ret) { + pr_err("%s: Reset failed while idle\n", engine->name); + goto err; + } - ok = verify_wa_lists(gt, &lists, "after idle reset"); - if (!ok) { - ret = -ESRCH; - goto err; + ok = verify_wa_lists(gt, lists, "after idle reset"); + if (!ok) { + ret = -ESRCH; + goto err; + } } ret = igt_spinner_init(&spin, engine->gt); @@ -1271,32 +1328,49 @@ live_engine_reset_workarounds(void *arg) goto err; } - ret = intel_engine_reset(engine, "live_workarounds:active"); - if (ret) { - pr_err("%s: Reset failed on an active spinner\n", - engine->name); - igt_spinner_fini(&spin); - goto err; + /* Ensure the spinner hasn't aborted */ + if (i915_request_completed(rq)) { + ret = -ETIMEDOUT; + goto skip; } + if (!using_guc) { + ret = intel_engine_reset(engine, "live_workarounds:active"); + if (ret) { + pr_err("%s: Reset failed on an active spinner\n", + engine->name); + igt_spinner_fini(&spin); + goto err; + } + } + + /* Ensure the reset happens and kills the engine */ + if (ret == 0) + ret = intel_selftest_wait_for_rq(rq); + +skip: igt_spinner_end(&spin); igt_spinner_fini(&spin); - ok = verify_wa_lists(gt, &lists, "after busy reset"); - if (!ok) { + ok = verify_wa_lists(gt, lists, "after busy reset"); + if (!ok) ret = -ESRCH; - goto err; - } err: intel_context_put(ce); + +restore: + ret2 = intel_selftest_restore_policy(engine, &saved); + if (ret == 0) + ret = ret2; if (ret) break; } - reference_lists_fini(gt, &lists); + reference_lists_fini(gt, lists); intel_runtime_pm_put(gt->uncore->rpm, wakeref); igt_global_reset_unlock(gt); + kfree(lists); igt_flush_test(gt->i915); diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h index 90efef8a73e4..8ff582222aff 100644 --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h @@ -6,6 +6,113 @@ #ifndef _ABI_GUC_ACTIONS_ABI_H #define _ABI_GUC_ACTIONS_ABI_H +/** + * DOC: HOST2GUC_REGISTER_CTB + * + * This message is used as part of the `CTB based communication`_ setup. + * + * This message must be sent as `MMIO HXG Message`_. 
+ * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_HOST_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:16 | DATA0 = MBZ | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | ACTION = _`GUC_ACTION_HOST2GUC_REGISTER_CTB` = 0x4505 | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:12 | RESERVED = MBZ | + * | +-------+--------------------------------------------------------------+ + * | | 11:8 | **TYPE** - type for the `CT Buffer`_ | + * | | | | + * | | | - _`GUC_CTB_TYPE_HOST2GUC` = 0 | + * | | | - _`GUC_CTB_TYPE_GUC2HOST` = 1 | + * | +-------+--------------------------------------------------------------+ + * | | 7:0 | **SIZE** - size of the `CT Buffer`_ in 4K units minus 1 | + * +---+-------+--------------------------------------------------------------+ + * | 2 | 31:0 | **DESC_ADDR** - GGTT address of the `CTB Descriptor`_ | + * +---+-------+--------------------------------------------------------------+ + * | 3 | 31:0 | **BUFF_ADDF** - GGTT address of the `CT Buffer`_ | + * +---+-------+--------------------------------------------------------------+ + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_GUC_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_SUCCESS_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:0 | DATA0 = MBZ | + * +---+-------+--------------------------------------------------------------+ + */ +#define GUC_ACTION_HOST2GUC_REGISTER_CTB 0x4505 + +#define HOST2GUC_REGISTER_CTB_REQUEST_MSG_LEN (GUC_HXG_REQUEST_MSG_MIN_LEN + 3u) +#define HOST2GUC_REGISTER_CTB_REQUEST_MSG_0_MBZ GUC_HXG_REQUEST_MSG_0_DATA0 +#define HOST2GUC_REGISTER_CTB_REQUEST_MSG_1_MBZ (0xfffff << 12) +#define HOST2GUC_REGISTER_CTB_REQUEST_MSG_1_TYPE (0xf << 8) +#define GUC_CTB_TYPE_HOST2GUC 0u +#define GUC_CTB_TYPE_GUC2HOST 1u +#define HOST2GUC_REGISTER_CTB_REQUEST_MSG_1_SIZE (0xff << 0) +#define HOST2GUC_REGISTER_CTB_REQUEST_MSG_2_DESC_ADDR GUC_HXG_REQUEST_MSG_n_DATAn +#define HOST2GUC_REGISTER_CTB_REQUEST_MSG_3_BUFF_ADDR GUC_HXG_REQUEST_MSG_n_DATAn + +#define HOST2GUC_REGISTER_CTB_RESPONSE_MSG_LEN GUC_HXG_RESPONSE_MSG_MIN_LEN +#define HOST2GUC_REGISTER_CTB_RESPONSE_MSG_0_MBZ GUC_HXG_RESPONSE_MSG_0_DATA0 + +/** + * DOC: HOST2GUC_DEREGISTER_CTB + * + * This message is used as part of the `CTB based communication`_ teardown. + * + * This message must be sent as `MMIO HXG Message`_. 
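Before the HOST2GUC_DEREGISTER_CTB field table below, here is how a sender could assemble the four-dword HOST2GUC_REGISTER_CTB request described above. This is an illustrative, self-contained sketch using plain shifts; the driver presumably builds the same dwords with FIELD_PREP() and the HOST2GUC_REGISTER_CTB_REQUEST_MSG_* masks, and the GGTT offsets and the 16 KiB buffer size here are made-up example values.

/*
 * Illustrative sketch only: packs the four dwords of a
 * HOST2GUC_REGISTER_CTB request exactly as the table above lays them out.
 * The GGTT offsets and the 16 KiB buffer size are made-up example values.
 */
#include <stdint.h>
#include <stdio.h>

#define GUC_HXG_ORIGIN_HOST              0u
#define GUC_HXG_TYPE_REQUEST             0u
#define GUC_ACTION_HOST2GUC_REGISTER_CTB 0x4505u
#define GUC_CTB_TYPE_HOST2GUC            0u

int main(void)
{
        uint32_t desc_ggtt = 0x00100000;  /* assumed GGTT offset of the CTB descriptor */
        uint32_t buff_ggtt = 0x00101000;  /* assumed GGTT offset of the CT buffer */
        uint32_t size = (16 * 1024) / (4 * 1024) - 1;  /* SIZE is in 4K units minus 1 */
        uint32_t msg[4];
        int i;

        /* dw0: ORIGIN = HOST (bit 31), TYPE = REQUEST (30:28), DATA0 = MBZ, ACTION = 0x4505 */
        msg[0] = (GUC_HXG_ORIGIN_HOST << 31) |
                 (GUC_HXG_TYPE_REQUEST << 28) |
                 GUC_ACTION_HOST2GUC_REGISTER_CTB;
        /* dw1: bits 31:12 MBZ, TYPE of the buffer (11:8), SIZE (7:0) */
        msg[1] = (GUC_CTB_TYPE_HOST2GUC << 8) | size;
        msg[2] = desc_ggtt;  /* DESC_ADDR */
        msg[3] = buff_ggtt;  /* BUFF_ADDR */

        for (i = 0; i < 4; i++)
                printf("dw%d = 0x%08x\n", i, (unsigned int)msg[i]);
        return 0;
}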
+ * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_HOST_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:16 | DATA0 = MBZ | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | ACTION = _`GUC_ACTION_HOST2GUC_DEREGISTER_CTB` = 0x4506 | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:12 | RESERVED = MBZ | + * | +-------+--------------------------------------------------------------+ + * | | 11:8 | **TYPE** - type of the `CT Buffer`_ | + * | | | | + * | | | see `GUC_ACTION_HOST2GUC_REGISTER_CTB`_ | + * | +-------+--------------------------------------------------------------+ + * | | 7:0 | RESERVED = MBZ | + * +---+-------+--------------------------------------------------------------+ + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_GUC_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_SUCCESS_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:0 | DATA0 = MBZ | + * +---+-------+--------------------------------------------------------------+ + */ +#define GUC_ACTION_HOST2GUC_DEREGISTER_CTB 0x4506 + +#define HOST2GUC_DEREGISTER_CTB_REQUEST_MSG_LEN (GUC_HXG_REQUEST_MSG_MIN_LEN + 1u) +#define HOST2GUC_DEREGISTER_CTB_REQUEST_MSG_0_MBZ GUC_HXG_REQUEST_MSG_0_DATA0 +#define HOST2GUC_DEREGISTER_CTB_REQUEST_MSG_1_MBZ (0xfffff << 12) +#define HOST2GUC_DEREGISTER_CTB_REQUEST_MSG_1_TYPE (0xf << 8) +#define HOST2GUC_DEREGISTER_CTB_REQUEST_MSG_1_MBZ2 (0xff << 0) + +#define HOST2GUC_DEREGISTER_CTB_RESPONSE_MSG_LEN GUC_HXG_RESPONSE_MSG_MIN_LEN +#define HOST2GUC_DEREGISTER_CTB_RESPONSE_MSG_0_MBZ GUC_HXG_RESPONSE_MSG_0_DATA0 + +/* legacy definitions */ + enum intel_guc_action { INTEL_GUC_ACTION_DEFAULT = 0x0, INTEL_GUC_ACTION_REQUEST_PREEMPTION = 0x2, @@ -17,13 +124,33 @@ enum intel_guc_action { INTEL_GUC_ACTION_FORCE_LOG_BUFFER_FLUSH = 0x302, INTEL_GUC_ACTION_ENTER_S_STATE = 0x501, INTEL_GUC_ACTION_EXIT_S_STATE = 0x502, - INTEL_GUC_ACTION_SLPC_REQUEST = 0x3003, + INTEL_GUC_ACTION_GLOBAL_SCHED_POLICY_CHANGE = 0x506, + INTEL_GUC_ACTION_SCHED_CONTEXT = 0x1000, + INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET = 0x1001, + INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_DONE = 0x1002, + INTEL_GUC_ACTION_SCHED_ENGINE_MODE_SET = 0x1003, + INTEL_GUC_ACTION_SCHED_ENGINE_MODE_DONE = 0x1004, + INTEL_GUC_ACTION_SET_CONTEXT_PRIORITY = 0x1005, + INTEL_GUC_ACTION_SET_CONTEXT_EXECUTION_QUANTUM = 0x1006, + INTEL_GUC_ACTION_SET_CONTEXT_PREEMPTION_TIMEOUT = 0x1007, + INTEL_GUC_ACTION_CONTEXT_RESET_NOTIFICATION = 0x1008, + INTEL_GUC_ACTION_ENGINE_FAILURE_NOTIFICATION = 0x1009, + INTEL_GUC_ACTION_SETUP_PC_GUCRC = 0x3004, INTEL_GUC_ACTION_AUTHENTICATE_HUC = 0x4000, + INTEL_GUC_ACTION_REGISTER_CONTEXT = 0x4502, + INTEL_GUC_ACTION_DEREGISTER_CONTEXT = 0x4503, INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER = 0x4505, INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER = 0x4506, + INTEL_GUC_ACTION_DEREGISTER_CONTEXT_DONE 
= 0x4600, + INTEL_GUC_ACTION_RESET_CLIENT = 0x5507, INTEL_GUC_ACTION_LIMIT }; +enum intel_guc_rc_options { + INTEL_GUCRC_HOST_CONTROL, + INTEL_GUCRC_FIRMWARE_CONTROL, +}; + enum intel_guc_preempt_options { INTEL_GUC_PREEMPT_OPTION_DROP_WORK_Q = 0x4, INTEL_GUC_PREEMPT_OPTION_DROP_SUBMIT_Q = 0x8, diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h new file mode 100644 index 000000000000..7a8d4bfc5f6a --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h @@ -0,0 +1,235 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef _GUC_ACTIONS_SLPC_ABI_H_ +#define _GUC_ACTIONS_SLPC_ABI_H_ + +#include <linux/types.h> +#include "i915_reg.h" + +/** + * DOC: SLPC SHARED DATA STRUCTURE + * + * +----+------+--------------------------------------------------------------+ + * | CL | Bytes| Description | + * +====+======+==============================================================+ + * | 1 | 0-3 | SHARED DATA SIZE | + * | +------+--------------------------------------------------------------+ + * | | 4-7 | GLOBAL STATE | + * | +------+--------------------------------------------------------------+ + * | | 8-11 | DISPLAY DATA ADDRESS | + * | +------+--------------------------------------------------------------+ + * | | 12:63| PADDING | + * +----+------+--------------------------------------------------------------+ + * | | 0:63 | PADDING(PLATFORM INFO) | + * +----+------+--------------------------------------------------------------+ + * | 3 | 0-3 | TASK STATE DATA | + * + +------+--------------------------------------------------------------+ + * | | 4:63 | PADDING | + * +----+------+--------------------------------------------------------------+ + * |4-21|0:1087| OVERRIDE PARAMS AND BIT FIELDS | + * +----+------+--------------------------------------------------------------+ + * | | | PADDING + EXTRA RESERVED PAGE | + * +----+------+--------------------------------------------------------------+ + */ + +/* + * SLPC exposes certain parameters for global configuration by the host. + * These are referred to as override parameters, because in most cases + * the host will not need to modify the default values used by SLPC. + * SLPC remembers the default values which allows the host to easily restore + * them by simply unsetting the override. The host can set or unset override + * parameters during SLPC (re-)initialization using the SLPC Reset event. 
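The override-parameter block sized by the macros below can be sanity-checked with a few lines of arithmetic: 256 u32 values plus a 256-bit override bitfield come to 1056 bytes, which rounds up to the 1088 bytes quoted next to SLPC_OVERRIDE_PARAMS_TOTAL_BYTES further down. A minimal sketch of that calculation is below (the constants mirror the macros defined afterwards; the description of the Parameter Set and Parameter Unset events continues right after it).

/*
 * Illustrative sketch only: reproduces the override-parameter sizing that the
 * SLPC_* macros below encode, to show where the 1088 byte figure comes from.
 */
#include <stdio.h>

#define SLPC_MAX_OVERRIDE_PARAMETERS 256
#define SLPC_CACHELINE_SIZE_BYTES    64

int main(void)
{
        unsigned int values = SLPC_MAX_OVERRIDE_PARAMETERS * 4;          /* one u32 value per parameter */
        unsigned int bitfield = (SLPC_MAX_OVERRIDE_PARAMETERS / 32) * 4; /* one "is overridden" bit per parameter */
        unsigned int raw = values + bitfield;
        unsigned int total = ((raw + SLPC_CACHELINE_SIZE_BYTES - 1) /
                              SLPC_CACHELINE_SIZE_BYTES) * SLPC_CACHELINE_SIZE_BYTES;

        printf("raw = %u bytes, cacheline aligned = %u bytes\n", raw, total); /* 1056 -> 1088 */
        return 0;
}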
+ * The host can also set or unset override parameters on the fly using the + * Parameter Set and Parameter Unset events + */ + +#define SLPC_MAX_OVERRIDE_PARAMETERS 256 +#define SLPC_OVERRIDE_BITFIELD_SIZE \ + (SLPC_MAX_OVERRIDE_PARAMETERS / 32) + +#define SLPC_PAGE_SIZE_BYTES 4096 +#define SLPC_CACHELINE_SIZE_BYTES 64 +#define SLPC_SHARED_DATA_SIZE_BYTE_HEADER SLPC_CACHELINE_SIZE_BYTES +#define SLPC_SHARED_DATA_SIZE_BYTE_PLATFORM_INFO SLPC_CACHELINE_SIZE_BYTES +#define SLPC_SHARED_DATA_SIZE_BYTE_TASK_STATE SLPC_CACHELINE_SIZE_BYTES +#define SLPC_SHARED_DATA_MODE_DEFN_TABLE_SIZE SLPC_PAGE_SIZE_BYTES +#define SLPC_SHARED_DATA_SIZE_BYTE_MAX (2 * SLPC_PAGE_SIZE_BYTES) + +/* + * Cacheline size aligned (Total size needed for + * SLPM_KMD_MAX_OVERRIDE_PARAMETERS=256 is 1088 bytes) + */ +#define SLPC_OVERRIDE_PARAMS_TOTAL_BYTES (((((SLPC_MAX_OVERRIDE_PARAMETERS * 4) \ + + ((SLPC_MAX_OVERRIDE_PARAMETERS / 32) * 4)) \ + + (SLPC_CACHELINE_SIZE_BYTES - 1)) / SLPC_CACHELINE_SIZE_BYTES) * \ + SLPC_CACHELINE_SIZE_BYTES) + +#define SLPC_SHARED_DATA_SIZE_BYTE_OTHER (SLPC_SHARED_DATA_SIZE_BYTE_MAX - \ + (SLPC_SHARED_DATA_SIZE_BYTE_HEADER \ + + SLPC_SHARED_DATA_SIZE_BYTE_PLATFORM_INFO \ + + SLPC_SHARED_DATA_SIZE_BYTE_TASK_STATE \ + + SLPC_OVERRIDE_PARAMS_TOTAL_BYTES \ + + SLPC_SHARED_DATA_MODE_DEFN_TABLE_SIZE)) + +enum slpc_task_enable { + SLPC_PARAM_TASK_DEFAULT = 0, + SLPC_PARAM_TASK_ENABLED, + SLPC_PARAM_TASK_DISABLED, + SLPC_PARAM_TASK_UNKNOWN +}; + +enum slpc_global_state { + SLPC_GLOBAL_STATE_NOT_RUNNING = 0, + SLPC_GLOBAL_STATE_INITIALIZING = 1, + SLPC_GLOBAL_STATE_RESETTING = 2, + SLPC_GLOBAL_STATE_RUNNING = 3, + SLPC_GLOBAL_STATE_SHUTTING_DOWN = 4, + SLPC_GLOBAL_STATE_ERROR = 5 +}; + +enum slpc_param_id { + SLPC_PARAM_TASK_ENABLE_GTPERF = 0, + SLPC_PARAM_TASK_DISABLE_GTPERF = 1, + SLPC_PARAM_TASK_ENABLE_BALANCER = 2, + SLPC_PARAM_TASK_DISABLE_BALANCER = 3, + SLPC_PARAM_TASK_ENABLE_DCC = 4, + SLPC_PARAM_TASK_DISABLE_DCC = 5, + SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ = 6, + SLPC_PARAM_GLOBAL_MAX_GT_UNSLICE_FREQ_MHZ = 7, + SLPC_PARAM_GLOBAL_MIN_GT_SLICE_FREQ_MHZ = 8, + SLPC_PARAM_GLOBAL_MAX_GT_SLICE_FREQ_MHZ = 9, + SLPC_PARAM_GTPERF_THRESHOLD_MAX_FPS = 10, + SLPC_PARAM_GLOBAL_DISABLE_GT_FREQ_MANAGEMENT = 11, + SLPC_PARAM_GTPERF_ENABLE_FRAMERATE_STALLING = 12, + SLPC_PARAM_GLOBAL_DISABLE_RC6_MODE_CHANGE = 13, + SLPC_PARAM_GLOBAL_OC_UNSLICE_FREQ_MHZ = 14, + SLPC_PARAM_GLOBAL_OC_SLICE_FREQ_MHZ = 15, + SLPC_PARAM_GLOBAL_ENABLE_IA_GT_BALANCING = 16, + SLPC_PARAM_GLOBAL_ENABLE_ADAPTIVE_BURST_TURBO = 17, + SLPC_PARAM_GLOBAL_ENABLE_EVAL_MODE = 18, + SLPC_PARAM_GLOBAL_ENABLE_BALANCER_IN_NON_GAMING_MODE = 19, + SLPC_PARAM_GLOBAL_RT_MODE_TURBO_FREQ_DELTA_MHZ = 20, + SLPC_PARAM_PWRGATE_RC_MODE = 21, + SLPC_PARAM_EDR_MODE_COMPUTE_TIMEOUT_MS = 22, + SLPC_PARAM_EDR_QOS_FREQ_MHZ = 23, + SLPC_PARAM_MEDIA_FF_RATIO_MODE = 24, + SLPC_PARAM_ENABLE_IA_FREQ_LIMITING = 25, + SLPC_PARAM_STRATEGIES = 26, + SLPC_PARAM_POWER_PROFILE = 27, + SLPC_PARAM_IGNORE_EFFICIENT_FREQUENCY = 28, + SLPC_MAX_PARAM = 32, +}; + +enum slpc_event_id { + SLPC_EVENT_RESET = 0, + SLPC_EVENT_SHUTDOWN = 1, + SLPC_EVENT_PLATFORM_INFO_CHANGE = 2, + SLPC_EVENT_DISPLAY_MODE_CHANGE = 3, + SLPC_EVENT_FLIP_COMPLETE = 4, + SLPC_EVENT_QUERY_TASK_STATE = 5, + SLPC_EVENT_PARAMETER_SET = 6, + SLPC_EVENT_PARAMETER_UNSET = 7, +}; + +struct slpc_task_state_data { + union { + u32 task_status_padding; + struct { + u32 status; +#define SLPC_GTPERF_TASK_ENABLED REG_BIT(0) +#define SLPC_DCC_TASK_ENABLED REG_BIT(11) +#define SLPC_IN_DCC REG_BIT(12) +#define 
SLPC_BALANCER_ENABLED REG_BIT(15) +#define SLPC_IBC_TASK_ENABLED REG_BIT(16) +#define SLPC_BALANCER_IA_LMT_ENABLED REG_BIT(17) +#define SLPC_BALANCER_IA_LMT_ACTIVE REG_BIT(18) + }; + }; + union { + u32 freq_padding; + struct { +#define SLPC_MAX_UNSLICE_FREQ_MASK REG_GENMASK(7, 0) +#define SLPC_MIN_UNSLICE_FREQ_MASK REG_GENMASK(15, 8) +#define SLPC_MAX_SLICE_FREQ_MASK REG_GENMASK(23, 16) +#define SLPC_MIN_SLICE_FREQ_MASK REG_GENMASK(31, 24) + u32 freq; + }; + }; +} __packed; + +struct slpc_shared_data_header { + /* Total size in bytes of this shared buffer. */ + u32 size; + u32 global_state; + u32 display_data_addr; +} __packed; + +struct slpc_override_params { + u32 bits[SLPC_OVERRIDE_BITFIELD_SIZE]; + u32 values[SLPC_MAX_OVERRIDE_PARAMETERS]; +} __packed; + +struct slpc_shared_data { + struct slpc_shared_data_header header; + u8 shared_data_header_pad[SLPC_SHARED_DATA_SIZE_BYTE_HEADER - + sizeof(struct slpc_shared_data_header)]; + + u8 platform_info_pad[SLPC_SHARED_DATA_SIZE_BYTE_PLATFORM_INFO]; + + struct slpc_task_state_data task_state_data; + u8 task_state_data_pad[SLPC_SHARED_DATA_SIZE_BYTE_TASK_STATE - + sizeof(struct slpc_task_state_data)]; + + struct slpc_override_params override_params; + u8 override_params_pad[SLPC_OVERRIDE_PARAMS_TOTAL_BYTES - + sizeof(struct slpc_override_params)]; + + u8 shared_data_pad[SLPC_SHARED_DATA_SIZE_BYTE_OTHER]; + + /* PAGE 2 (4096 bytes), mode based parameter will be removed soon */ + u8 reserved_mode_definition[4096]; +} __packed; + +/** + * DOC: SLPC H2G MESSAGE FORMAT + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_HOST_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:16 | DATA0 = MBZ | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | ACTION = _`GUC_ACTION_HOST2GUC_PC_SLPM_REQUEST` = 0x3003 | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:8 | **EVENT_ID** | + * + +-------+--------------------------------------------------------------+ + * | | 7:0 | **EVENT_ARGC** - number of data arguments | + * +---+-------+--------------------------------------------------------------+ + * | 2 | 31:0 | **EVENT_DATA1** | + * +---+-------+--------------------------------------------------------------+ + * |...| 31:0 | ... 
| + * +---+-------+--------------------------------------------------------------+ + * |2+n| 31:0 | **EVENT_DATAn** | + * +---+-------+--------------------------------------------------------------+ + */ + +#define GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST 0x3003 + +#define HOST2GUC_PC_SLPC_REQUEST_MSG_MIN_LEN \ + (GUC_HXG_REQUEST_MSG_MIN_LEN + 1u) +#define HOST2GUC_PC_SLPC_EVENT_MAX_INPUT_ARGS 9 +#define HOST2GUC_PC_SLPC_REQUEST_MSG_MAX_LEN \ + (HOST2GUC_PC_SLPC_REQUEST_REQUEST_MSG_MIN_LEN + \ + HOST2GUC_PC_SLPC_EVENT_MAX_INPUT_ARGS) +#define HOST2GUC_PC_SLPC_REQUEST_MSG_0_MBZ GUC_HXG_REQUEST_MSG_0_DATA0 +#define HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ID (0xff << 8) +#define HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ARGC (0xff << 0) +#define HOST2GUC_PC_SLPC_REQUEST_MSG_N_EVENT_DATA_N GUC_HXG_REQUEST_MSG_n_DATAn + +#endif diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h index d38935f47ecf..99e1fad5ca20 100644 --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h @@ -7,6 +7,111 @@ #define _ABI_GUC_COMMUNICATION_CTB_ABI_H #include <linux/types.h> +#include <linux/build_bug.h> + +#include "guc_messages_abi.h" + +/** + * DOC: CT Buffer + * + * Circular buffer used to send `CTB Message`_ + */ + +/** + * DOC: CTB Descriptor + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31:0 | **HEAD** - offset (in dwords) to the last dword that was | + * | | | read from the `CT Buffer`_. | + * | | | It can only be updated by the receiver. | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:0 | **TAIL** - offset (in dwords) to the last dword that was | + * | | | written to the `CT Buffer`_. | + * | | | It can only be updated by the sender. 
| + * +---+-------+--------------------------------------------------------------+ + * | 2 | 31:0 | **STATUS** - status of the CTB | + * | | | | + * | | | - _`GUC_CTB_STATUS_NO_ERROR` = 0 (normal operation) | + * | | | - _`GUC_CTB_STATUS_OVERFLOW` = 1 (head/tail too large) | + * | | | - _`GUC_CTB_STATUS_UNDERFLOW` = 2 (truncated message) | + * | | | - _`GUC_CTB_STATUS_MISMATCH` = 4 (head/tail modified) | + * +---+-------+--------------------------------------------------------------+ + * |...| | RESERVED = MBZ | + * +---+-------+--------------------------------------------------------------+ + * | 15| 31:0 | RESERVED = MBZ | + * +---+-------+--------------------------------------------------------------+ + */ + +struct guc_ct_buffer_desc { + u32 head; + u32 tail; + u32 status; +#define GUC_CTB_STATUS_NO_ERROR 0 +#define GUC_CTB_STATUS_OVERFLOW (1 << 0) +#define GUC_CTB_STATUS_UNDERFLOW (1 << 1) +#define GUC_CTB_STATUS_MISMATCH (1 << 2) + u32 reserved[13]; +} __packed; +static_assert(sizeof(struct guc_ct_buffer_desc) == 64); + +/** + * DOC: CTB Message + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31:16 | **FENCE** - message identifier | + * | +-------+--------------------------------------------------------------+ + * | | 15:12 | **FORMAT** - format of the CTB message | + * | | | - _`GUC_CTB_FORMAT_HXG` = 0 - see `CTB HXG Message`_ | + * | +-------+--------------------------------------------------------------+ + * | | 11:8 | **RESERVED** | + * | +-------+--------------------------------------------------------------+ + * | | 7:0 | **NUM_DWORDS** - length of the CTB message (w/o header) | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:0 | optional (depends on FORMAT) | + * +---+-------+ | + * |...| | | + * +---+-------+ | + * | n | 31:0 | | + * +---+-------+--------------------------------------------------------------+ + */ + +#define GUC_CTB_HDR_LEN 1u +#define GUC_CTB_MSG_MIN_LEN GUC_CTB_HDR_LEN +#define GUC_CTB_MSG_MAX_LEN 256u +#define GUC_CTB_MSG_0_FENCE (0xffff << 16) +#define GUC_CTB_MSG_0_FORMAT (0xf << 12) +#define GUC_CTB_FORMAT_HXG 0u +#define GUC_CTB_MSG_0_RESERVED (0xf << 8) +#define GUC_CTB_MSG_0_NUM_DWORDS (0xff << 0) + +/** + * DOC: CTB HXG Message + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31:16 | FENCE | + * | +-------+--------------------------------------------------------------+ + * | | 15:12 | FORMAT = GUC_CTB_FORMAT_HXG_ | + * | +-------+--------------------------------------------------------------+ + * | | 11:8 | RESERVED = MBZ | + * | +-------+--------------------------------------------------------------+ + * | | 7:0 | NUM_DWORDS = length (in dwords) of the embedded HXG message | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:0 | +--------------------------------------------------------+ | + * +---+-------+ | | | + * |...| | | Embedded `HXG Message`_ | | + * +---+-------+ | | | + * | n | 31:0 | +--------------------------------------------------------+ | + * +---+-------+--------------------------------------------------------------+ + */ + +#define GUC_CTB_HXG_MSG_MIN_LEN (GUC_CTB_MSG_MIN_LEN + GUC_HXG_MSG_MIN_LEN) +#define GUC_CTB_HXG_MSG_MAX_LEN 
GUC_CTB_MSG_MAX_LEN /** * DOC: CTB based communication @@ -61,28 +166,6 @@ */ /* - * Describes single command transport buffer. - * Used by both guc-master and clients. - */ -struct guc_ct_buffer_desc { - u32 addr; /* gfx address */ - u64 host_private; /* host private data */ - u32 size; /* size in bytes */ - u32 head; /* offset updated by GuC*/ - u32 tail; /* offset updated by owner */ - u32 is_in_error; /* error indicator */ - u32 reserved1; - u32 reserved2; - u32 owner; /* id of the channel owner */ - u32 owner_sub_id; /* owner-defined field for extra tracking */ - u32 reserved[5]; -} __packed; - -/* Type of command transport buffer */ -#define INTEL_GUC_CT_BUFFER_TYPE_SEND 0x0u -#define INTEL_GUC_CT_BUFFER_TYPE_RECV 0x1u - -/* * Definition of the command transport message header (DW0) * * bit[4..0] message len (in dwords) diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_mmio_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_mmio_abi.h index be066a62e9e0..bbf1ddb77434 100644 --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_mmio_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_mmio_abi.h @@ -7,46 +7,43 @@ #define _ABI_GUC_COMMUNICATION_MMIO_ABI_H /** - * DOC: MMIO based communication + * DOC: GuC MMIO based communication * - * The MMIO based communication between Host and GuC uses software scratch - * registers, where first register holds data treated as message header, - * and other registers are used to hold message payload. + * The MMIO based communication between Host and GuC relies on special + * hardware registers which format could be defined by the software + * (so called scratch registers). * - * For Gen9+, GuC uses software scratch registers 0xC180-0xC1B8, - * but no H2G command takes more than 8 parameters and the GuC FW - * itself uses an 8-element array to store the H2G message. + * Each MMIO based message, both Host to GuC (H2G) and GuC to Host (G2H) + * messages, which maximum length depends on number of available scratch + * registers, is directly written into those scratch registers. * - * +-----------+---------+---------+---------+ - * | MMIO[0] | MMIO[1] | ... | MMIO[n] | - * +-----------+---------+---------+---------+ - * | header | optional payload | - * +======+====+=========+=========+=========+ - * | 31:28|type| | | | - * +------+----+ | | | - * | 27:16|data| | | | - * +------+----+ | | | - * | 15:0|code| | | | - * +------+----+---------+---------+---------+ + * For Gen9+, there are 16 software scratch registers 0xC180-0xC1B8, + * but no H2G command takes more than 4 parameters and the GuC firmware + * itself uses an 4-element array to store the H2G message. * - * The message header consists of: + * For Gen11+, there are additional 4 registers 0x190240-0x19024C, which + * are, regardless on lower count, preferred over legacy ones. * - * - **type**, indicates message type - * - **code**, indicates message code, is specific for **type** - * - **data**, indicates message data, optional, depends on **code** - * - * The following message **types** are supported: - * - * - **REQUEST**, indicates Host-to-GuC request, requested GuC action code - * must be priovided in **code** field. Optional action specific parameters - * can be provided in remaining payload registers or **data** field. - * - * - **RESPONSE**, indicates GuC-to-Host response from earlier GuC request, - * action response status will be provided in **code** field. Optional - * response data can be returned in remaining payload registers or **data** - * field. 
+ * The MMIO based communication is mainly used during driver initialization + * phase to setup the `CTB based communication`_ that will be used afterwards. */ -#define GUC_MAX_MMIO_MSG_LEN 8 +#define GUC_MAX_MMIO_MSG_LEN 4 + +/** + * DOC: MMIO HXG Message + * + * Format of the MMIO messages follows definitions of `HXG Message`_. + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31:0 | +--------------------------------------------------------+ | + * +---+-------+ | | | + * |...| | | Embedded `HXG Message`_ | | + * +---+-------+ | | | + * | n | 31:0 | +--------------------------------------------------------+ | + * +---+-------+--------------------------------------------------------------+ + */ #endif /* _ABI_GUC_COMMUNICATION_MMIO_ABI_H */ diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_messages_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_messages_abi.h index 775e21f3058c..29ac823acd4c 100644 --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_messages_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_messages_abi.h @@ -6,6 +6,219 @@ #ifndef _ABI_GUC_MESSAGES_ABI_H #define _ABI_GUC_MESSAGES_ABI_H +/** + * DOC: HXG Message + * + * All messages exchanged with GuC are defined using 32 bit dwords. + * First dword is treated as a message header. Remaining dwords are optional. + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | | | | + * | 0 | 31 | **ORIGIN** - originator of the message | + * | | | - _`GUC_HXG_ORIGIN_HOST` = 0 | + * | | | - _`GUC_HXG_ORIGIN_GUC` = 1 | + * | | | | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | **TYPE** - message type | + * | | | - _`GUC_HXG_TYPE_REQUEST` = 0 | + * | | | - _`GUC_HXG_TYPE_EVENT` = 1 | + * | | | - _`GUC_HXG_TYPE_NO_RESPONSE_BUSY` = 3 | + * | | | - _`GUC_HXG_TYPE_NO_RESPONSE_RETRY` = 5 | + * | | | - _`GUC_HXG_TYPE_RESPONSE_FAILURE` = 6 | + * | | | - _`GUC_HXG_TYPE_RESPONSE_SUCCESS` = 7 | + * | +-------+--------------------------------------------------------------+ + * | | 27:0 | **AUX** - auxiliary data (depends on TYPE) | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:0 | | + * +---+-------+ | + * |...| | **PAYLOAD** - optional payload (depends on TYPE) | + * +---+-------+ | + * | n | 31:0 | | + * +---+-------+--------------------------------------------------------------+ + */ + +#define GUC_HXG_MSG_MIN_LEN 1u +#define GUC_HXG_MSG_0_ORIGIN (0x1 << 31) +#define GUC_HXG_ORIGIN_HOST 0u +#define GUC_HXG_ORIGIN_GUC 1u +#define GUC_HXG_MSG_0_TYPE (0x7 << 28) +#define GUC_HXG_TYPE_REQUEST 0u +#define GUC_HXG_TYPE_EVENT 1u +#define GUC_HXG_TYPE_NO_RESPONSE_BUSY 3u +#define GUC_HXG_TYPE_NO_RESPONSE_RETRY 5u +#define GUC_HXG_TYPE_RESPONSE_FAILURE 6u +#define GUC_HXG_TYPE_RESPONSE_SUCCESS 7u +#define GUC_HXG_MSG_0_AUX (0xfffffff << 0) +#define GUC_HXG_MSG_n_PAYLOAD (0xffffffff << 0) + +/** + * DOC: HXG Request + * + * The `HXG Request`_ message should be used to initiate synchronous activity + * for which confirmation or return data is expected. + * + * The recipient of this message shall use `HXG Response`_, `HXG Failure`_ + * or `HXG Retry`_ message as a definite reply, and may use `HXG Busy`_ + * message as a intermediate reply. 
+ * + * Format of @DATA0 and all @DATAn fields depends on the @ACTION code. + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:16 | **DATA0** - request data (depends on ACTION) | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | **ACTION** - requested action code | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:0 | | + * +---+-------+ | + * |...| | **DATAn** - optional data (depends on ACTION) | + * +---+-------+ | + * | n | 31:0 | | + * +---+-------+--------------------------------------------------------------+ + */ + +#define GUC_HXG_REQUEST_MSG_MIN_LEN GUC_HXG_MSG_MIN_LEN +#define GUC_HXG_REQUEST_MSG_0_DATA0 (0xfff << 16) +#define GUC_HXG_REQUEST_MSG_0_ACTION (0xffff << 0) +#define GUC_HXG_REQUEST_MSG_n_DATAn GUC_HXG_MSG_n_PAYLOAD + +/** + * DOC: HXG Event + * + * The `HXG Event`_ message should be used to initiate asynchronous activity + * that does not involves immediate confirmation nor data. + * + * Format of @DATA0 and all @DATAn fields depends on the @ACTION code. + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_EVENT_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:16 | **DATA0** - event data (depends on ACTION) | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | **ACTION** - event action code | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:0 | | + * +---+-------+ | + * |...| | **DATAn** - optional event data (depends on ACTION) | + * +---+-------+ | + * | n | 31:0 | | + * +---+-------+--------------------------------------------------------------+ + */ + +#define GUC_HXG_EVENT_MSG_MIN_LEN GUC_HXG_MSG_MIN_LEN +#define GUC_HXG_EVENT_MSG_0_DATA0 (0xfff << 16) +#define GUC_HXG_EVENT_MSG_0_ACTION (0xffff << 0) +#define GUC_HXG_EVENT_MSG_n_DATAn GUC_HXG_MSG_n_PAYLOAD + +/** + * DOC: HXG Busy + * + * The `HXG Busy`_ message may be used to acknowledge reception of the `HXG Request`_ + * message if the recipient expects that it processing will be longer than default + * timeout. + * + * The @COUNTER field may be used as a progress indicator. 
+ * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_NO_RESPONSE_BUSY_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:0 | **COUNTER** - progress indicator | + * +---+-------+--------------------------------------------------------------+ + */ + +#define GUC_HXG_BUSY_MSG_LEN GUC_HXG_MSG_MIN_LEN +#define GUC_HXG_BUSY_MSG_0_COUNTER GUC_HXG_MSG_0_AUX + +/** + * DOC: HXG Retry + * + * The `HXG Retry`_ message should be used by recipient to indicate that the + * `HXG Request`_ message was dropped and it should be resent again. + * + * The @REASON field may be used to provide additional information. + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_NO_RESPONSE_RETRY_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:0 | **REASON** - reason for retry | + * | | | - _`GUC_HXG_RETRY_REASON_UNSPECIFIED` = 0 | + * +---+-------+--------------------------------------------------------------+ + */ + +#define GUC_HXG_RETRY_MSG_LEN GUC_HXG_MSG_MIN_LEN +#define GUC_HXG_RETRY_MSG_0_REASON GUC_HXG_MSG_0_AUX +#define GUC_HXG_RETRY_REASON_UNSPECIFIED 0u + +/** + * DOC: HXG Failure + * + * The `HXG Failure`_ message shall be used as a reply to the `HXG Request`_ + * message that could not be processed due to an error. + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_FAILURE_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:16 | **HINT** - additional error hint | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | **ERROR** - error/result code | + * +---+-------+--------------------------------------------------------------+ + */ + +#define GUC_HXG_FAILURE_MSG_LEN GUC_HXG_MSG_MIN_LEN +#define GUC_HXG_FAILURE_MSG_0_HINT (0xfff << 16) +#define GUC_HXG_FAILURE_MSG_0_ERROR (0xffff << 0) + +/** + * DOC: HXG Response + * + * The `HXG Response`_ message shall be used as a reply to the `HXG Request`_ + * message that was successfully processed without an error. 
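All of the reply flavours above share the single-dword HXG header, so a receiver only needs ORIGIN and TYPE to dispatch and can then interpret the remaining 28 bits per type, which is what the reworked intel_guc_send_mmio() later in this patch does with FIELD_GET() and the GUC_HXG_* masks. The HXG Response field table follows below; first, a self-contained sketch of the pack-and-decode round trip, with the bit positions restated locally rather than taken from the masks above, and the 0x4505 action value borrowed from the CTB registration action defined earlier.

/*
 * Illustrative sketch only: packs an HXG request header and decodes the
 * possible reply headers as the tables in this file describe them.  The bit
 * positions are restated locally; the driver uses FIELD_PREP()/FIELD_GET()
 * with the GUC_HXG_* masks instead.
 */
#include <stdint.h>
#include <stdio.h>

#define HXG_ORIGIN_SHIFT 31
#define HXG_TYPE_SHIFT   28
#define HXG_TYPE_MASK    0x7u
#define HXG_AUX_MASK     0x0fffffffu

enum { ORIGIN_HOST = 0, ORIGIN_GUC = 1 };
enum { TYPE_REQUEST = 0, TYPE_EVENT = 1, TYPE_BUSY = 3, TYPE_RETRY = 5,
       TYPE_FAILURE = 6, TYPE_SUCCESS = 7 };

static uint32_t pack_request(uint32_t action, uint32_t data0)
{
        return ((uint32_t)ORIGIN_HOST << HXG_ORIGIN_SHIFT) |
               ((uint32_t)TYPE_REQUEST << HXG_TYPE_SHIFT) |
               ((data0 & 0xfffu) << 16) | (action & 0xffffu);
}

static void decode_reply(uint32_t header)
{
        unsigned int origin = header >> HXG_ORIGIN_SHIFT;
        unsigned int type = (header >> HXG_TYPE_SHIFT) & HXG_TYPE_MASK;
        unsigned int aux = header & HXG_AUX_MASK;

        if (origin != ORIGIN_GUC) {
                printf("not a reply from GuC\n");
                return;
        }

        switch (type) {
        case TYPE_SUCCESS:
                printf("success, DATA0 = %#x\n", aux);
                break;
        case TYPE_FAILURE:
                printf("failure, error = %#x, hint = %#x\n",
                       aux & 0xffffu, aux >> 16);  /* ERROR in 15:0, HINT in 27:16 */
                break;
        case TYPE_RETRY:
                printf("retry, reason = %u\n", aux);
                break;
        case TYPE_BUSY:
                printf("busy, progress counter = %u\n", aux);
                break;
        default:
                printf("unexpected reply type %u\n", type);
                break;
        }
}

int main(void)
{
        /* a REGISTER_CTB request: ORIGIN = HOST, TYPE = REQUEST, ACTION = 0x4505 */
        uint32_t req = pack_request(0x4505, 0);
        /* a successful reply: ORIGIN = GUC, TYPE = RESPONSE_SUCCESS, DATA0 = 0 */
        uint32_t rsp = ((uint32_t)ORIGIN_GUC << HXG_ORIGIN_SHIFT) |
                       ((uint32_t)TYPE_SUCCESS << HXG_TYPE_SHIFT);

        printf("request header = %#010x\n", (unsigned int)req);
        decode_reply(rsp);
        return 0;
}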
+ * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_SUCCESS_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:0 | **DATA0** - data (depends on ACTION from `HXG Request`_) | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:0 | | + * +---+-------+ | + * |...| | **DATAn** - data (depends on ACTION from `HXG Request`_) | + * +---+-------+ | + * | n | 31:0 | | + * +---+-------+--------------------------------------------------------------+ + */ + +#define GUC_HXG_RESPONSE_MSG_MIN_LEN GUC_HXG_MSG_MIN_LEN +#define GUC_HXG_RESPONSE_MSG_0_DATA0 GUC_HXG_MSG_0_AUX +#define GUC_HXG_RESPONSE_MSG_n_DATAn GUC_HXG_MSG_n_PAYLOAD + +/* deprecated */ #define INTEL_GUC_MSG_TYPE_SHIFT 28 #define INTEL_GUC_MSG_TYPE_MASK (0xF << INTEL_GUC_MSG_TYPE_SHIFT) #define INTEL_GUC_MSG_DATA_SHIFT 16 diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index f147cb389a20..fbfcae727d7f 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -7,6 +7,7 @@ #include "gt/intel_gt_irq.h" #include "gt/intel_gt_pm_irq.h" #include "intel_guc.h" +#include "intel_guc_slpc.h" #include "intel_guc_ads.h" #include "intel_guc_submission.h" #include "i915_drv.h" @@ -157,6 +158,8 @@ void intel_guc_init_early(struct intel_guc *guc) intel_guc_ct_init_early(&guc->ct); intel_guc_log_init_early(&guc->log); intel_guc_submission_init_early(guc); + intel_guc_slpc_init_early(&guc->slpc); + intel_guc_rc_init_early(guc); mutex_init(&guc->send_mutex); spin_lock_init(&guc->irq_lock); @@ -180,6 +183,11 @@ void intel_guc_init_early(struct intel_guc *guc) } } +void intel_guc_init_late(struct intel_guc *guc) +{ + intel_guc_ads_init_late(guc); +} + static u32 guc_ctl_debug_flags(struct intel_guc *guc) { u32 level = intel_guc_log_get_level(&guc->log); @@ -201,6 +209,9 @@ static u32 guc_ctl_feature_flags(struct intel_guc *guc) if (!intel_guc_submission_is_used(guc)) flags |= GUC_CTL_DISABLE_SCHEDULER; + if (intel_guc_slpc_is_used(guc)) + flags |= GUC_CTL_ENABLE_SLPC; + return flags; } @@ -219,24 +230,19 @@ static u32 guc_ctl_log_params_flags(struct intel_guc *guc) BUILD_BUG_ON(!CRASH_BUFFER_SIZE); BUILD_BUG_ON(!IS_ALIGNED(CRASH_BUFFER_SIZE, UNIT)); - BUILD_BUG_ON(!DPC_BUFFER_SIZE); - BUILD_BUG_ON(!IS_ALIGNED(DPC_BUFFER_SIZE, UNIT)); - BUILD_BUG_ON(!ISR_BUFFER_SIZE); - BUILD_BUG_ON(!IS_ALIGNED(ISR_BUFFER_SIZE, UNIT)); + BUILD_BUG_ON(!DEBUG_BUFFER_SIZE); + BUILD_BUG_ON(!IS_ALIGNED(DEBUG_BUFFER_SIZE, UNIT)); BUILD_BUG_ON((CRASH_BUFFER_SIZE / UNIT - 1) > (GUC_LOG_CRASH_MASK >> GUC_LOG_CRASH_SHIFT)); - BUILD_BUG_ON((DPC_BUFFER_SIZE / UNIT - 1) > - (GUC_LOG_DPC_MASK >> GUC_LOG_DPC_SHIFT)); - BUILD_BUG_ON((ISR_BUFFER_SIZE / UNIT - 1) > - (GUC_LOG_ISR_MASK >> GUC_LOG_ISR_SHIFT)); + BUILD_BUG_ON((DEBUG_BUFFER_SIZE / UNIT - 1) > + (GUC_LOG_DEBUG_MASK >> GUC_LOG_DEBUG_SHIFT)); flags = GUC_LOG_VALID | GUC_LOG_NOTIFY_ON_HALF_FULL | FLAG | ((CRASH_BUFFER_SIZE / UNIT - 1) << GUC_LOG_CRASH_SHIFT) | - ((DPC_BUFFER_SIZE / UNIT - 1) << GUC_LOG_DPC_SHIFT) | - ((ISR_BUFFER_SIZE / UNIT - 1) << GUC_LOG_ISR_SHIFT) | + ((DEBUG_BUFFER_SIZE / UNIT - 1) << GUC_LOG_DEBUG_SHIFT) | (offset << GUC_LOG_BUF_ADDR_SHIFT); #undef UNIT @@ 
-331,6 +337,12 @@ int intel_guc_init(struct intel_guc *guc) goto err_ct; } + if (intel_guc_slpc_is_used(guc)) { + ret = intel_guc_slpc_init(&guc->slpc); + if (ret) + goto err_submission; + } + /* now that everything is perma-pinned, initialize the parameters */ guc_init_params(guc); @@ -341,6 +353,8 @@ int intel_guc_init(struct intel_guc *guc) return 0; +err_submission: + intel_guc_submission_fini(guc); err_ct: intel_guc_ct_fini(&guc->ct); err_ads: @@ -363,6 +377,9 @@ void intel_guc_fini(struct intel_guc *guc) i915_ggtt_disable_guc(gt->ggtt); + if (intel_guc_slpc_is_used(guc)) + intel_guc_slpc_fini(&guc->slpc); + if (intel_guc_submission_is_used(guc)) intel_guc_submission_fini(guc); @@ -376,29 +393,27 @@ void intel_guc_fini(struct intel_guc *guc) /* * This function implements the MMIO based host to GuC interface. */ -int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len, +int intel_guc_send_mmio(struct intel_guc *guc, const u32 *request, u32 len, u32 *response_buf, u32 response_buf_size) { + struct drm_i915_private *i915 = guc_to_gt(guc)->i915; struct intel_uncore *uncore = guc_to_gt(guc)->uncore; - u32 status; + u32 header; int i; int ret; GEM_BUG_ON(!len); GEM_BUG_ON(len > guc->send_regs.count); - /* We expect only action code */ - GEM_BUG_ON(*action & ~INTEL_GUC_MSG_CODE_MASK); - - /* If CT is available, we expect to use MMIO only during init/fini */ - GEM_BUG_ON(*action != INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER && - *action != INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER); + GEM_BUG_ON(FIELD_GET(GUC_HXG_MSG_0_ORIGIN, request[0]) != GUC_HXG_ORIGIN_HOST); + GEM_BUG_ON(FIELD_GET(GUC_HXG_MSG_0_TYPE, request[0]) != GUC_HXG_TYPE_REQUEST); mutex_lock(&guc->send_mutex); intel_uncore_forcewake_get(uncore, guc->send_regs.fw_domains); +retry: for (i = 0; i < len; i++) - intel_uncore_write(uncore, guc_send_reg(guc, i), action[i]); + intel_uncore_write(uncore, guc_send_reg(guc, i), request[i]); intel_uncore_posting_read(uncore, guc_send_reg(guc, i - 1)); @@ -410,30 +425,74 @@ int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len, */ ret = __intel_wait_for_register_fw(uncore, guc_send_reg(guc, 0), - INTEL_GUC_MSG_TYPE_MASK, - INTEL_GUC_MSG_TYPE_RESPONSE << - INTEL_GUC_MSG_TYPE_SHIFT, - 10, 10, &status); - /* If GuC explicitly returned an error, convert it to -EIO */ - if (!ret && !INTEL_GUC_MSG_IS_RESPONSE_SUCCESS(status)) - ret = -EIO; + GUC_HXG_MSG_0_ORIGIN, + FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, + GUC_HXG_ORIGIN_GUC), + 10, 10, &header); + if (unlikely(ret)) { +timeout: + drm_err(&i915->drm, "mmio request %#x: no reply %x\n", + request[0], header); + goto out; + } - if (ret) { - DRM_ERROR("MMIO: GuC action %#x failed with error %d %#x\n", - action[0], ret, status); + if (FIELD_GET(GUC_HXG_MSG_0_TYPE, header) == GUC_HXG_TYPE_NO_RESPONSE_BUSY) { +#define done ({ header = intel_uncore_read(uncore, guc_send_reg(guc, 0)); \ + FIELD_GET(GUC_HXG_MSG_0_ORIGIN, header) != GUC_HXG_ORIGIN_GUC || \ + FIELD_GET(GUC_HXG_MSG_0_TYPE, header) != GUC_HXG_TYPE_NO_RESPONSE_BUSY; }) + + ret = wait_for(done, 1000); + if (unlikely(ret)) + goto timeout; + if (unlikely(FIELD_GET(GUC_HXG_MSG_0_ORIGIN, header) != + GUC_HXG_ORIGIN_GUC)) + goto proto; +#undef done + } + + if (FIELD_GET(GUC_HXG_MSG_0_TYPE, header) == GUC_HXG_TYPE_NO_RESPONSE_RETRY) { + u32 reason = FIELD_GET(GUC_HXG_RETRY_MSG_0_REASON, header); + + drm_dbg(&i915->drm, "mmio request %#x: retrying, reason %u\n", + request[0], reason); + goto retry; + } + + if (FIELD_GET(GUC_HXG_MSG_0_TYPE, header) == 
GUC_HXG_TYPE_RESPONSE_FAILURE) { + u32 hint = FIELD_GET(GUC_HXG_FAILURE_MSG_0_HINT, header); + u32 error = FIELD_GET(GUC_HXG_FAILURE_MSG_0_ERROR, header); + + drm_err(&i915->drm, "mmio request %#x: failure %x/%u\n", + request[0], error, hint); + ret = -ENXIO; + goto out; + } + + if (FIELD_GET(GUC_HXG_MSG_0_TYPE, header) != GUC_HXG_TYPE_RESPONSE_SUCCESS) { +proto: + drm_err(&i915->drm, "mmio request %#x: unexpected reply %#x\n", + request[0], header); + ret = -EPROTO; goto out; } if (response_buf) { - int count = min(response_buf_size, guc->send_regs.count - 1); + int count = min(response_buf_size, guc->send_regs.count); + + GEM_BUG_ON(!count); - for (i = 0; i < count; i++) + response_buf[0] = header; + + for (i = 1; i < count; i++) response_buf[i] = intel_uncore_read(uncore, - guc_send_reg(guc, i + 1)); - } + guc_send_reg(guc, i)); - /* Use data from the GuC response as our return value */ - ret = INTEL_GUC_MSG_TO_DATA(status); + /* Use number of copied dwords as our return value */ + ret = count; + } else { + /* Use data from the GuC response as our return value */ + ret = FIELD_GET(GUC_HXG_RESPONSE_MSG_0_DATA0, header); + } out: intel_uncore_forcewake_put(uncore, guc->send_regs.fw_domains); @@ -487,65 +546,35 @@ int intel_guc_auth_huc(struct intel_guc *guc, u32 rsa_offset) */ int intel_guc_suspend(struct intel_guc *guc) { - struct intel_uncore *uncore = guc_to_gt(guc)->uncore; int ret; - u32 status; u32 action[] = { - INTEL_GUC_ACTION_ENTER_S_STATE, - GUC_POWER_D1, /* any value greater than GUC_POWER_D0 */ + INTEL_GUC_ACTION_RESET_CLIENT, }; - /* - * If GuC communication is enabled but submission is not supported, - * we do not need to suspend the GuC. - */ - if (!intel_guc_submission_is_used(guc) || !intel_guc_is_ready(guc)) + if (!intel_guc_is_ready(guc)) return 0; - /* - * The ENTER_S_STATE action queues the save/restore operation in GuC FW - * and then returns, so waiting on the H2G is not enough to guarantee - * GuC is done. When all the processing is done, GuC writes - * INTEL_GUC_SLEEP_STATE_SUCCESS to scratch register 14, so we can poll - * on that. Note that GuC does not ensure that the value in the register - * is different from INTEL_GUC_SLEEP_STATE_SUCCESS while the action is - * in progress so we need to take care of that ourselves as well. - */ - - intel_uncore_write(uncore, SOFT_SCRATCH(14), - INTEL_GUC_SLEEP_STATE_INVALID_MASK); - - ret = intel_guc_send(guc, action, ARRAY_SIZE(action)); - if (ret) - return ret; - - ret = __intel_wait_for_register(uncore, SOFT_SCRATCH(14), - INTEL_GUC_SLEEP_STATE_INVALID_MASK, - 0, 0, 10, &status); - if (ret) - return ret; - - if (status != INTEL_GUC_SLEEP_STATE_SUCCESS) { - DRM_ERROR("GuC failed to change sleep state. " - "action=0x%x, err=%u\n", - action[0], status); - return -EIO; + if (intel_guc_submission_is_used(guc)) { + /* + * This H2G MMIO command tears down the GuC in two steps. First it will + * generate a G2H CTB for every active context indicating a reset. In + * practice the i915 shouldn't ever get a G2H as suspend should only be + * called when the GPU is idle. Next, it tears down the CTBs and this + * H2G MMIO command completes. + * + * Don't abort on a failure code from the GuC. Keep going and do the + * clean up in santize() and re-initialisation on resume and hopefully + * the error here won't be problematic. 
+ */ + ret = intel_guc_send_mmio(guc, action, ARRAY_SIZE(action), NULL, 0); + if (ret) + DRM_ERROR("GuC suspend: RESET_CLIENT action failed with error %d!\n", ret); } - return 0; -} - -/** - * intel_guc_reset_engine() - ask GuC to reset an engine - * @guc: intel_guc structure - * @engine: engine to be reset - */ -int intel_guc_reset_engine(struct intel_guc *guc, - struct intel_engine_cs *engine) -{ - /* XXX: to be implemented with submission interface rework */ + /* Signal that the GuC isn't running. */ + intel_guc_sanitize(guc); - return -ENODEV; + return 0; } /** @@ -554,7 +583,12 @@ int intel_guc_reset_engine(struct intel_guc *guc, */ int intel_guc_resume(struct intel_guc *guc) { - /* XXX: to be implemented with submission interface rework */ + /* + * NB: This function can still be called even if GuC submission is + * disabled, e.g. if GuC is enabled for HuC authentication only. Thus, + * if any code is later added here, it must be support doing nothing + * if submission is disabled (as per intel_guc_suspend). + */ return 0; } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index 4abc59f6f3cd..2e27fe59786b 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -6,12 +6,16 @@ #ifndef _INTEL_GUC_H_ #define _INTEL_GUC_H_ +#include <linux/xarray.h> +#include <linux/delay.h> + #include "intel_uncore.h" #include "intel_guc_fw.h" #include "intel_guc_fwif.h" #include "intel_guc_ct.h" #include "intel_guc_log.h" #include "intel_guc_reg.h" +#include "intel_guc_slpc_types.h" #include "intel_uc_fw.h" #include "i915_utils.h" #include "i915_vma.h" @@ -27,24 +31,47 @@ struct intel_guc { struct intel_uc_fw fw; struct intel_guc_log log; struct intel_guc_ct ct; + struct intel_guc_slpc slpc; + + /* Global engine used to submit requests to GuC */ + struct i915_sched_engine *sched_engine; + struct i915_request *stalled_request; /* intel_guc_recv interrupt related state */ spinlock_t irq_lock; unsigned int msg_enabled_mask; + atomic_t outstanding_submission_g2h; + struct { void (*reset)(struct intel_guc *guc); void (*enable)(struct intel_guc *guc); void (*disable)(struct intel_guc *guc); } interrupts; + /* + * contexts_lock protects the pool of free guc ids and a linked list of + * guc ids available to be stolen + */ + spinlock_t contexts_lock; + struct ida guc_ids; + struct list_head guc_id_list; + + bool submission_supported; bool submission_selected; + bool rc_supported; + bool rc_selected; struct i915_vma *ads_vma; struct __guc_ads_blob *ads_blob; + u32 ads_regset_size; + u32 ads_golden_ctxt_size; - struct i915_vma *stage_desc_pool; - void *stage_desc_pool_vaddr; + struct i915_vma *lrc_desc_pool; + void *lrc_desc_pool_vaddr; + + /* guc_id to intel_context lookup */ + struct xarray context_lookup; /* Control params for fw initialization */ u32 params[GUC_CTL_MAX_DWORDS]; @@ -74,7 +101,15 @@ static inline struct intel_guc *log_to_guc(struct intel_guc_log *log) static inline int intel_guc_send(struct intel_guc *guc, const u32 *action, u32 len) { - return intel_guc_ct_send(&guc->ct, action, len, NULL, 0); + return intel_guc_ct_send(&guc->ct, action, len, NULL, 0, 0); +} + +static +inline int intel_guc_send_nb(struct intel_guc *guc, const u32 *action, u32 len, + u32 g2h_len_dw) +{ + return intel_guc_ct_send(&guc->ct, action, len, NULL, 0, + MAKE_SEND_FLAGS(g2h_len_dw)); } static inline int @@ -82,7 +117,43 @@ intel_guc_send_and_receive(struct intel_guc *guc, const u32 *action, u32 len, u32 *response_buf, u32 
response_buf_size) { return intel_guc_ct_send(&guc->ct, action, len, - response_buf, response_buf_size); + response_buf, response_buf_size, 0); +} + +static inline int intel_guc_send_busy_loop(struct intel_guc *guc, + const u32 *action, + u32 len, + u32 g2h_len_dw, + bool loop) +{ + int err; + unsigned int sleep_period_ms = 1; + bool not_atomic = !in_atomic() && !irqs_disabled(); + + /* + * FIXME: Have caller pass in if we are in an atomic context to avoid + * using in_atomic(). It is likely safe here as we check for irqs + * disabled which basically all the spin locks in the i915 do but + * regardless this should be cleaned up. + */ + + /* No sleeping with spin locks, just busy loop */ + might_sleep_if(loop && not_atomic); + +retry: + err = intel_guc_send_nb(guc, action, len, g2h_len_dw); + if (unlikely(err == -EBUSY && loop)) { + if (likely(not_atomic)) { + if (msleep_interruptible(sleep_period_ms)) + return -EINTR; + sleep_period_ms = sleep_period_ms << 1; + } else { + cpu_relax(); + } + goto retry; + } + + return err; } static inline void intel_guc_to_host_event_handler(struct intel_guc *guc) @@ -118,6 +189,7 @@ static inline u32 intel_guc_ggtt_offset(struct intel_guc *guc, } void intel_guc_init_early(struct intel_guc *guc); +void intel_guc_init_late(struct intel_guc *guc); void intel_guc_init_send_regs(struct intel_guc *guc); void intel_guc_write_params(struct intel_guc *guc); int intel_guc_init(struct intel_guc *guc); @@ -160,9 +232,25 @@ static inline bool intel_guc_is_ready(struct intel_guc *guc) return intel_guc_is_fw_running(guc) && intel_guc_ct_enabled(&guc->ct); } +static inline void intel_guc_reset_interrupts(struct intel_guc *guc) +{ + guc->interrupts.reset(guc); +} + +static inline void intel_guc_enable_interrupts(struct intel_guc *guc) +{ + guc->interrupts.enable(guc); +} + +static inline void intel_guc_disable_interrupts(struct intel_guc *guc) +{ + guc->interrupts.disable(guc); +} + static inline int intel_guc_sanitize(struct intel_guc *guc) { intel_uc_fw_sanitize(&guc->fw); + intel_guc_disable_interrupts(guc); intel_guc_ct_sanitize(&guc->ct); guc->mmio_msg = 0; @@ -183,8 +271,27 @@ static inline void intel_guc_disable_msg(struct intel_guc *guc, u32 mask) spin_unlock_irq(&guc->irq_lock); } -int intel_guc_reset_engine(struct intel_guc *guc, - struct intel_engine_cs *engine); +int intel_guc_wait_for_idle(struct intel_guc *guc, long timeout); + +int intel_guc_deregister_done_process_msg(struct intel_guc *guc, + const u32 *msg, u32 len); +int intel_guc_sched_done_process_msg(struct intel_guc *guc, + const u32 *msg, u32 len); +int intel_guc_context_reset_process_msg(struct intel_guc *guc, + const u32 *msg, u32 len); +int intel_guc_engine_failure_process_msg(struct intel_guc *guc, + const u32 *msg, u32 len); + +void intel_guc_find_hung_context(struct intel_engine_cs *engine); + +int intel_guc_global_policies_update(struct intel_guc *guc); + +void intel_guc_context_ban(struct intel_context *ce, struct i915_request *rq); + +void intel_guc_submission_reset_prepare(struct intel_guc *guc); +void intel_guc_submission_reset(struct intel_guc *guc, bool stalled); +void intel_guc_submission_reset_finish(struct intel_guc *guc); +void intel_guc_submission_cancel_requests(struct intel_guc *guc); void intel_guc_load_status(struct intel_guc *guc, struct drm_printer *p); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c index 9abfbc6edbd6..6926919bcac6 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c +++ 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c @@ -3,8 +3,11 @@ * Copyright © 2014-2019 Intel Corporation */ +#include <linux/bsearch.h> + #include "gt/intel_gt.h" #include "gt/intel_lrc.h" +#include "gt/shmem_utils.h" #include "intel_guc_ads.h" #include "intel_guc_fwif.h" #include "intel_uc.h" @@ -23,10 +26,15 @@ * | guc_policies | * +---------------------------------------+ * | guc_gt_system_info | - * +---------------------------------------+ - * | guc_clients_info | - * +---------------------------------------+ - * | guc_ct_pool_entry[size] | + * +---------------------------------------+ <== static + * | guc_mmio_reg[countA] (engine 0.0) | + * | guc_mmio_reg[countB] (engine 0.1) | + * | guc_mmio_reg[countC] (engine 1.0) | + * | ... | + * +---------------------------------------+ <== dynamic + * | padding | + * +---------------------------------------+ <== 4K aligned + * | golden contexts | * +---------------------------------------+ * | padding | * +---------------------------------------+ <== 4K aligned @@ -39,18 +47,49 @@ struct __guc_ads_blob { struct guc_ads ads; struct guc_policies policies; struct guc_gt_system_info system_info; - struct guc_clients_info clients_info; - struct guc_ct_pool_entry ct_pool[GUC_CT_POOL_SIZE]; + /* From here on, location is dynamic! Refer to above diagram. */ + struct guc_mmio_reg regset[0]; } __packed; +static u32 guc_ads_regset_size(struct intel_guc *guc) +{ + GEM_BUG_ON(!guc->ads_regset_size); + return guc->ads_regset_size; +} + +static u32 guc_ads_golden_ctxt_size(struct intel_guc *guc) +{ + return PAGE_ALIGN(guc->ads_golden_ctxt_size); +} + static u32 guc_ads_private_data_size(struct intel_guc *guc) { return PAGE_ALIGN(guc->fw.private_data_size); } +static u32 guc_ads_regset_offset(struct intel_guc *guc) +{ + return offsetof(struct __guc_ads_blob, regset); +} + +static u32 guc_ads_golden_ctxt_offset(struct intel_guc *guc) +{ + u32 offset; + + offset = guc_ads_regset_offset(guc) + + guc_ads_regset_size(guc); + + return PAGE_ALIGN(offset); +} + static u32 guc_ads_private_data_offset(struct intel_guc *guc) { - return PAGE_ALIGN(sizeof(struct __guc_ads_blob)); + u32 offset; + + offset = guc_ads_golden_ctxt_offset(guc) + + guc_ads_golden_ctxt_size(guc); + + return PAGE_ALIGN(offset); } static u32 guc_ads_blob_size(struct intel_guc *guc) @@ -59,36 +98,66 @@ static u32 guc_ads_blob_size(struct intel_guc *guc) guc_ads_private_data_size(guc); } -static void guc_policy_init(struct guc_policy *policy) +static void guc_policies_init(struct intel_guc *guc, struct guc_policies *policies) { - policy->execution_quantum = POLICY_DEFAULT_EXECUTION_QUANTUM_US; - policy->preemption_time = POLICY_DEFAULT_PREEMPTION_TIME_US; - policy->fault_time = POLICY_DEFAULT_FAULT_TIME_US; - policy->policy_flags = 0; + struct intel_gt *gt = guc_to_gt(guc); + struct drm_i915_private *i915 = gt->i915; + + policies->dpc_promote_time = GLOBAL_POLICY_DEFAULT_DPC_PROMOTE_TIME_US; + policies->max_num_work_items = GLOBAL_POLICY_MAX_NUM_WI; + + policies->global_flags = 0; + if (i915->params.reset < 2) + policies->global_flags |= GLOBAL_POLICY_DISABLE_ENGINE_RESET; + + policies->is_valid = 1; } -static void guc_policies_init(struct guc_policies *policies) +void intel_guc_ads_print_policy_info(struct intel_guc *guc, + struct drm_printer *dp) { - struct guc_policy *policy; - u32 p, i; + struct __guc_ads_blob *blob = guc->ads_blob; - policies->dpc_promote_time = POLICY_DEFAULT_DPC_PROMOTE_TIME_US; - policies->max_num_work_items = POLICY_MAX_NUM_WI; + if (unlikely(!blob)) + return; - for (p = 0; p 
< GUC_CLIENT_PRIORITY_NUM; p++) { - for (i = 0; i < GUC_MAX_ENGINE_CLASSES; i++) { - policy = &policies->policy[p][i]; + drm_printf(dp, "Global scheduling policies:\n"); + drm_printf(dp, " DPC promote time = %u\n", blob->policies.dpc_promote_time); + drm_printf(dp, " Max num work items = %u\n", blob->policies.max_num_work_items); + drm_printf(dp, " Flags = %u\n", blob->policies.global_flags); +} - guc_policy_init(policy); - } - } +static int guc_action_policies_update(struct intel_guc *guc, u32 policy_offset) +{ + u32 action[] = { + INTEL_GUC_ACTION_GLOBAL_SCHED_POLICY_CHANGE, + policy_offset + }; - policies->is_valid = 1; + return intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true); } -static void guc_ct_pool_entries_init(struct guc_ct_pool_entry *pool, u32 num) +int intel_guc_global_policies_update(struct intel_guc *guc) { - memset(pool, 0, num * sizeof(*pool)); + struct __guc_ads_blob *blob = guc->ads_blob; + struct intel_gt *gt = guc_to_gt(guc); + intel_wakeref_t wakeref; + int ret; + + if (!blob) + return -EOPNOTSUPP; + + GEM_BUG_ON(!blob->ads.scheduler_policies); + + guc_policies_init(guc, &blob->policies); + + if (!intel_guc_is_ready(guc)) + return 0; + + with_intel_runtime_pm(>->i915->runtime_pm, wakeref) + ret = guc_action_policies_update(guc, blob->ads.scheduler_policies); + + return ret; } static void guc_mapping_table_init(struct intel_gt *gt, @@ -113,53 +182,324 @@ static void guc_mapping_table_init(struct intel_gt *gt, } /* - * The first 80 dwords of the register state context, containing the - * execlists and ppgtt registers. + * The save/restore register list must be pre-calculated to a temporary + * buffer of driver defined size before it can be generated in place + * inside the ADS. */ -#define LR_HW_CONTEXT_SIZE (80 * sizeof(u32)) +#define MAX_MMIO_REGS 128 /* Arbitrary size, increase as needed */ +struct temp_regset { + struct guc_mmio_reg *registers; + u32 used; + u32 size; +}; -static void __guc_ads_init(struct intel_guc *guc) +static int guc_mmio_reg_cmp(const void *a, const void *b) +{ + const struct guc_mmio_reg *ra = a; + const struct guc_mmio_reg *rb = b; + + return (int)ra->offset - (int)rb->offset; +} + +static void guc_mmio_reg_add(struct temp_regset *regset, + u32 offset, u32 flags) +{ + u32 count = regset->used; + struct guc_mmio_reg reg = { + .offset = offset, + .flags = flags, + }; + struct guc_mmio_reg *slot; + + GEM_BUG_ON(count >= regset->size); + + /* + * The mmio list is built using separate lists within the driver. + * It's possible that at some point we may attempt to add the same + * register more than once. Do not consider this an error; silently + * move on if the register is already in the list. + */ + if (bsearch(®, regset->registers, count, + sizeof(reg), guc_mmio_reg_cmp)) + return; + + slot = ®set->registers[count]; + regset->used++; + *slot = reg; + + while (slot-- > regset->registers) { + GEM_BUG_ON(slot[0].offset == slot[1].offset); + if (slot[1].offset > slot[0].offset) + break; + + swap(slot[1], slot[0]); + } +} + +#define GUC_MMIO_REG_ADD(regset, reg, masked) \ + guc_mmio_reg_add(regset, \ + i915_mmio_reg_offset((reg)), \ + (masked) ? 
GUC_REGSET_MASKED : 0) + +static void guc_mmio_regset_init(struct temp_regset *regset, + struct intel_engine_cs *engine) +{ + const u32 base = engine->mmio_base; + struct i915_wa_list *wal = &engine->wa_list; + struct i915_wa *wa; + unsigned int i; + + regset->used = 0; + + GUC_MMIO_REG_ADD(regset, RING_MODE_GEN7(base), true); + GUC_MMIO_REG_ADD(regset, RING_HWS_PGA(base), false); + GUC_MMIO_REG_ADD(regset, RING_IMR(base), false); + + for (i = 0, wa = wal->list; i < wal->count; i++, wa++) + GUC_MMIO_REG_ADD(regset, wa->reg, wa->masked_reg); + + /* Be extra paranoid and include all whitelist registers. */ + for (i = 0; i < RING_MAX_NONPRIV_SLOTS; i++) + GUC_MMIO_REG_ADD(regset, + RING_FORCE_TO_NONPRIV(base, i), + false); + + /* add in local MOCS registers */ + for (i = 0; i < GEN9_LNCFCMOCS_REG_COUNT; i++) + GUC_MMIO_REG_ADD(regset, GEN9_LNCFCMOCS(i), false); +} + +static int guc_mmio_reg_state_query(struct intel_guc *guc) { struct intel_gt *gt = guc_to_gt(guc); - struct drm_i915_private *i915 = gt->i915; + struct intel_engine_cs *engine; + enum intel_engine_id id; + struct temp_regset temp_set; + u32 total; + + /* + * Need to actually build the list in order to filter out + * duplicates and other such data dependent constructions. + */ + temp_set.size = MAX_MMIO_REGS; + temp_set.registers = kmalloc_array(temp_set.size, + sizeof(*temp_set.registers), + GFP_KERNEL); + if (!temp_set.registers) + return -ENOMEM; + + total = 0; + for_each_engine(engine, gt, id) { + guc_mmio_regset_init(&temp_set, engine); + total += temp_set.used; + } + + kfree(temp_set.registers); + + return total * sizeof(struct guc_mmio_reg); +} + +static void guc_mmio_reg_state_init(struct intel_guc *guc, + struct __guc_ads_blob *blob) +{ + struct intel_gt *gt = guc_to_gt(guc); + struct intel_engine_cs *engine; + enum intel_engine_id id; + struct temp_regset temp_set; + struct guc_mmio_reg_set *ads_reg_set; + u32 addr_ggtt, offset; + u8 guc_class; + + offset = guc_ads_regset_offset(guc); + addr_ggtt = intel_guc_ggtt_offset(guc, guc->ads_vma) + offset; + temp_set.registers = (struct guc_mmio_reg *)(((u8 *)blob) + offset); + temp_set.size = guc->ads_regset_size / sizeof(temp_set.registers[0]); + + for_each_engine(engine, gt, id) { + /* Class index is checked in class converter */ + GEM_BUG_ON(engine->instance >= GUC_MAX_INSTANCES_PER_CLASS); + + guc_class = engine_class_to_guc_class(engine->class); + ads_reg_set = &blob->ads.reg_state_list[guc_class][engine->instance]; + + guc_mmio_regset_init(&temp_set, engine); + if (!temp_set.used) { + ads_reg_set->address = 0; + ads_reg_set->count = 0; + continue; + } + + ads_reg_set->address = addr_ggtt; + ads_reg_set->count = temp_set.used; + + temp_set.size -= temp_set.used; + temp_set.registers += temp_set.used; + addr_ggtt += temp_set.used * sizeof(struct guc_mmio_reg); + } + + GEM_BUG_ON(temp_set.size); +} + +static void fill_engine_enable_masks(struct intel_gt *gt, + struct guc_gt_system_info *info) +{ + info->engine_enabled_masks[GUC_RENDER_CLASS] = 1; + info->engine_enabled_masks[GUC_BLITTER_CLASS] = 1; + info->engine_enabled_masks[GUC_VIDEO_CLASS] = VDBOX_MASK(gt); + info->engine_enabled_masks[GUC_VIDEOENHANCE_CLASS] = VEBOX_MASK(gt); +} + +static int guc_prep_golden_context(struct intel_guc *guc, + struct __guc_ads_blob *blob) +{ + struct intel_gt *gt = guc_to_gt(guc); + u32 addr_ggtt, offset; + u32 total_size = 0, alloc_size, real_size; + u8 engine_class, guc_class; + struct guc_gt_system_info *info, local_info; + + /* + * Reserve the memory for the golden contexts and point 
GuC at it but + * leave it empty for now. The context data will be filled in later + * once there is something available to put there. + * + * Note that the HWSP and ring context are not included. + * + * Note also that the storage must be pinned in the GGTT, so that the + * address won't change after GuC has been told where to find it. The + * GuC will also validate that the LRC base + size fall within the + * allowed GGTT range. + */ + if (blob) { + offset = guc_ads_golden_ctxt_offset(guc); + addr_ggtt = intel_guc_ggtt_offset(guc, guc->ads_vma) + offset; + info = &blob->system_info; + } else { + memset(&local_info, 0, sizeof(local_info)); + info = &local_info; + fill_engine_enable_masks(gt, info); + } + + for (engine_class = 0; engine_class <= MAX_ENGINE_CLASS; ++engine_class) { + if (engine_class == OTHER_CLASS) + continue; + + guc_class = engine_class_to_guc_class(engine_class); + + if (!info->engine_enabled_masks[guc_class]) + continue; + + real_size = intel_engine_context_size(gt, engine_class); + alloc_size = PAGE_ALIGN(real_size); + total_size += alloc_size; + + if (!blob) + continue; + + blob->ads.eng_state_size[guc_class] = real_size; + blob->ads.golden_context_lrca[guc_class] = addr_ggtt; + addr_ggtt += alloc_size; + } + + if (!blob) + return total_size; + + GEM_BUG_ON(guc->ads_golden_ctxt_size != total_size); + return total_size; +} + +static struct intel_engine_cs *find_engine_state(struct intel_gt *gt, u8 engine_class) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + for_each_engine(engine, gt, id) { + if (engine->class != engine_class) + continue; + + if (!engine->default_state) + continue; + + return engine; + } + + return NULL; +} + +static void guc_init_golden_context(struct intel_guc *guc) +{ struct __guc_ads_blob *blob = guc->ads_blob; - const u32 skipped_size = LRC_PPHWSP_SZ * PAGE_SIZE + LR_HW_CONTEXT_SIZE; - u32 base; + struct intel_engine_cs *engine; + struct intel_gt *gt = guc_to_gt(guc); + u32 addr_ggtt, offset; + u32 total_size = 0, alloc_size, real_size; u8 engine_class, guc_class; + u8 *ptr; - /* GuC scheduling policies */ - guc_policies_init(&blob->policies); + /* Skip execlist and PPGTT registers + HWSP */ + const u32 lr_hw_context_size = 80 * sizeof(u32); + const u32 skip_size = LRC_PPHWSP_SZ * PAGE_SIZE + + lr_hw_context_size; + + if (!intel_uc_uses_guc_submission(>->uc)) + return; + + GEM_BUG_ON(!blob); /* - * GuC expects a per-engine-class context image and size - * (minus hwsp and ring context). The context image will be - * used to reinitialize engines after a reset. It must exist - * and be pinned in the GGTT, so that the address won't change after - * we have told GuC where to find it. The context size will be used - * to validate that the LRC base + size fall within allowed GGTT. + * Go back and fill in the golden context data now that it is + * available. */ + offset = guc_ads_golden_ctxt_offset(guc); + addr_ggtt = intel_guc_ggtt_offset(guc, guc->ads_vma) + offset; + ptr = ((u8 *)blob) + offset; + for (engine_class = 0; engine_class <= MAX_ENGINE_CLASS; ++engine_class) { if (engine_class == OTHER_CLASS) continue; guc_class = engine_class_to_guc_class(engine_class); - /* - * TODO: Set context pointer to default state to allow - * GuC to re-init guilty contexts after internal reset. 
- */ - blob->ads.golden_context_lrca[guc_class] = 0; - blob->ads.eng_state_size[guc_class] = - intel_engine_context_size(guc_to_gt(guc), - engine_class) - - skipped_size; + if (!blob->system_info.engine_enabled_masks[guc_class]) + continue; + + real_size = intel_engine_context_size(gt, engine_class); + alloc_size = PAGE_ALIGN(real_size); + total_size += alloc_size; + + engine = find_engine_state(gt, engine_class); + if (!engine) { + drm_err(>->i915->drm, "No engine state recorded for class %d!\n", + engine_class); + blob->ads.eng_state_size[guc_class] = 0; + blob->ads.golden_context_lrca[guc_class] = 0; + continue; + } + + GEM_BUG_ON(blob->ads.eng_state_size[guc_class] != real_size); + GEM_BUG_ON(blob->ads.golden_context_lrca[guc_class] != addr_ggtt); + addr_ggtt += alloc_size; + + shmem_read(engine->default_state, skip_size, ptr + skip_size, + real_size - skip_size); + ptr += alloc_size; } + GEM_BUG_ON(guc->ads_golden_ctxt_size != total_size); +} + +static void __guc_ads_init(struct intel_guc *guc) +{ + struct intel_gt *gt = guc_to_gt(guc); + struct drm_i915_private *i915 = gt->i915; + struct __guc_ads_blob *blob = guc->ads_blob; + u32 base; + + /* GuC scheduling policies */ + guc_policies_init(guc, &blob->policies); + /* System info */ - blob->system_info.engine_enabled_masks[GUC_RENDER_CLASS] = 1; - blob->system_info.engine_enabled_masks[GUC_BLITTER_CLASS] = 1; - blob->system_info.engine_enabled_masks[GUC_VIDEO_CLASS] = VDBOX_MASK(gt); - blob->system_info.engine_enabled_masks[GUC_VIDEOENHANCE_CLASS] = VEBOX_MASK(gt); + fill_engine_enable_masks(gt, &blob->system_info); blob->system_info.generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_SLICE_ENABLED] = hweight8(gt->info.sseu.slice_mask); @@ -174,21 +514,19 @@ static void __guc_ads_init(struct intel_guc *guc) GEN12_DOORBELLS_PER_SQIDI) + 1; } + /* Golden contexts for re-initialising after a watchdog reset */ + guc_prep_golden_context(guc, blob); + guc_mapping_table_init(guc_to_gt(guc), &blob->system_info); base = intel_guc_ggtt_offset(guc, guc->ads_vma); - /* Clients info */ - guc_ct_pool_entries_init(blob->ct_pool, ARRAY_SIZE(blob->ct_pool)); - - blob->clients_info.clients_num = 1; - blob->clients_info.ct_pool_addr = base + ptr_offset(blob, ct_pool); - blob->clients_info.ct_pool_count = ARRAY_SIZE(blob->ct_pool); - /* ADS */ blob->ads.scheduler_policies = base + ptr_offset(blob, policies); blob->ads.gt_system_info = base + ptr_offset(blob, system_info); - blob->ads.clients_info = base + ptr_offset(blob, clients_info); + + /* MMIO save/restore list */ + guc_mmio_reg_state_init(guc, blob); /* Private Data */ blob->ads.private_data = base + guc_ads_private_data_offset(guc); @@ -210,6 +548,19 @@ int intel_guc_ads_create(struct intel_guc *guc) GEM_BUG_ON(guc->ads_vma); + /* Need to calculate the reg state size dynamically: */ + ret = guc_mmio_reg_state_query(guc); + if (ret < 0) + return ret; + guc->ads_regset_size = ret; + + /* Likewise the golden contexts: */ + ret = guc_prep_golden_context(guc, NULL); + if (ret < 0) + return ret; + guc->ads_golden_ctxt_size = ret; + + /* Now the total size can be determined: */ size = guc_ads_blob_size(guc); ret = intel_guc_allocate_and_map_vma(guc, size, &guc->ads_vma, @@ -222,6 +573,18 @@ int intel_guc_ads_create(struct intel_guc *guc) return 0; } +void intel_guc_ads_init_late(struct intel_guc *guc) +{ + /* + * The golden context setup requires the saved engine state from + * __engines_record_defaults(). However, that requires engines to be + * operational which means the ADS must already have been configured. 
+ * Fortunately, the golden context state is not needed until a hang + * occurs, so it can be filled in during this late init phase. + */ + guc_init_golden_context(guc); +} + void intel_guc_ads_destroy(struct intel_guc *guc) { i915_vma_unpin_and_release(&guc->ads_vma, I915_VMA_RELEASE_MAP); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h index b00d3ae1113a..3d85051d57e4 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h @@ -7,9 +7,13 @@ #define _INTEL_GUC_ADS_H_ struct intel_guc; +struct drm_printer; int intel_guc_ads_create(struct intel_guc *guc); void intel_guc_ads_destroy(struct intel_guc *guc); +void intel_guc_ads_init_late(struct intel_guc *guc); void intel_guc_ads_reset(struct intel_guc *guc); +void intel_guc_ads_print_policy_info(struct intel_guc *guc, + struct drm_printer *p); #endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c index 8f7b148fef58..22b4733b55e2 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c @@ -3,6 +3,11 @@ * Copyright © 2016-2019 Intel Corporation */ +#include <linux/circ_buf.h> +#include <linux/ktime.h> +#include <linux/time64.h> +#include <linux/timekeeping.h> + #include "i915_drv.h" #include "intel_guc_ct.h" #include "gt/intel_gt.h" @@ -58,11 +63,17 @@ static inline struct drm_device *ct_to_drm(struct intel_guc_ct *ct) * +--------+-----------------------------------------------+------+ * * Size of each `CT Buffer`_ must be multiple of 4K. - * As we don't expect too many messages, for now use minimum sizes. + * We don't expect too many messages in flight at any time, unless we are + * using GuC submission. In that case each request requires a minimum of + * 2 dwords, which gives us a maximum of 256 queued requests. Hopefully this + * is enough space to avoid backpressure on the driver. We increase the size + * of the receive buffer (relative to the send) to ensure a G2H response + * CTB has a landing spot. 
*/ #define CTB_DESC_SIZE ALIGN(sizeof(struct guc_ct_buffer_desc), SZ_2K) #define CTB_H2G_BUFFER_SIZE (SZ_4K) -#define CTB_G2H_BUFFER_SIZE (SZ_4K) +#define CTB_G2H_BUFFER_SIZE (4 * CTB_H2G_BUFFER_SIZE) +#define G2H_ROOM_BUFFER_SIZE (CTB_G2H_BUFFER_SIZE / 4) struct ct_request { struct list_head link; @@ -98,66 +109,84 @@ void intel_guc_ct_init_early(struct intel_guc_ct *ct) INIT_LIST_HEAD(&ct->requests.incoming); INIT_WORK(&ct->requests.worker, ct_incoming_request_worker_func); tasklet_setup(&ct->receive_tasklet, ct_receive_tasklet_func); + init_waitqueue_head(&ct->wq); } static inline const char *guc_ct_buffer_type_to_str(u32 type) { switch (type) { - case INTEL_GUC_CT_BUFFER_TYPE_SEND: + case GUC_CTB_TYPE_HOST2GUC: return "SEND"; - case INTEL_GUC_CT_BUFFER_TYPE_RECV: + case GUC_CTB_TYPE_GUC2HOST: return "RECV"; default: return "<invalid>"; } } -static void guc_ct_buffer_desc_init(struct guc_ct_buffer_desc *desc, - u32 cmds_addr, u32 size) +static void guc_ct_buffer_desc_init(struct guc_ct_buffer_desc *desc) { memset(desc, 0, sizeof(*desc)); - desc->addr = cmds_addr; - desc->size = size; - desc->owner = CTB_OWNER_HOST; } -static void guc_ct_buffer_reset(struct intel_guc_ct_buffer *ctb, u32 cmds_addr) +static void guc_ct_buffer_reset(struct intel_guc_ct_buffer *ctb) { - guc_ct_buffer_desc_init(ctb->desc, cmds_addr, ctb->size); + u32 space; + + ctb->broken = false; + ctb->tail = 0; + ctb->head = 0; + space = CIRC_SPACE(ctb->tail, ctb->head, ctb->size) - ctb->resv_space; + atomic_set(&ctb->space, space); + + guc_ct_buffer_desc_init(ctb->desc); } static void guc_ct_buffer_init(struct intel_guc_ct_buffer *ctb, struct guc_ct_buffer_desc *desc, - u32 *cmds, u32 size) + u32 *cmds, u32 size_in_bytes, u32 resv_space) { - GEM_BUG_ON(size % 4); + GEM_BUG_ON(size_in_bytes % 4); ctb->desc = desc; ctb->cmds = cmds; - ctb->size = size; + ctb->size = size_in_bytes / 4; + ctb->resv_space = resv_space / 4; - guc_ct_buffer_reset(ctb, 0); + guc_ct_buffer_reset(ctb); } -static int guc_action_register_ct_buffer(struct intel_guc *guc, - u32 desc_addr, - u32 type) +static int guc_action_register_ct_buffer(struct intel_guc *guc, u32 type, + u32 desc_addr, u32 buff_addr, u32 size) { - u32 action[] = { - INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER, - desc_addr, - sizeof(struct guc_ct_buffer_desc), - type + u32 request[HOST2GUC_REGISTER_CTB_REQUEST_MSG_LEN] = { + FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) | + FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | + FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_ACTION_HOST2GUC_REGISTER_CTB), + FIELD_PREP(HOST2GUC_REGISTER_CTB_REQUEST_MSG_1_SIZE, size / SZ_4K - 1) | + FIELD_PREP(HOST2GUC_REGISTER_CTB_REQUEST_MSG_1_TYPE, type), + FIELD_PREP(HOST2GUC_REGISTER_CTB_REQUEST_MSG_2_DESC_ADDR, desc_addr), + FIELD_PREP(HOST2GUC_REGISTER_CTB_REQUEST_MSG_3_BUFF_ADDR, buff_addr), }; - /* Can't use generic send(), CT registration must go over MMIO */ - return intel_guc_send_mmio(guc, action, ARRAY_SIZE(action), NULL, 0); + GEM_BUG_ON(type != GUC_CTB_TYPE_HOST2GUC && type != GUC_CTB_TYPE_GUC2HOST); + GEM_BUG_ON(size % SZ_4K); + + /* CT registration must go over MMIO */ + return intel_guc_send_mmio(guc, request, ARRAY_SIZE(request), NULL, 0); } -static int ct_register_buffer(struct intel_guc_ct *ct, u32 desc_addr, u32 type) +static int ct_register_buffer(struct intel_guc_ct *ct, u32 type, + u32 desc_addr, u32 buff_addr, u32 size) { - int err = guc_action_register_ct_buffer(ct_to_guc(ct), desc_addr, type); + int err; + + err = 
i915_inject_probe_error(guc_to_gt(ct_to_guc(ct))->i915, -ENXIO); + if (unlikely(err)) + return err; + err = guc_action_register_ct_buffer(ct_to_guc(ct), type, + desc_addr, buff_addr, size); if (unlikely(err)) CT_ERROR(ct, "Failed to register %s buffer (err=%d)\n", guc_ct_buffer_type_to_str(type), err); @@ -166,14 +195,17 @@ static int ct_register_buffer(struct intel_guc_ct *ct, u32 desc_addr, u32 type) static int guc_action_deregister_ct_buffer(struct intel_guc *guc, u32 type) { - u32 action[] = { - INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER, - CTB_OWNER_HOST, - type + u32 request[HOST2GUC_DEREGISTER_CTB_REQUEST_MSG_LEN] = { + FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) | + FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | + FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_ACTION_HOST2GUC_DEREGISTER_CTB), + FIELD_PREP(HOST2GUC_DEREGISTER_CTB_REQUEST_MSG_1_TYPE, type), }; - /* Can't use generic send(), CT deregistration must go over MMIO */ - return intel_guc_send_mmio(guc, action, ARRAY_SIZE(action), NULL, 0); + GEM_BUG_ON(type != GUC_CTB_TYPE_HOST2GUC && type != GUC_CTB_TYPE_GUC2HOST); + + /* CT deregistration must go over MMIO */ + return intel_guc_send_mmio(guc, request, ARRAY_SIZE(request), NULL, 0); } static int ct_deregister_buffer(struct intel_guc_ct *ct, u32 type) @@ -200,10 +232,15 @@ int intel_guc_ct_init(struct intel_guc_ct *ct) struct guc_ct_buffer_desc *desc; u32 blob_size; u32 cmds_size; + u32 resv_space; void *blob; u32 *cmds; int err; + err = i915_inject_probe_error(guc_to_gt(guc)->i915, -ENXIO); + if (err) + return err; + GEM_BUG_ON(ct->vma); blob_size = 2 * CTB_DESC_SIZE + CTB_H2G_BUFFER_SIZE + CTB_G2H_BUFFER_SIZE; @@ -220,19 +257,23 @@ int intel_guc_ct_init(struct intel_guc_ct *ct) desc = blob; cmds = blob + 2 * CTB_DESC_SIZE; cmds_size = CTB_H2G_BUFFER_SIZE; - CT_DEBUG(ct, "%s desc %#tx cmds %#tx size %u\n", "send", - ptrdiff(desc, blob), ptrdiff(cmds, blob), cmds_size); + resv_space = 0; + CT_DEBUG(ct, "%s desc %#tx cmds %#tx size %u/%u\n", "send", + ptrdiff(desc, blob), ptrdiff(cmds, blob), cmds_size, + resv_space); - guc_ct_buffer_init(&ct->ctbs.send, desc, cmds, cmds_size); + guc_ct_buffer_init(&ct->ctbs.send, desc, cmds, cmds_size, resv_space); /* store pointers to desc and cmds for recv ctb */ desc = blob + CTB_DESC_SIZE; cmds = blob + 2 * CTB_DESC_SIZE + CTB_H2G_BUFFER_SIZE; cmds_size = CTB_G2H_BUFFER_SIZE; - CT_DEBUG(ct, "%s desc %#tx cmds %#tx size %u\n", "recv", - ptrdiff(desc, blob), ptrdiff(cmds, blob), cmds_size); + resv_space = G2H_ROOM_BUFFER_SIZE; + CT_DEBUG(ct, "%s desc %#tx cmds %#tx size %u/%u\n", "recv", + ptrdiff(desc, blob), ptrdiff(cmds, blob), cmds_size, + resv_space); - guc_ct_buffer_init(&ct->ctbs.recv, desc, cmds, cmds_size); + guc_ct_buffer_init(&ct->ctbs.recv, desc, cmds, cmds_size, resv_space); return 0; } @@ -261,7 +302,7 @@ void intel_guc_ct_fini(struct intel_guc_ct *ct) int intel_guc_ct_enable(struct intel_guc_ct *ct) { struct intel_guc *guc = ct_to_guc(ct); - u32 base, cmds; + u32 base, desc, cmds; void *blob; int err; @@ -277,32 +318,36 @@ int intel_guc_ct_enable(struct intel_guc_ct *ct) GEM_BUG_ON(blob != ct->ctbs.send.desc); /* (re)initialize descriptors */ - cmds = base + ptrdiff(ct->ctbs.send.cmds, blob); - guc_ct_buffer_reset(&ct->ctbs.send, cmds); - - cmds = base + ptrdiff(ct->ctbs.recv.cmds, blob); - guc_ct_buffer_reset(&ct->ctbs.recv, cmds); + guc_ct_buffer_reset(&ct->ctbs.send); + guc_ct_buffer_reset(&ct->ctbs.recv); /* * Register both CT buffers starting with RECV buffer. 
* Descriptors are in first half of the blob. */ - err = ct_register_buffer(ct, base + ptrdiff(ct->ctbs.recv.desc, blob), - INTEL_GUC_CT_BUFFER_TYPE_RECV); + desc = base + ptrdiff(ct->ctbs.recv.desc, blob); + cmds = base + ptrdiff(ct->ctbs.recv.cmds, blob); + err = ct_register_buffer(ct, GUC_CTB_TYPE_GUC2HOST, + desc, cmds, ct->ctbs.recv.size * 4); + if (unlikely(err)) goto err_out; - err = ct_register_buffer(ct, base + ptrdiff(ct->ctbs.send.desc, blob), - INTEL_GUC_CT_BUFFER_TYPE_SEND); + desc = base + ptrdiff(ct->ctbs.send.desc, blob); + cmds = base + ptrdiff(ct->ctbs.send.cmds, blob); + err = ct_register_buffer(ct, GUC_CTB_TYPE_HOST2GUC, + desc, cmds, ct->ctbs.send.size * 4); + if (unlikely(err)) goto err_deregister; ct->enabled = true; + ct->stall_time = KTIME_MAX; return 0; err_deregister: - ct_deregister_buffer(ct, INTEL_GUC_CT_BUFFER_TYPE_RECV); + ct_deregister_buffer(ct, GUC_CTB_TYPE_GUC2HOST); err_out: CT_PROBE_ERROR(ct, "Failed to enable CTB (%pe)\n", ERR_PTR(err)); return err; @@ -321,8 +366,8 @@ void intel_guc_ct_disable(struct intel_guc_ct *ct) ct->enabled = false; if (intel_guc_is_fw_running(guc)) { - ct_deregister_buffer(ct, INTEL_GUC_CT_BUFFER_TYPE_SEND); - ct_deregister_buffer(ct, INTEL_GUC_CT_BUFFER_TYPE_RECV); + ct_deregister_buffer(ct, GUC_CTB_TYPE_HOST2GUC); + ct_deregister_buffer(ct, GUC_CTB_TYPE_GUC2HOST); } } @@ -354,81 +399,63 @@ static void write_barrier(struct intel_guc_ct *ct) } } -/** - * DOC: CTB Host to GuC request - * - * Format of the CTB Host to GuC request message is as follows:: - * - * +------------+---------+---------+---------+---------+ - * | msg[0] | [1] | [2] | ... | [n-1] | - * +------------+---------+---------+---------+---------+ - * | MESSAGE | MESSAGE PAYLOAD | - * + HEADER +---------+---------+---------+---------+ - * | | 0 | 1 | ... | n | - * +============+=========+=========+=========+=========+ - * | len >= 1 | FENCE | request specific data | - * +------+-----+---------+---------+---------+---------+ - * - * ^-----------------len-------------------^ - */ - static int ct_write(struct intel_guc_ct *ct, const u32 *action, u32 len /* in dwords */, - u32 fence) + u32 fence, u32 flags) { struct intel_guc_ct_buffer *ctb = &ct->ctbs.send; struct guc_ct_buffer_desc *desc = ctb->desc; - u32 head = desc->head; - u32 tail = desc->tail; + u32 tail = ctb->tail; u32 size = ctb->size; - u32 used; u32 header; + u32 hxg; + u32 type; u32 *cmds = ctb->cmds; unsigned int i; - if (unlikely(desc->is_in_error)) - return -EPIPE; - - if (unlikely(!IS_ALIGNED(head | tail, 4) || - (tail | head) >= size)) + if (unlikely(desc->status)) goto corrupted; - /* later calculations will be done in dwords */ - head /= 4; - tail /= 4; - size /= 4; - - /* - * tail == head condition indicates empty. GuC FW does not support - * using up the entire buffer to get tail == head meaning full. - */ - if (tail < head) - used = (size - head) + tail; - else - used = tail - head; + GEM_BUG_ON(tail > size); - /* make sure there is a space including extra dw for the fence */ - if (unlikely(used + len + 1 >= size)) - return -ENOSPC; +#ifdef CONFIG_DRM_I915_DEBUG_GUC + if (unlikely(tail != READ_ONCE(desc->tail))) { + CT_ERROR(ct, "Tail was modified %u != %u\n", + desc->tail, tail); + desc->status |= GUC_CTB_STATUS_MISMATCH; + goto corrupted; + } + if (unlikely(READ_ONCE(desc->head) >= size)) { + CT_ERROR(ct, "Invalid head offset %u >= %u)\n", + desc->head, size); + desc->status |= GUC_CTB_STATUS_OVERFLOW; + goto corrupted; + } +#endif /* - * Write the message. 
The format is the following: - * DW0: header (including action code) - * DW1: fence - * DW2+: action data + * dw0: CT header (including fence) + * dw1: HXG header (including action code) + * dw2+: action data */ - header = (len << GUC_CT_MSG_LEN_SHIFT) | - GUC_CT_MSG_SEND_STATUS | - (action[0] << GUC_CT_MSG_ACTION_SHIFT); + header = FIELD_PREP(GUC_CTB_MSG_0_FORMAT, GUC_CTB_FORMAT_HXG) | + FIELD_PREP(GUC_CTB_MSG_0_NUM_DWORDS, len) | + FIELD_PREP(GUC_CTB_MSG_0_FENCE, fence); - CT_DEBUG(ct, "writing %*ph %*ph %*ph\n", - 4, &header, 4, &fence, 4 * (len - 1), &action[1]); + type = (flags & INTEL_GUC_CT_SEND_NB) ? GUC_HXG_TYPE_EVENT : + GUC_HXG_TYPE_REQUEST; + hxg = FIELD_PREP(GUC_HXG_MSG_0_TYPE, type) | + FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION | + GUC_HXG_EVENT_MSG_0_DATA0, action[0]); + + CT_DEBUG(ct, "writing (tail %u) %*ph %*ph %*ph\n", + tail, 4, &header, 4, &hxg, 4 * (len - 1), &action[1]); cmds[tail] = header; tail = (tail + 1) % size; - cmds[tail] = fence; + cmds[tail] = hxg; tail = (tail + 1) % size; for (i = 1; i < len; i++) { @@ -443,14 +470,20 @@ static int ct_write(struct intel_guc_ct *ct, */ write_barrier(ct); - /* now update desc tail (back in bytes) */ - desc->tail = tail * 4; + /* update local copies */ + ctb->tail = tail; + GEM_BUG_ON(atomic_read(&ctb->space) < len + GUC_CTB_HDR_LEN); + atomic_sub(len + GUC_CTB_HDR_LEN, &ctb->space); + + /* now update descriptor */ + WRITE_ONCE(desc->tail, tail); + return 0; corrupted: - CT_ERROR(ct, "Corrupted descriptor addr=%#x head=%u tail=%u size=%u\n", - desc->addr, desc->head, desc->tail, desc->size); - desc->is_in_error = 1; + CT_ERROR(ct, "Corrupted descriptor head=%u tail=%u status=%#x\n", + desc->head, desc->tail, desc->status); + ctb->broken = true; return -EPIPE; } @@ -459,7 +492,7 @@ corrupted: * @req: pointer to pending request * @status: placeholder for status * - * For each sent request, Guc shall send bac CT response message. + * For each sent request, GuC shall send back CT response message. * Our message handler will update status of tracked request once * response message with given fence is received. Wait here and * check for valid response status value. @@ -475,12 +508,18 @@ static int wait_for_ct_request_update(struct ct_request *req, u32 *status) /* * Fast commands should complete in less than 10us, so sample quickly * up to that length of time, then switch to a slower sleep-wait loop. - * No GuC command should ever take longer than 10ms. + * No GuC command should ever take longer than 10ms but many GuC + * commands can be inflight at time, so use a 1s timeout on the slower + * sleep-wait loop. 
*/ -#define done INTEL_GUC_MSG_IS_RESPONSE(READ_ONCE(req->status)) - err = wait_for_us(done, 10); +#define GUC_CTB_RESPONSE_TIMEOUT_SHORT_MS 10 +#define GUC_CTB_RESPONSE_TIMEOUT_LONG_MS 1000 +#define done \ + (FIELD_GET(GUC_HXG_MSG_0_ORIGIN, READ_ONCE(req->status)) == \ + GUC_HXG_ORIGIN_GUC) + err = wait_for_us(done, GUC_CTB_RESPONSE_TIMEOUT_SHORT_MS); if (err) - err = wait_for(done, 10); + err = wait_for(done, GUC_CTB_RESPONSE_TIMEOUT_LONG_MS); #undef done if (unlikely(err)) @@ -490,6 +529,131 @@ static int wait_for_ct_request_update(struct ct_request *req, u32 *status) return err; } +#define GUC_CTB_TIMEOUT_MS 1500 +static inline bool ct_deadlocked(struct intel_guc_ct *ct) +{ + long timeout = GUC_CTB_TIMEOUT_MS; + bool ret = ktime_ms_delta(ktime_get(), ct->stall_time) > timeout; + + if (unlikely(ret)) { + struct guc_ct_buffer_desc *send = ct->ctbs.send.desc; + struct guc_ct_buffer_desc *recv = ct->ctbs.recv.desc; + + CT_ERROR(ct, "Communication stalled for %lld ms, desc status=%#x,%#x\n", + ktime_ms_delta(ktime_get(), ct->stall_time), + send->status, recv->status); + ct->ctbs.send.broken = true; + } + + return ret; +} + +static inline bool g2h_has_room(struct intel_guc_ct *ct, u32 g2h_len_dw) +{ + struct intel_guc_ct_buffer *ctb = &ct->ctbs.recv; + + /* + * We leave a certain amount of space in the G2H CTB buffer for + * unexpected G2H CTBs (e.g. logging, engine hang, etc...) + */ + return !g2h_len_dw || atomic_read(&ctb->space) >= g2h_len_dw; +} + +static inline void g2h_reserve_space(struct intel_guc_ct *ct, u32 g2h_len_dw) +{ + lockdep_assert_held(&ct->ctbs.send.lock); + + GEM_BUG_ON(!g2h_has_room(ct, g2h_len_dw)); + + if (g2h_len_dw) + atomic_sub(g2h_len_dw, &ct->ctbs.recv.space); +} + +static inline void g2h_release_space(struct intel_guc_ct *ct, u32 g2h_len_dw) +{ + atomic_add(g2h_len_dw, &ct->ctbs.recv.space); +} + +static inline bool h2g_has_room(struct intel_guc_ct *ct, u32 len_dw) +{ + struct intel_guc_ct_buffer *ctb = &ct->ctbs.send; + struct guc_ct_buffer_desc *desc = ctb->desc; + u32 head; + u32 space; + + if (atomic_read(&ctb->space) >= len_dw) + return true; + + head = READ_ONCE(desc->head); + if (unlikely(head > ctb->size)) { + CT_ERROR(ct, "Invalid head offset %u >= %u)\n", + head, ctb->size); + desc->status |= GUC_CTB_STATUS_OVERFLOW; + ctb->broken = true; + return false; + } + + space = CIRC_SPACE(ctb->tail, head, ctb->size); + atomic_set(&ctb->space, space); + + return space >= len_dw; +} + +static int has_room_nb(struct intel_guc_ct *ct, u32 h2g_dw, u32 g2h_dw) +{ + lockdep_assert_held(&ct->ctbs.send.lock); + + if (unlikely(!h2g_has_room(ct, h2g_dw) || !g2h_has_room(ct, g2h_dw))) { + if (ct->stall_time == KTIME_MAX) + ct->stall_time = ktime_get(); + + if (unlikely(ct_deadlocked(ct))) + return -EPIPE; + else + return -EBUSY; + } + + ct->stall_time = KTIME_MAX; + return 0; +} + +#define G2H_LEN_DW(f) ({ \ + typeof(f) f_ = (f); \ + FIELD_GET(INTEL_GUC_CT_SEND_G2H_DW_MASK, f_) ? 
\ + FIELD_GET(INTEL_GUC_CT_SEND_G2H_DW_MASK, f_) + \ + GUC_CTB_HXG_MSG_MIN_LEN : 0; \ +}) +static int ct_send_nb(struct intel_guc_ct *ct, + const u32 *action, + u32 len, + u32 flags) +{ + struct intel_guc_ct_buffer *ctb = &ct->ctbs.send; + unsigned long spin_flags; + u32 g2h_len_dw = G2H_LEN_DW(flags); + u32 fence; + int ret; + + spin_lock_irqsave(&ctb->lock, spin_flags); + + ret = has_room_nb(ct, len + GUC_CTB_HDR_LEN, g2h_len_dw); + if (unlikely(ret)) + goto out; + + fence = ct_get_next_fence(ct); + ret = ct_write(ct, action, len, fence, flags); + if (unlikely(ret)) + goto out; + + g2h_reserve_space(ct, g2h_len_dw); + intel_guc_notify(ct_to_guc(ct)); + +out: + spin_unlock_irqrestore(&ctb->lock, spin_flags); + + return ret; +} + static int ct_send(struct intel_guc_ct *ct, const u32 *action, u32 len, @@ -497,8 +661,10 @@ static int ct_send(struct intel_guc_ct *ct, u32 response_buf_size, u32 *status) { + struct intel_guc_ct_buffer *ctb = &ct->ctbs.send; struct ct_request request; unsigned long flags; + unsigned int sleep_period_ms = 1; u32 fence; int err; @@ -506,8 +672,33 @@ static int ct_send(struct intel_guc_ct *ct, GEM_BUG_ON(!len); GEM_BUG_ON(len & ~GUC_CT_MSG_LEN_MASK); GEM_BUG_ON(!response_buf && response_buf_size); + might_sleep(); + + /* + * We use a lazy spin wait loop here as we believe that if the CT + * buffers are sized correctly the flow control condition should be + * rare. Reserving the maximum size in the G2H credits as we don't know + * how big the response is going to be. + */ +retry: + spin_lock_irqsave(&ctb->lock, flags); + if (unlikely(!h2g_has_room(ct, len + GUC_CTB_HDR_LEN) || + !g2h_has_room(ct, GUC_CTB_HXG_MSG_MAX_LEN))) { + if (ct->stall_time == KTIME_MAX) + ct->stall_time = ktime_get(); + spin_unlock_irqrestore(&ctb->lock, flags); + + if (unlikely(ct_deadlocked(ct))) + return -EPIPE; + + if (msleep_interruptible(sleep_period_ms)) + return -EINTR; + sleep_period_ms = sleep_period_ms << 1; + + goto retry; + } - spin_lock_irqsave(&ct->ctbs.send.lock, flags); + ct->stall_time = KTIME_MAX; fence = ct_get_next_fence(ct); request.fence = fence; @@ -519,9 +710,10 @@ static int ct_send(struct intel_guc_ct *ct, list_add_tail(&request.link, &ct->requests.pending); spin_unlock(&ct->requests.lock); - err = ct_write(ct, action, len, fence); + err = ct_write(ct, action, len, fence, 0); + g2h_reserve_space(ct, GUC_CTB_HXG_MSG_MAX_LEN); - spin_unlock_irqrestore(&ct->ctbs.send.lock, flags); + spin_unlock_irqrestore(&ctb->lock, flags); if (unlikely(err)) goto unlink; @@ -529,24 +721,25 @@ static int ct_send(struct intel_guc_ct *ct, intel_guc_notify(ct_to_guc(ct)); err = wait_for_ct_request_update(&request, status); + g2h_release_space(ct, GUC_CTB_HXG_MSG_MAX_LEN); if (unlikely(err)) goto unlink; - if (!INTEL_GUC_MSG_IS_RESPONSE_SUCCESS(*status)) { + if (FIELD_GET(GUC_HXG_MSG_0_TYPE, *status) != GUC_HXG_TYPE_RESPONSE_SUCCESS) { err = -EIO; goto unlink; } if (response_buf) { /* There shall be no data in the status */ - WARN_ON(INTEL_GUC_MSG_TO_DATA(request.status)); + WARN_ON(FIELD_GET(GUC_HXG_RESPONSE_MSG_0_DATA0, request.status)); /* Return actual response len */ err = request.response_len; } else { /* There shall be no response payload */ WARN_ON(request.response_len); /* Return data decoded from the status dword */ - err = INTEL_GUC_MSG_TO_DATA(*status); + err = FIELD_GET(GUC_HXG_RESPONSE_MSG_0_DATA0, *status); } unlink: @@ -561,16 +754,25 @@ unlink: * Command Transport (CT) buffer based GuC send function. 
*/ int intel_guc_ct_send(struct intel_guc_ct *ct, const u32 *action, u32 len, - u32 *response_buf, u32 response_buf_size) + u32 *response_buf, u32 response_buf_size, u32 flags) { u32 status = ~0; /* undefined */ int ret; if (unlikely(!ct->enabled)) { - WARN(1, "Unexpected send: action=%#x\n", *action); + struct intel_guc *guc = ct_to_guc(ct); + struct intel_uc *uc = container_of(guc, struct intel_uc, guc); + + WARN(!uc->reset_in_progress, "Unexpected send: action=%#x\n", *action); return -ENODEV; } + if (unlikely(ct->ctbs.send.broken)) + return -EPIPE; + + if (flags & INTEL_GUC_CT_SEND_NB) + return ct_send_nb(ct, action, len, flags); + ret = ct_send(ct, action, len, response_buf, response_buf_size, &status); if (unlikely(ret < 0)) { CT_ERROR(ct, "Sending action %#x failed (err=%d status=%#X)\n", @@ -583,21 +785,6 @@ int intel_guc_ct_send(struct intel_guc_ct *ct, const u32 *action, u32 len, return ret; } -static inline unsigned int ct_header_get_len(u32 header) -{ - return (header >> GUC_CT_MSG_LEN_SHIFT) & GUC_CT_MSG_LEN_MASK; -} - -static inline unsigned int ct_header_get_action(u32 header) -{ - return (header >> GUC_CT_MSG_ACTION_SHIFT) & GUC_CT_MSG_ACTION_MASK; -} - -static inline bool ct_header_is_response(u32 header) -{ - return !!(header & GUC_CT_MSG_IS_RESPONSE); -} - static struct ct_incoming_msg *ct_alloc_msg(u32 num_dwords) { struct ct_incoming_msg *msg; @@ -621,8 +808,8 @@ static int ct_read(struct intel_guc_ct *ct, struct ct_incoming_msg **msg) { struct intel_guc_ct_buffer *ctb = &ct->ctbs.recv; struct guc_ct_buffer_desc *desc = ctb->desc; - u32 head = desc->head; - u32 tail = desc->tail; + u32 head = ctb->head; + u32 tail = READ_ONCE(desc->tail); u32 size = ctb->size; u32 *cmds = ctb->cmds; s32 available; @@ -630,17 +817,28 @@ static int ct_read(struct intel_guc_ct *ct, struct ct_incoming_msg **msg) unsigned int i; u32 header; - if (unlikely(desc->is_in_error)) + if (unlikely(ctb->broken)) return -EPIPE; - if (unlikely(!IS_ALIGNED(head | tail, 4) || - (tail | head) >= size)) + if (unlikely(desc->status)) goto corrupted; - /* later calculations will be done in dwords */ - head /= 4; - tail /= 4; - size /= 4; + GEM_BUG_ON(head > size); + +#ifdef CONFIG_DRM_I915_DEBUG_GUC + if (unlikely(head != READ_ONCE(desc->head))) { + CT_ERROR(ct, "Head was modified %u != %u\n", + desc->head, head); + desc->status |= GUC_CTB_STATUS_MISMATCH; + goto corrupted; + } +#endif + if (unlikely(tail >= size)) { + CT_ERROR(ct, "Invalid tail offset %u >= %u)\n", + tail, size); + desc->status |= GUC_CTB_STATUS_OVERFLOW; + goto corrupted; + } /* tail == head condition indicates empty */ available = tail - head; @@ -652,14 +850,14 @@ static int ct_read(struct intel_guc_ct *ct, struct ct_incoming_msg **msg) /* beware of buffer wrap case */ if (unlikely(available < 0)) available += size; - CT_DEBUG(ct, "available %d (%u:%u)\n", available, head, tail); + CT_DEBUG(ct, "available %d (%u:%u:%u)\n", available, head, tail, size); GEM_BUG_ON(available < 0); header = cmds[head]; head = (head + 1) % size; /* message len with header */ - len = ct_header_get_len(header) + 1; + len = FIELD_GET(GUC_CTB_MSG_0_NUM_DWORDS, header) + GUC_CTB_MSG_MIN_LEN; if (unlikely(len > (u32)available)) { CT_ERROR(ct, "Incomplete message %*ph %*ph %*ph\n", 4, &header, @@ -667,6 +865,7 @@ static int ct_read(struct intel_guc_ct *ct, struct ct_incoming_msg **msg) size - head : available - 1), &cmds[head], 4 * (head + available - 1 > size ? 
available - 1 - size + head : 0), &cmds[0]); + desc->status |= GUC_CTB_STATUS_UNDERFLOW; goto corrupted; } @@ -689,65 +888,39 @@ static int ct_read(struct intel_guc_ct *ct, struct ct_incoming_msg **msg) } CT_DEBUG(ct, "received %*ph\n", 4 * len, (*msg)->msg); - desc->head = head * 4; + /* update local copies */ + ctb->head = head; + + /* now update descriptor */ + WRITE_ONCE(desc->head, head); + return available - len; corrupted: - CT_ERROR(ct, "Corrupted descriptor addr=%#x head=%u tail=%u size=%u\n", - desc->addr, desc->head, desc->tail, desc->size); - desc->is_in_error = 1; + CT_ERROR(ct, "Corrupted descriptor head=%u tail=%u status=%#x\n", + desc->head, desc->tail, desc->status); + ctb->broken = true; return -EPIPE; } -/** - * DOC: CTB GuC to Host response - * - * Format of the CTB GuC to Host response message is as follows:: - * - * +------------+---------+---------+---------+---------+---------+ - * | msg[0] | [1] | [2] | [3] | ... | [n-1] | - * +------------+---------+---------+---------+---------+---------+ - * | MESSAGE | MESSAGE PAYLOAD | - * + HEADER +---------+---------+---------+---------+---------+ - * | | 0 | 1 | 2 | ... | n | - * +============+=========+=========+=========+=========+=========+ - * | len >= 2 | FENCE | STATUS | response specific data | - * +------+-----+---------+---------+---------+---------+---------+ - * - * ^-----------------------len-----------------------^ - */ - static int ct_handle_response(struct intel_guc_ct *ct, struct ct_incoming_msg *response) { - u32 header = response->msg[0]; - u32 len = ct_header_get_len(header); - u32 fence; - u32 status; - u32 datalen; + u32 len = FIELD_GET(GUC_CTB_MSG_0_NUM_DWORDS, response->msg[0]); + u32 fence = FIELD_GET(GUC_CTB_MSG_0_FENCE, response->msg[0]); + const u32 *hxg = &response->msg[GUC_CTB_MSG_MIN_LEN]; + const u32 *data = &hxg[GUC_HXG_MSG_MIN_LEN]; + u32 datalen = len - GUC_HXG_MSG_MIN_LEN; struct ct_request *req; unsigned long flags; bool found = false; int err = 0; - GEM_BUG_ON(!ct_header_is_response(header)); - - /* Response payload shall at least include fence and status */ - if (unlikely(len < 2)) { - CT_ERROR(ct, "Corrupted response (len %u)\n", len); - return -EPROTO; - } - - fence = response->msg[1]; - status = response->msg[2]; - datalen = len - 2; - - /* Format of the status follows RESPONSE message */ - if (unlikely(!INTEL_GUC_MSG_IS_RESPONSE(status))) { - CT_ERROR(ct, "Corrupted response (status %#x)\n", status); - return -EPROTO; - } + GEM_BUG_ON(len < GUC_HXG_MSG_MIN_LEN); + GEM_BUG_ON(FIELD_GET(GUC_HXG_MSG_0_ORIGIN, hxg[0]) != GUC_HXG_ORIGIN_GUC); + GEM_BUG_ON(FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg[0]) != GUC_HXG_TYPE_RESPONSE_SUCCESS && + FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg[0]) != GUC_HXG_TYPE_RESPONSE_FAILURE); - CT_DEBUG(ct, "response fence %u status %#x\n", fence, status); + CT_DEBUG(ct, "response fence %u status %#x\n", fence, hxg[0]); spin_lock_irqsave(&ct->requests.lock, flags); list_for_each_entry(req, &ct->requests.pending, link) { @@ -763,18 +936,22 @@ static int ct_handle_response(struct intel_guc_ct *ct, struct ct_incoming_msg *r err = -EMSGSIZE; } if (datalen) - memcpy(req->response_buf, response->msg + 3, 4 * datalen); + memcpy(req->response_buf, data, 4 * datalen); req->response_len = datalen; - WRITE_ONCE(req->status, status); + WRITE_ONCE(req->status, hxg[0]); found = true; break; } - spin_unlock_irqrestore(&ct->requests.lock, flags); - if (!found) { CT_ERROR(ct, "Unsolicited response (fence %u)\n", fence); - return -ENOKEY; + CT_ERROR(ct, "Could not find fence=%u, last_fence=%u\n", 
fence, + ct->requests.last_fence); + list_for_each_entry(req, &ct->requests.pending, link) + CT_ERROR(ct, "request %u awaits response\n", + req->fence); + err = -ENOKEY; } + spin_unlock_irqrestore(&ct->requests.lock, flags); if (unlikely(err)) return err; @@ -786,14 +963,16 @@ static int ct_handle_response(struct intel_guc_ct *ct, struct ct_incoming_msg *r static int ct_process_request(struct intel_guc_ct *ct, struct ct_incoming_msg *request) { struct intel_guc *guc = ct_to_guc(ct); - u32 header, action, len; + const u32 *hxg; const u32 *payload; + u32 hxg_len, action, len; int ret; - header = request->msg[0]; - payload = &request->msg[1]; - action = ct_header_get_action(header); - len = ct_header_get_len(header); + hxg = &request->msg[GUC_CTB_MSG_MIN_LEN]; + hxg_len = request->size - GUC_CTB_MSG_MIN_LEN; + payload = &hxg[GUC_HXG_MSG_MIN_LEN]; + action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, hxg[0]); + len = hxg_len - GUC_HXG_MSG_MIN_LEN; CT_DEBUG(ct, "request %x %*ph\n", action, 4 * len, payload); @@ -801,6 +980,19 @@ static int ct_process_request(struct intel_guc_ct *ct, struct ct_incoming_msg *r case INTEL_GUC_ACTION_DEFAULT: ret = intel_guc_to_host_process_recv_msg(guc, payload, len); break; + case INTEL_GUC_ACTION_DEREGISTER_CONTEXT_DONE: + ret = intel_guc_deregister_done_process_msg(guc, payload, + len); + break; + case INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_DONE: + ret = intel_guc_sched_done_process_msg(guc, payload, len); + break; + case INTEL_GUC_ACTION_CONTEXT_RESET_NOTIFICATION: + ret = intel_guc_context_reset_process_msg(guc, payload, len); + break; + case INTEL_GUC_ACTION_ENGINE_FAILURE_NOTIFICATION: + ret = intel_guc_engine_failure_process_msg(guc, payload, len); + break; default: ret = -EOPNOTSUPP; break; @@ -855,29 +1047,24 @@ static void ct_incoming_request_worker_func(struct work_struct *w) queue_work(system_unbound_wq, &ct->requests.worker); } -/** - * DOC: CTB GuC to Host request - * - * Format of the CTB GuC to Host request message is as follows:: - * - * +------------+---------+---------+---------+---------+---------+ - * | msg[0] | [1] | [2] | [3] | ... | [n-1] | - * +------------+---------+---------+---------+---------+---------+ - * | MESSAGE | MESSAGE PAYLOAD | - * + HEADER +---------+---------+---------+---------+---------+ - * | | 0 | 1 | 2 | ... | n | - * +============+=========+=========+=========+=========+=========+ - * | len | request specific data | - * +------+-----+---------+---------+---------+---------+---------+ - * - * ^-----------------------len-----------------------^ - */ - -static int ct_handle_request(struct intel_guc_ct *ct, struct ct_incoming_msg *request) +static int ct_handle_event(struct intel_guc_ct *ct, struct ct_incoming_msg *request) { + const u32 *hxg = &request->msg[GUC_CTB_MSG_MIN_LEN]; + u32 action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, hxg[0]); unsigned long flags; - GEM_BUG_ON(ct_header_is_response(request->msg[0])); + GEM_BUG_ON(FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg[0]) != GUC_HXG_TYPE_EVENT); + + /* + * Adjusting the space must be done in IRQ or deadlock can occur as the + * CTB processing in the below workqueue can send CTBs which creates a + * circular dependency if the space was returned there. 
+ */ + switch (action) { + case INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_DONE: + case INTEL_GUC_ACTION_DEREGISTER_CONTEXT_DONE: + g2h_release_space(ct, request->size); + } spin_lock_irqsave(&ct->requests.lock, flags); list_add_tail(&request->link, &ct->requests.incoming); @@ -887,15 +1074,53 @@ static int ct_handle_request(struct intel_guc_ct *ct, struct ct_incoming_msg *re return 0; } -static void ct_handle_msg(struct intel_guc_ct *ct, struct ct_incoming_msg *msg) +static int ct_handle_hxg(struct intel_guc_ct *ct, struct ct_incoming_msg *msg) { - u32 header = msg->msg[0]; + u32 origin, type; + u32 *hxg; int err; - if (ct_header_is_response(header)) + if (unlikely(msg->size < GUC_CTB_HXG_MSG_MIN_LEN)) + return -EBADMSG; + + hxg = &msg->msg[GUC_CTB_MSG_MIN_LEN]; + + origin = FIELD_GET(GUC_HXG_MSG_0_ORIGIN, hxg[0]); + if (unlikely(origin != GUC_HXG_ORIGIN_GUC)) { + err = -EPROTO; + goto failed; + } + + type = FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg[0]); + switch (type) { + case GUC_HXG_TYPE_EVENT: + err = ct_handle_event(ct, msg); + break; + case GUC_HXG_TYPE_RESPONSE_SUCCESS: + case GUC_HXG_TYPE_RESPONSE_FAILURE: err = ct_handle_response(ct, msg); + break; + default: + err = -EOPNOTSUPP; + } + + if (unlikely(err)) { +failed: + CT_ERROR(ct, "Failed to handle HXG message (%pe) %*ph\n", + ERR_PTR(err), 4 * GUC_HXG_MSG_MIN_LEN, hxg); + } + return err; +} + +static void ct_handle_msg(struct intel_guc_ct *ct, struct ct_incoming_msg *msg) +{ + u32 format = FIELD_GET(GUC_CTB_MSG_0_FORMAT, msg->msg[0]); + int err; + + if (format == GUC_CTB_FORMAT_HXG) + err = ct_handle_hxg(ct, msg); else - err = ct_handle_request(ct, msg); + err = -EOPNOTSUPP; if (unlikely(err)) { CT_ERROR(ct, "Failed to process CT message (%pe) %*ph\n", @@ -958,3 +1183,25 @@ void intel_guc_ct_event_handler(struct intel_guc_ct *ct) ct_try_receive_message(ct); } + +void intel_guc_ct_print_info(struct intel_guc_ct *ct, + struct drm_printer *p) +{ + drm_printf(p, "CT %s\n", enableddisabled(ct->enabled)); + + if (!ct->enabled) + return; + + drm_printf(p, "H2G Space: %u\n", + atomic_read(&ct->ctbs.send.space) * 4); + drm_printf(p, "Head: %u\n", + ct->ctbs.send.desc->head); + drm_printf(p, "Tail: %u\n", + ct->ctbs.send.desc->tail); + drm_printf(p, "G2H Space: %u\n", + atomic_read(&ct->ctbs.recv.space) * 4); + drm_printf(p, "Head: %u\n", + ct->ctbs.recv.desc->head); + drm_printf(p, "Tail: %u\n", + ct->ctbs.recv.desc->tail); +} diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h index cb222f202301..f709a19c7e21 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h @@ -9,11 +9,14 @@ #include <linux/interrupt.h> #include <linux/spinlock.h> #include <linux/workqueue.h> +#include <linux/ktime.h> +#include <linux/wait.h> #include "intel_guc_fwif.h" struct i915_vma; struct intel_guc; +struct drm_printer; /** * DOC: Command Transport (CT). 
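The flow control introduced above reduces to dword accounting over a circular buffer: the driver trusts only its local shadow head/tail/space copies, refreshes the free-space count from the descriptor head when the cached value runs low, and pre-reserves G2H credits for any request that will generate a reply. Below is an illustrative, kernel-style sketch of that accounting, not part of the patch: the structure and function names are made up for the example, only CIRC_SPACE() from <linux/circ_buf.h> and the atomic helpers are real, and a power-of-two buffer size in dwords is assumed.

#include <linux/atomic.h>
#include <linux/circ_buf.h>
#include <linux/types.h>

/* Illustrative mirror of the shadow-copy space tracking used by the CTB code. */
struct ctb_sketch {
	u32 tail;	/* local copy of the producer (write) index, in dwords */
	u32 size;	/* buffer size in dwords, power of two */
	atomic_t space;	/* cached number of free dwords */
};

static bool ctb_sketch_has_room(struct ctb_sketch *ctb, u32 desc_head, u32 len_dw)
{
	u32 space;

	/* Fast path: the cached credit count is usually sufficient. */
	if (atomic_read(&ctb->space) >= len_dw)
		return true;

	/* Slow path: recompute from the consumer's head, as h2g_has_room() does. */
	space = CIRC_SPACE(ctb->tail, desc_head, ctb->size);
	atomic_set(&ctb->space, space);

	return space >= len_dw;
}

In the patch itself, ct_write() then debits len + GUC_CTB_HDR_LEN dwords from the same counter once the message has been written, and g2h_release_space() credits the receive side back when the corresponding G2H reply has been consumed.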
@@ -31,16 +34,25 @@ struct intel_guc; * @lock: protects access to the commands buffer and buffer descriptor * @desc: pointer to the buffer descriptor * @cmds: pointer to the commands buffer - * @size: size of the commands buffer + * @size: size of the commands buffer in dwords + * @resv_space: reserved space in buffer in dwords + * @head: local shadow copy of head in dwords + * @tail: local shadow copy of tail in dwords + * @space: local shadow copy of space in dwords + * @broken: flag to indicate if descriptor data is broken */ struct intel_guc_ct_buffer { spinlock_t lock; struct guc_ct_buffer_desc *desc; u32 *cmds; u32 size; + u32 resv_space; + u32 tail; + u32 head; + atomic_t space; + bool broken; }; - /** Top-level structure for Command Transport related data * * Includes a pair of CT buffers for bi-directional communication and tracking @@ -58,8 +70,11 @@ struct intel_guc_ct { struct tasklet_struct receive_tasklet; + /** @wq: wait queue for g2h channel */ + wait_queue_head_t wq; + struct { - u32 last_fence; /* last fence used to send request */ + u16 last_fence; /* last fence used to send request */ spinlock_t lock; /* protects pending requests list */ struct list_head pending; /* requests waiting for response */ @@ -67,6 +82,9 @@ struct intel_guc_ct { struct list_head incoming; /* incoming requests */ struct work_struct worker; /* handler for incoming requests */ } requests; + + /** @stall_time: time at which a CTB submission first stalled */ + ktime_t stall_time; }; void intel_guc_ct_init_early(struct intel_guc_ct *ct); @@ -85,8 +103,18 @@ static inline bool intel_guc_ct_enabled(struct intel_guc_ct *ct) return ct->enabled; } +#define INTEL_GUC_CT_SEND_NB BIT(31) +#define INTEL_GUC_CT_SEND_G2H_DW_SHIFT 0 +#define INTEL_GUC_CT_SEND_G2H_DW_MASK (0xff << INTEL_GUC_CT_SEND_G2H_DW_SHIFT) +#define MAKE_SEND_FLAGS(len) ({ \ + typeof(len) len_ = (len); \ + GEM_BUG_ON(!FIELD_FIT(INTEL_GUC_CT_SEND_G2H_DW_MASK, len_)); \ + (FIELD_PREP(INTEL_GUC_CT_SEND_G2H_DW_MASK, len_) | INTEL_GUC_CT_SEND_NB); \ +}) int intel_guc_ct_send(struct intel_guc_ct *ct, const u32 *action, u32 len, - u32 *response_buf, u32 response_buf_size); + u32 *response_buf, u32 response_buf_size, u32 flags); void intel_guc_ct_event_handler(struct intel_guc_ct *ct); +void intel_guc_ct_print_info(struct intel_guc_ct *ct, struct drm_printer *p); + #endif /* _INTEL_GUC_CT_H_ */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c index fe7cb7b29a1e..887c8c8f35db 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c @@ -9,6 +9,10 @@ #include "intel_guc.h" #include "intel_guc_debugfs.h" #include "intel_guc_log_debugfs.h" +#include "gt/uc/intel_guc_ct.h" +#include "gt/uc/intel_guc_ads.h" +#include "gt/uc/intel_guc_submission.h" +#include "gt/uc/intel_guc_slpc.h" static int guc_info_show(struct seq_file *m, void *data) { @@ -22,16 +26,57 @@ static int guc_info_show(struct seq_file *m, void *data) drm_puts(&p, "\n"); intel_guc_log_info(&guc->log, &p); - /* Add more as required ...
*/ + if (!intel_guc_submission_is_used(guc)) + return 0; + + intel_guc_ct_print_info(&guc->ct, &p); + intel_guc_submission_print_info(guc, &p); + intel_guc_ads_print_policy_info(guc, &p); return 0; } DEFINE_GT_DEBUGFS_ATTRIBUTE(guc_info); +static int guc_registered_contexts_show(struct seq_file *m, void *data) +{ + struct intel_guc *guc = m->private; + struct drm_printer p = drm_seq_file_printer(m); + + if (!intel_guc_submission_is_used(guc)) + return -ENODEV; + + intel_guc_submission_print_context_info(guc, &p); + + return 0; +} +DEFINE_GT_DEBUGFS_ATTRIBUTE(guc_registered_contexts); + +static int guc_slpc_info_show(struct seq_file *m, void *unused) +{ + struct intel_guc *guc = m->private; + struct intel_guc_slpc *slpc = &guc->slpc; + struct drm_printer p = drm_seq_file_printer(m); + + if (!intel_guc_slpc_is_used(guc)) + return -ENODEV; + + return intel_guc_slpc_print_info(slpc, &p); +} +DEFINE_GT_DEBUGFS_ATTRIBUTE(guc_slpc_info); + +static bool intel_eval_slpc_support(void *data) +{ + struct intel_guc *guc = (struct intel_guc *)data; + + return intel_guc_slpc_is_used(guc); +} + void intel_guc_debugfs_register(struct intel_guc *guc, struct dentry *root) { static const struct debugfs_gt_file files[] = { { "guc_info", &guc_info_fops, NULL }, + { "guc_registered_contexts", &guc_registered_contexts_fops, NULL }, + { "guc_slpc_info", &guc_slpc_info_fops, &intel_eval_slpc_support}, }; if (!intel_guc_is_supported(guc)) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h index e9a9d85e2aa3..fa4be13c8854 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h @@ -12,19 +12,27 @@ #include "gt/intel_engine_types.h" #include "abi/guc_actions_abi.h" +#include "abi/guc_actions_slpc_abi.h" #include "abi/guc_errors_abi.h" #include "abi/guc_communication_mmio_abi.h" #include "abi/guc_communication_ctb_abi.h" #include "abi/guc_messages_abi.h" +/* Payload length only i.e. 
don't include G2H header length */ +#define G2H_LEN_DW_SCHED_CONTEXT_MODE_SET 2 +#define G2H_LEN_DW_DEREGISTER_CONTEXT 1 + +#define GUC_CONTEXT_DISABLE 0 +#define GUC_CONTEXT_ENABLE 1 + #define GUC_CLIENT_PRIORITY_KMD_HIGH 0 #define GUC_CLIENT_PRIORITY_HIGH 1 #define GUC_CLIENT_PRIORITY_KMD_NORMAL 2 #define GUC_CLIENT_PRIORITY_NORMAL 3 #define GUC_CLIENT_PRIORITY_NUM 4 -#define GUC_MAX_STAGE_DESCRIPTORS 1024 -#define GUC_INVALID_STAGE_ID GUC_MAX_STAGE_DESCRIPTORS +#define GUC_MAX_LRC_DESCRIPTORS 65535 +#define GUC_INVALID_LRC_ID GUC_MAX_LRC_DESCRIPTORS #define GUC_RENDER_ENGINE 0 #define GUC_VIDEO_ENGINE 1 @@ -81,15 +89,14 @@ #define GUC_LOG_ALLOC_IN_MEGABYTE (1 << 3) #define GUC_LOG_CRASH_SHIFT 4 #define GUC_LOG_CRASH_MASK (0x3 << GUC_LOG_CRASH_SHIFT) -#define GUC_LOG_DPC_SHIFT 6 -#define GUC_LOG_DPC_MASK (0x7 << GUC_LOG_DPC_SHIFT) -#define GUC_LOG_ISR_SHIFT 9 -#define GUC_LOG_ISR_MASK (0x7 << GUC_LOG_ISR_SHIFT) +#define GUC_LOG_DEBUG_SHIFT 6 +#define GUC_LOG_DEBUG_MASK (0xF << GUC_LOG_DEBUG_SHIFT) #define GUC_LOG_BUF_ADDR_SHIFT 12 #define GUC_CTL_WA 1 #define GUC_CTL_FEATURE 2 #define GUC_CTL_DISABLE_SCHEDULER (1 << 14) +#define GUC_CTL_ENABLE_SLPC BIT(2) #define GUC_CTL_DEBUG 3 #define GUC_LOG_VERBOSITY_SHIFT 0 @@ -136,6 +143,11 @@ #define GUC_ID_TO_ENGINE_INSTANCE(guc_id) \ (((guc_id) & GUC_ENGINE_INSTANCE_MASK) >> GUC_ENGINE_INSTANCE_SHIFT) +#define SLPC_EVENT(id, c) (\ +FIELD_PREP(HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ID, id) | \ +FIELD_PREP(HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ARGC, c) \ +) + static inline u8 engine_class_to_guc_class(u8 class) { BUILD_BUG_ON(GUC_RENDER_CLASS != RENDER_CLASS); @@ -177,66 +189,40 @@ struct guc_process_desc { u32 reserved[30]; } __packed; -/* engine id and context id is packed into guc_execlist_context.context_id*/ -#define GUC_ELC_CTXID_OFFSET 0 -#define GUC_ELC_ENGINE_OFFSET 29 +#define CONTEXT_REGISTRATION_FLAG_KMD BIT(0) -/* The execlist context including software and HW information */ -struct guc_execlist_context { - u32 context_desc; - u32 context_id; - u32 ring_status; - u32 ring_lrca; - u32 ring_begin; - u32 ring_end; - u32 ring_next_free_location; - u32 ring_current_tail_pointer_value; - u8 engine_state_submit_value; - u8 engine_state_wait_value; - u16 pagefault_count; - u16 engine_submit_queue_count; -} __packed; +#define CONTEXT_POLICY_DEFAULT_EXECUTION_QUANTUM_US 1000000 +#define CONTEXT_POLICY_DEFAULT_PREEMPTION_TIME_US 500000 + +/* Preempt to idle on quantum expiry */ +#define CONTEXT_POLICY_FLAG_PREEMPT_TO_IDLE BIT(0) /* - * This structure describes a stage set arranged for a particular communication - * between uKernel (GuC) and Driver (KMD). Technically, this is known as a - * "GuC Context descriptor" in the specs, but we use the term "stage descriptor" - * to avoid confusion with all the other things already named "context" in the - * driver. A static pool of these descriptors are stored inside a GEM object - * (stage_desc_pool) which is held for the entire lifetime of our interaction - * with the GuC, being allocated before the GuC is loaded with its firmware. + * GuC Context registration descriptor. + * FIXME: This is only required to exist during context registration. + * The current 1:1 between guc_lrc_desc and LRCs for the lifetime of the LRC + * is not required. 
*/ -struct guc_stage_desc { - u32 sched_common_area; - u32 stage_id; - u32 pas_id; - u8 engines_used; - u64 db_trigger_cpu; - u32 db_trigger_uk; - u64 db_trigger_phy; - u16 db_id; - - struct guc_execlist_context lrc[GUC_MAX_ENGINES_NUM]; - - u8 attribute; - +struct guc_lrc_desc { + u32 hw_context_desc; + u32 slpm_perf_mode_hint; /* SLPC v1 only */ + u32 slpm_freq_hint; + u32 engine_submit_mask; /* In logical space */ + u8 engine_class; + u8 reserved0[3]; u32 priority; - - u32 wq_sampled_tail_offset; - u32 wq_total_submit_enqueues; - u32 process_desc; u32 wq_addr; u32 wq_size; - - u32 engine_presence; - - u8 engine_suspended; - - u8 reserved0[3]; - u64 reserved1[1]; - - u64 desc_private; + u32 context_flags; /* CONTEXT_REGISTRATION_* */ + /* Time for one workload to execute. (in micro seconds) */ + u32 execution_quantum; + /* Time to wait for a preemption request to complete before issuing a + * reset. (in micro seconds). + */ + u32 preemption_timeout; + u32 policy_flags; /* CONTEXT_POLICY_* */ + u32 reserved1[19]; } __packed; #define GUC_POWER_UNSPECIFIED 0 @@ -247,32 +233,14 @@ struct guc_stage_desc { /* Scheduling policy settings */ -/* Reset engine upon preempt failure */ -#define POLICY_RESET_ENGINE (1<<0) -/* Preempt to idle on quantum expiry */ -#define POLICY_PREEMPT_TO_IDLE (1<<1) +#define GLOBAL_POLICY_MAX_NUM_WI 15 -#define POLICY_MAX_NUM_WI 15 -#define POLICY_DEFAULT_DPC_PROMOTE_TIME_US 500000 -#define POLICY_DEFAULT_EXECUTION_QUANTUM_US 1000000 -#define POLICY_DEFAULT_PREEMPTION_TIME_US 500000 -#define POLICY_DEFAULT_FAULT_TIME_US 250000 +/* Don't reset an engine upon preemption failure */ +#define GLOBAL_POLICY_DISABLE_ENGINE_RESET BIT(0) -struct guc_policy { - /* Time for one workload to execute. (in micro seconds) */ - u32 execution_quantum; - /* Time to wait for a preemption request to completed before issuing a - * reset. (in micro seconds). */ - u32 preemption_time; - /* How much time to allow to run after the first fault is observed. - * Then preempt afterwards. (in micro seconds) */ - u32 fault_time; - u32 policy_flags; - u32 reserved[8]; -} __packed; +#define GLOBAL_POLICY_DEFAULT_DPC_PROMOTE_TIME_US 500000 struct guc_policies { - struct guc_policy policy[GUC_CLIENT_PRIORITY_NUM][GUC_MAX_ENGINE_CLASSES]; u32 submission_queue_depth[GUC_MAX_ENGINE_CLASSES]; /* In micro seconds. How much time to allow before DPC processing is * called back via interrupt (to prevent DPC queue drain starving). @@ -286,6 +254,7 @@ struct guc_policies { * idle.
*/ u32 max_num_work_items; + u32 global_flags; u32 reserved[4]; } __packed; @@ -311,29 +280,13 @@ struct guc_gt_system_info { u32 generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_MAX]; } __packed; -/* Clients info */ -struct guc_ct_pool_entry { - struct guc_ct_buffer_desc desc; - u32 reserved[7]; -} __packed; - -#define GUC_CT_POOL_SIZE 2 - -struct guc_clients_info { - u32 clients_num; - u32 reserved0[13]; - u32 ct_pool_addr; - u32 ct_pool_count; - u32 reserved[4]; -} __packed; - /* GuC Additional Data Struct */ struct guc_ads { struct guc_mmio_reg_set reg_state_list[GUC_MAX_ENGINE_CLASSES][GUC_MAX_INSTANCES_PER_CLASS]; u32 reserved0; u32 scheduler_policies; u32 gt_system_info; - u32 clients_info; + u32 reserved1; u32 control_data; u32 golden_context_lrca[GUC_MAX_ENGINE_CLASSES]; u32 eng_state_size[GUC_MAX_ENGINE_CLASSES]; @@ -344,8 +297,7 @@ struct guc_ads { /* GuC logging structures */ enum guc_log_buffer_type { - GUC_ISR_LOG_BUFFER, - GUC_DPC_LOG_BUFFER, + GUC_DEBUG_LOG_BUFFER, GUC_CRASH_DUMP_LOG_BUFFER, GUC_MAX_LOG_BUFFER }; @@ -414,23 +366,6 @@ struct guc_shared_ctx_data { struct guc_ctx_report preempt_ctx_report[GUC_MAX_ENGINES_NUM]; } __packed; -#define __INTEL_GUC_MSG_GET(T, m) \ - (((m) & INTEL_GUC_MSG_ ## T ## _MASK) >> INTEL_GUC_MSG_ ## T ## _SHIFT) -#define INTEL_GUC_MSG_TO_TYPE(m) __INTEL_GUC_MSG_GET(TYPE, m) -#define INTEL_GUC_MSG_TO_DATA(m) __INTEL_GUC_MSG_GET(DATA, m) -#define INTEL_GUC_MSG_TO_CODE(m) __INTEL_GUC_MSG_GET(CODE, m) - -#define __INTEL_GUC_MSG_TYPE_IS(T, m) \ - (INTEL_GUC_MSG_TO_TYPE(m) == INTEL_GUC_MSG_TYPE_ ## T) -#define INTEL_GUC_MSG_IS_REQUEST(m) __INTEL_GUC_MSG_TYPE_IS(REQUEST, m) -#define INTEL_GUC_MSG_IS_RESPONSE(m) __INTEL_GUC_MSG_TYPE_IS(RESPONSE, m) - -#define INTEL_GUC_MSG_IS_RESPONSE_SUCCESS(m) \ - (typecheck(u32, (m)) && \ - ((m) & (INTEL_GUC_MSG_TYPE_MASK | INTEL_GUC_MSG_CODE_MASK)) == \ - ((INTEL_GUC_MSG_TYPE_RESPONSE << INTEL_GUC_MSG_TYPE_SHIFT) | \ - (INTEL_GUC_RESPONSE_STATUS_SUCCESS << INTEL_GUC_MSG_CODE_SHIFT))) - /* This action will be programmed in C1BC - SOFT_SCRATCH_15_REG */ enum intel_guc_recv_message { INTEL_GUC_RECV_MSG_CRASH_DUMP_POSTED = BIT(1), diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c index c36d5eb5bbb9..ac0931f0374b 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c @@ -197,10 +197,8 @@ static bool guc_check_log_buf_overflow(struct intel_guc_log *log, static unsigned int guc_get_log_buffer_size(enum guc_log_buffer_type type) { switch (type) { - case GUC_ISR_LOG_BUFFER: - return ISR_BUFFER_SIZE; - case GUC_DPC_LOG_BUFFER: - return DPC_BUFFER_SIZE; + case GUC_DEBUG_LOG_BUFFER: + return DEBUG_BUFFER_SIZE; case GUC_CRASH_DUMP_LOG_BUFFER: return CRASH_BUFFER_SIZE; default: @@ -245,7 +243,7 @@ static void guc_read_update_log_buffer(struct intel_guc_log *log) src_data += PAGE_SIZE; dst_data += PAGE_SIZE; - for (type = GUC_ISR_LOG_BUFFER; type < GUC_MAX_LOG_BUFFER; type++) { + for (type = GUC_DEBUG_LOG_BUFFER; type < GUC_MAX_LOG_BUFFER; type++) { /* * Make a copy of the state structure, inside GuC log buffer * (which is uncached mapped), on the stack to avoid reading @@ -463,21 +461,16 @@ int intel_guc_log_create(struct intel_guc_log *log) * +===============================+ 00B * | Crash dump state header | * +-------------------------------+ 32B - * | DPC state header | + * | Debug state header | * +-------------------------------+ 64B - * | ISR state header | - * +-------------------------------+ 96B * | | * 
+===============================+ PAGE_SIZE (4KB) * | Crash Dump logs | * +===============================+ + CRASH_SIZE - * | DPC logs | - * +===============================+ + DPC_SIZE - * | ISR logs | - * +===============================+ + ISR_SIZE + * | Debug logs | + * +===============================+ + DEBUG_SIZE */ - guc_log_size = PAGE_SIZE + CRASH_BUFFER_SIZE + DPC_BUFFER_SIZE + - ISR_BUFFER_SIZE; + guc_log_size = PAGE_SIZE + CRASH_BUFFER_SIZE + DEBUG_BUFFER_SIZE; vma = intel_guc_allocate_vma(guc, guc_log_size); if (IS_ERR(vma)) { @@ -675,10 +668,8 @@ static const char * stringify_guc_log_type(enum guc_log_buffer_type type) { switch (type) { - case GUC_ISR_LOG_BUFFER: - return "ISR"; - case GUC_DPC_LOG_BUFFER: - return "DPC"; + case GUC_DEBUG_LOG_BUFFER: + return "DEBUG"; case GUC_CRASH_DUMP_LOG_BUFFER: return "CRASH"; default: @@ -708,7 +699,7 @@ void intel_guc_log_info(struct intel_guc_log *log, struct drm_printer *p) drm_printf(p, "\tRelay full count: %u\n", log->relay.full_count); - for (type = GUC_ISR_LOG_BUFFER; type < GUC_MAX_LOG_BUFFER; type++) { + for (type = GUC_DEBUG_LOG_BUFFER; type < GUC_MAX_LOG_BUFFER; type++) { drm_printf(p, "\t%s:\tflush count %10u, overflow count %10u\n", stringify_guc_log_type(type), log->stats[type].flush, diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h index 11fccd0b2294..ac1ee1d5ce10 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h @@ -17,12 +17,10 @@ struct intel_guc; #ifdef CONFIG_DRM_I915_DEBUG_GUC #define CRASH_BUFFER_SIZE SZ_2M -#define DPC_BUFFER_SIZE SZ_8M -#define ISR_BUFFER_SIZE SZ_8M +#define DEBUG_BUFFER_SIZE SZ_16M #else #define CRASH_BUFFER_SIZE SZ_8K -#define DPC_BUFFER_SIZE SZ_32K -#define ISR_BUFFER_SIZE SZ_32K +#define DEBUG_BUFFER_SIZE SZ_64K #endif /* diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c new file mode 100644 index 000000000000..fc805d466d99 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c @@ -0,0 +1,80 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +#include "intel_guc_rc.h" +#include "gt/intel_gt.h" +#include "i915_drv.h" + +static bool __guc_rc_supported(struct intel_guc *guc) +{ + /* GuC RC is unavailable for pre-Gen12 */ + return guc->submission_supported && + GRAPHICS_VER(guc_to_gt(guc)->i915) >= 12; +} + +static bool __guc_rc_selected(struct intel_guc *guc) +{ + if (!intel_guc_rc_is_supported(guc)) + return false; + + return guc->submission_selected; +} + +void intel_guc_rc_init_early(struct intel_guc *guc) +{ + guc->rc_supported = __guc_rc_supported(guc); + guc->rc_selected = __guc_rc_selected(guc); +} + +static int guc_action_control_gucrc(struct intel_guc *guc, bool enable) +{ + u32 rc_mode = enable ? INTEL_GUCRC_FIRMWARE_CONTROL : + INTEL_GUCRC_HOST_CONTROL; + u32 action[] = { + INTEL_GUC_ACTION_SETUP_PC_GUCRC, + rc_mode + }; + int ret; + + ret = intel_guc_send(guc, action, ARRAY_SIZE(action)); + ret = ret > 0 ? 
-EPROTO : ret; + + return ret; +} + +static int __guc_rc_control(struct intel_guc *guc, bool enable) +{ + struct intel_gt *gt = guc_to_gt(guc); + struct drm_device *drm = &guc_to_gt(guc)->i915->drm; + int ret; + + if (!intel_uc_uses_guc_rc(>->uc)) + return -EOPNOTSUPP; + + if (!intel_guc_is_ready(guc)) + return -EINVAL; + + ret = guc_action_control_gucrc(guc, enable); + if (ret) { + drm_err(drm, "Failed to %s GuC RC (%pe)\n", + enabledisable(enable), ERR_PTR(ret)); + return ret; + } + + drm_info(>->i915->drm, "GuC RC: %s\n", + enableddisabled(enable)); + + return 0; +} + +int intel_guc_rc_enable(struct intel_guc *guc) +{ + return __guc_rc_control(guc, true); +} + +int intel_guc_rc_disable(struct intel_guc *guc) +{ + return __guc_rc_control(guc, false); +} diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_rc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_rc.h new file mode 100644 index 000000000000..57e86c337838 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_rc.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef _INTEL_GUC_RC_H_ +#define _INTEL_GUC_RC_H_ + +#include "intel_guc_submission.h" + +void intel_guc_rc_init_early(struct intel_guc *guc); + +static inline bool intel_guc_rc_is_supported(struct intel_guc *guc) +{ + return guc->rc_supported; +} + +static inline bool intel_guc_rc_is_wanted(struct intel_guc *guc) +{ + return guc->submission_selected && intel_guc_rc_is_supported(guc); +} + +static inline bool intel_guc_rc_is_used(struct intel_guc *guc) +{ + return intel_guc_submission_is_used(guc) && intel_guc_rc_is_wanted(guc); +} + +int intel_guc_rc_enable(struct intel_guc *guc); +int intel_guc_rc_disable(struct intel_guc *guc); + +#endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c new file mode 100644 index 000000000000..65a3e7fdb2b2 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -0,0 +1,626 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +#include "i915_drv.h" +#include "intel_guc_slpc.h" +#include "gt/intel_gt.h" + +static inline struct intel_guc *slpc_to_guc(struct intel_guc_slpc *slpc) +{ + return container_of(slpc, struct intel_guc, slpc); +} + +static inline struct intel_gt *slpc_to_gt(struct intel_guc_slpc *slpc) +{ + return guc_to_gt(slpc_to_guc(slpc)); +} + +static inline struct drm_i915_private *slpc_to_i915(struct intel_guc_slpc *slpc) +{ + return slpc_to_gt(slpc)->i915; +} + +static bool __detect_slpc_supported(struct intel_guc *guc) +{ + /* GuC SLPC is unavailable for pre-Gen12 */ + return guc->submission_supported && + GRAPHICS_VER(guc_to_gt(guc)->i915) >= 12; +} + +static bool __guc_slpc_selected(struct intel_guc *guc) +{ + if (!intel_guc_slpc_is_supported(guc)) + return false; + + return guc->submission_selected; +} + +void intel_guc_slpc_init_early(struct intel_guc_slpc *slpc) +{ + struct intel_guc *guc = slpc_to_guc(slpc); + + slpc->supported = __detect_slpc_supported(guc); + slpc->selected = __guc_slpc_selected(guc); +} + +static void slpc_mem_set_param(struct slpc_shared_data *data, + u32 id, u32 value) +{ + GEM_BUG_ON(id >= SLPC_MAX_OVERRIDE_PARAMETERS); + /* + * When the flag bit is set, corresponding value will be read + * and applied by SLPC. 
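As a side note, here is a self-contained sketch of the flag/value pairing used by the override parameters (array sizes and names are invented for the example): parameter id N flips bit (N % 32) of 32-bit word (N / 32) and stores its value at values[N], and SLPC only consumes values whose flag bit is set.

#include <stdint.h>
#include <stdio.h>

#define MAX_PARAMS 256                       /* hypothetical parameter limit */

struct override_params {
        uint32_t bits[MAX_PARAMS / 32];      /* one flag bit per parameter */
        uint32_t values[MAX_PARAMS];         /* value applied when the flag is set */
};

static void set_param(struct override_params *p, unsigned int id, uint32_t val)
{
        p->bits[id / 32] |= 1u << (id % 32); /* same indexing as id >> 5 / id % 32 */
        p->values[id] = val;
}

int main(void)
{
        struct override_params p = {0};

        set_param(&p, 37, 1200);             /* lands in word 1, bit 5 */
        printf("word %u = 0x%08x, value %u\n", 37 / 32, p.bits[1], p.values[37]);
        return 0;
}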
+ */ + data->override_params.bits[id >> 5] |= (1 << (id % 32)); + data->override_params.values[id] = value; +} + +static void slpc_mem_set_enabled(struct slpc_shared_data *data, + u8 enable_id, u8 disable_id) +{ + /* + * Enabling a param involves setting the enable_id + * to 1 and disable_id to 0. + */ + slpc_mem_set_param(data, enable_id, 1); + slpc_mem_set_param(data, disable_id, 0); +} + +static void slpc_mem_set_disabled(struct slpc_shared_data *data, + u8 enable_id, u8 disable_id) +{ + /* + * Disabling a param involves setting the enable_id + * to 0 and disable_id to 1. + */ + slpc_mem_set_param(data, disable_id, 1); + slpc_mem_set_param(data, enable_id, 0); +} + +int intel_guc_slpc_init(struct intel_guc_slpc *slpc) +{ + struct intel_guc *guc = slpc_to_guc(slpc); + struct drm_i915_private *i915 = slpc_to_i915(slpc); + u32 size = PAGE_ALIGN(sizeof(struct slpc_shared_data)); + int err; + + GEM_BUG_ON(slpc->vma); + + err = intel_guc_allocate_and_map_vma(guc, size, &slpc->vma, (void **)&slpc->vaddr); + if (unlikely(err)) { + drm_err(&i915->drm, + "Failed to allocate SLPC struct (err=%pe)\n", + ERR_PTR(err)); + return err; + } + + slpc->max_freq_softlimit = 0; + slpc->min_freq_softlimit = 0; + + return err; +} + +static u32 slpc_get_state(struct intel_guc_slpc *slpc) +{ + struct slpc_shared_data *data; + + GEM_BUG_ON(!slpc->vma); + + drm_clflush_virt_range(slpc->vaddr, sizeof(u32)); + data = slpc->vaddr; + + return data->header.global_state; +} + +static int guc_action_slpc_set_param(struct intel_guc *guc, u8 id, u32 value) +{ + u32 request[] = { + GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST, + SLPC_EVENT(SLPC_EVENT_PARAMETER_SET, 2), + id, + value, + }; + int ret; + + ret = intel_guc_send(guc, request, ARRAY_SIZE(request)); + + return ret > 0 ? -EPROTO : ret; +} + +static int guc_action_slpc_unset_param(struct intel_guc *guc, u8 id) +{ + u32 request[] = { + GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST, + SLPC_EVENT(SLPC_EVENT_PARAMETER_UNSET, 2), + id, + }; + + return intel_guc_send(guc, request, ARRAY_SIZE(request)); +} + +static bool slpc_is_running(struct intel_guc_slpc *slpc) +{ + return slpc_get_state(slpc) == SLPC_GLOBAL_STATE_RUNNING; +} + +static int guc_action_slpc_query(struct intel_guc *guc, u32 offset) +{ + u32 request[] = { + GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST, + SLPC_EVENT(SLPC_EVENT_QUERY_TASK_STATE, 2), + offset, + 0, + }; + int ret; + + ret = intel_guc_send(guc, request, ARRAY_SIZE(request)); + + return ret > 0 ? 
-EPROTO : ret; +} + +static int slpc_query_task_state(struct intel_guc_slpc *slpc) +{ + struct intel_guc *guc = slpc_to_guc(slpc); + struct drm_i915_private *i915 = slpc_to_i915(slpc); + u32 offset = intel_guc_ggtt_offset(guc, slpc->vma); + int ret; + + ret = guc_action_slpc_query(guc, offset); + if (unlikely(ret)) + drm_err(&i915->drm, "Failed to query task state (%pe)\n", + ERR_PTR(ret)); + + drm_clflush_virt_range(slpc->vaddr, SLPC_PAGE_SIZE_BYTES); + + return ret; +} + +static int slpc_set_param(struct intel_guc_slpc *slpc, u8 id, u32 value) +{ + struct intel_guc *guc = slpc_to_guc(slpc); + struct drm_i915_private *i915 = slpc_to_i915(slpc); + int ret; + + GEM_BUG_ON(id >= SLPC_MAX_PARAM); + + ret = guc_action_slpc_set_param(guc, id, value); + if (ret) + drm_err(&i915->drm, "Failed to set param %d to %u (%pe)\n", + id, value, ERR_PTR(ret)); + + return ret; +} + +static int slpc_unset_param(struct intel_guc_slpc *slpc, + u8 id) +{ + struct intel_guc *guc = slpc_to_guc(slpc); + + GEM_BUG_ON(id >= SLPC_MAX_PARAM); + + return guc_action_slpc_unset_param(guc, id); +} + +static const char *slpc_global_state_to_string(enum slpc_global_state state) +{ + switch (state) { + case SLPC_GLOBAL_STATE_NOT_RUNNING: + return "not running"; + case SLPC_GLOBAL_STATE_INITIALIZING: + return "initializing"; + case SLPC_GLOBAL_STATE_RESETTING: + return "resetting"; + case SLPC_GLOBAL_STATE_RUNNING: + return "running"; + case SLPC_GLOBAL_STATE_SHUTTING_DOWN: + return "shutting down"; + case SLPC_GLOBAL_STATE_ERROR: + return "error"; + default: + return "unknown"; + } +} + +static const char *slpc_get_state_string(struct intel_guc_slpc *slpc) +{ + return slpc_global_state_to_string(slpc_get_state(slpc)); +} + +static int guc_action_slpc_reset(struct intel_guc *guc, u32 offset) +{ + u32 request[] = { + GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST, + SLPC_EVENT(SLPC_EVENT_RESET, 2), + offset, + 0, + }; + int ret; + + ret = intel_guc_send(guc, request, ARRAY_SIZE(request)); + + return ret > 0 ? -EPROTO : ret; +} + +static int slpc_reset(struct intel_guc_slpc *slpc) +{ + struct drm_i915_private *i915 = slpc_to_i915(slpc); + struct intel_guc *guc = slpc_to_guc(slpc); + u32 offset = intel_guc_ggtt_offset(guc, slpc->vma); + int ret; + + ret = guc_action_slpc_reset(guc, offset); + + if (unlikely(ret < 0)) { + drm_err(&i915->drm, "SLPC reset action failed (%pe)\n", + ERR_PTR(ret)); + return ret; + } + + if (!ret) { + if (wait_for(slpc_is_running(slpc), SLPC_RESET_TIMEOUT_MS)) { + drm_err(&i915->drm, "SLPC not enabled! 
State = %s\n", + slpc_get_state_string(slpc)); + return -EIO; + } + } + + return 0; +} + +static u32 slpc_decode_min_freq(struct intel_guc_slpc *slpc) +{ + struct slpc_shared_data *data = slpc->vaddr; + + GEM_BUG_ON(!slpc->vma); + + return DIV_ROUND_CLOSEST(REG_FIELD_GET(SLPC_MIN_UNSLICE_FREQ_MASK, + data->task_state_data.freq) * + GT_FREQUENCY_MULTIPLIER, GEN9_FREQ_SCALER); +} + +static u32 slpc_decode_max_freq(struct intel_guc_slpc *slpc) +{ + struct slpc_shared_data *data = slpc->vaddr; + + GEM_BUG_ON(!slpc->vma); + + return DIV_ROUND_CLOSEST(REG_FIELD_GET(SLPC_MAX_UNSLICE_FREQ_MASK, + data->task_state_data.freq) * + GT_FREQUENCY_MULTIPLIER, GEN9_FREQ_SCALER); +} + +static void slpc_shared_data_reset(struct slpc_shared_data *data) +{ + memset(data, 0, sizeof(struct slpc_shared_data)); + + data->header.size = sizeof(struct slpc_shared_data); + + /* Enable only GTPERF task, disable others */ + slpc_mem_set_enabled(data, SLPC_PARAM_TASK_ENABLE_GTPERF, + SLPC_PARAM_TASK_DISABLE_GTPERF); + + slpc_mem_set_disabled(data, SLPC_PARAM_TASK_ENABLE_BALANCER, + SLPC_PARAM_TASK_DISABLE_BALANCER); + + slpc_mem_set_disabled(data, SLPC_PARAM_TASK_ENABLE_DCC, + SLPC_PARAM_TASK_DISABLE_DCC); +} + +/** + * intel_guc_slpc_set_max_freq() - Set max frequency limit for SLPC. + * @slpc: pointer to intel_guc_slpc. + * @val: frequency (MHz) + * + * This function will invoke GuC SLPC action to update the max frequency + * limit for unslice. + * + * Return: 0 on success, non-zero error code on failure. + */ +int intel_guc_slpc_set_max_freq(struct intel_guc_slpc *slpc, u32 val) +{ + struct drm_i915_private *i915 = slpc_to_i915(slpc); + intel_wakeref_t wakeref; + int ret; + + if (val < slpc->min_freq || + val > slpc->rp0_freq || + val < slpc->min_freq_softlimit) + return -EINVAL; + + with_intel_runtime_pm(&i915->runtime_pm, wakeref) { + ret = slpc_set_param(slpc, + SLPC_PARAM_GLOBAL_MAX_GT_UNSLICE_FREQ_MHZ, + val); + + /* Return standardized err code for sysfs calls */ + if (ret) + ret = -EIO; + } + + if (!ret) + slpc->max_freq_softlimit = val; + + return ret; +} + +/** + * intel_guc_slpc_get_max_freq() - Get max frequency limit for SLPC. + * @slpc: pointer to intel_guc_slpc. + * @val: pointer to val which will hold max frequency (MHz) + * + * This function will invoke GuC SLPC action to read the max frequency + * limit for unslice. + * + * Return: 0 on success, non-zero error code on failure. + */ +int intel_guc_slpc_get_max_freq(struct intel_guc_slpc *slpc, u32 *val) +{ + struct drm_i915_private *i915 = slpc_to_i915(slpc); + intel_wakeref_t wakeref; + int ret = 0; + + with_intel_runtime_pm(&i915->runtime_pm, wakeref) { + /* Force GuC to update task data */ + ret = slpc_query_task_state(slpc); + + if (!ret) + *val = slpc_decode_max_freq(slpc); + } + + return ret; +} + +/** + * intel_guc_slpc_set_min_freq() - Set min frequency limit for SLPC. + * @slpc: pointer to intel_guc_slpc. + * @val: frequency (MHz) + * + * This function will invoke GuC SLPC action to update the min unslice + * frequency. + * + * Return: 0 on success, non-zero error code on failure. 
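Purely as an illustration of the range check performed below (values in MHz, numbers invented for the example): a requested max must sit inside the fused platform range [min_freq, rp0_freq] and must not fall below the current min softlimit.

#include <stdbool.h>
#include <stdio.h>

struct limits {
        unsigned int min_freq;               /* RPn, platform minimum */
        unsigned int rp0_freq;               /* RP0, platform maximum */
        unsigned int min_freq_softlimit;     /* user-set floor */
};

static bool max_freq_is_valid(const struct limits *l, unsigned int val)
{
        return val >= l->min_freq &&
               val <= l->rp0_freq &&
               val >= l->min_freq_softlimit;
}

int main(void)
{
        struct limits l = { .min_freq = 300, .rp0_freq = 1200,
                            .min_freq_softlimit = 350 };

        printf("900 MHz ok: %d\n", max_freq_is_valid(&l, 900));  /* 1 */
        printf("300 MHz ok: %d\n", max_freq_is_valid(&l, 300));  /* 0: below the softlimit floor */
        return 0;
}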
+ */ +int intel_guc_slpc_set_min_freq(struct intel_guc_slpc *slpc, u32 val) +{ + struct drm_i915_private *i915 = slpc_to_i915(slpc); + intel_wakeref_t wakeref; + int ret; + + if (val < slpc->min_freq || + val > slpc->rp0_freq || + val > slpc->max_freq_softlimit) + return -EINVAL; + + with_intel_runtime_pm(&i915->runtime_pm, wakeref) { + ret = slpc_set_param(slpc, + SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ, + val); + + /* Return standardized err code for sysfs calls */ + if (ret) + ret = -EIO; + } + + if (!ret) + slpc->min_freq_softlimit = val; + + return ret; +} + +/** + * intel_guc_slpc_get_min_freq() - Get min frequency limit for SLPC. + * @slpc: pointer to intel_guc_slpc. + * @val: pointer to val which will hold min frequency (MHz) + * + * This function will invoke GuC SLPC action to read the min frequency + * limit for unslice. + * + * Return: 0 on success, non-zero error code on failure. + */ +int intel_guc_slpc_get_min_freq(struct intel_guc_slpc *slpc, u32 *val) +{ + struct drm_i915_private *i915 = slpc_to_i915(slpc); + intel_wakeref_t wakeref; + int ret = 0; + + with_intel_runtime_pm(&i915->runtime_pm, wakeref) { + /* Force GuC to update task data */ + ret = slpc_query_task_state(slpc); + + if (!ret) + *val = slpc_decode_min_freq(slpc); + } + + return ret; +} + +void intel_guc_pm_intrmsk_enable(struct intel_gt *gt) +{ + u32 pm_intrmsk_mbz = 0; + + /* + * Allow GuC to receive ARAT timer expiry event. + * This interrupt register is setup by RPS code + * when host based Turbo is enabled. + */ + pm_intrmsk_mbz |= ARAT_EXPIRED_INTRMSK; + + intel_uncore_rmw(gt->uncore, + GEN6_PMINTRMSK, pm_intrmsk_mbz, 0); +} + +static int slpc_set_softlimits(struct intel_guc_slpc *slpc) +{ + int ret = 0; + + /* + * Softlimits are initially equivalent to platform limits + * unless they have deviated from defaults, in which case, + * we retain the values and set min/max accordingly. 
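A small standalone sketch of that policy, with invented values: a zero softlimit has never been touched by the user and simply adopts the platform value, while a non-default value is retained and re-applied to the firmware after a reset.

#include <stdbool.h>
#include <stdio.h>

/* Returns true when the retained value must be re-sent to the firmware. */
static bool init_softlimit(unsigned int *softlimit, unsigned int platform_val)
{
        if (*softlimit == 0) {
                *softlimit = platform_val;   /* first init: track the platform limit */
                return false;
        }
        return *softlimit != platform_val;   /* user override: re-apply it */
}

int main(void)
{
        unsigned int max_soft = 0, min_soft = 500;

        printf("re-apply max: %d (now %u)\n", init_softlimit(&max_soft, 1200), max_soft);
        printf("re-apply min: %d (now %u)\n", init_softlimit(&min_soft, 300), min_soft);
        return 0;
}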
+ */ + if (!slpc->max_freq_softlimit) + slpc->max_freq_softlimit = slpc->rp0_freq; + else if (slpc->max_freq_softlimit != slpc->rp0_freq) + ret = intel_guc_slpc_set_max_freq(slpc, + slpc->max_freq_softlimit); + + if (unlikely(ret)) + return ret; + + if (!slpc->min_freq_softlimit) + slpc->min_freq_softlimit = slpc->min_freq; + else if (slpc->min_freq_softlimit != slpc->min_freq) + return intel_guc_slpc_set_min_freq(slpc, + slpc->min_freq_softlimit); + + return 0; +} + +static int slpc_ignore_eff_freq(struct intel_guc_slpc *slpc, bool ignore) +{ + int ret = 0; + + if (ignore) { + ret = slpc_set_param(slpc, + SLPC_PARAM_IGNORE_EFFICIENT_FREQUENCY, + ignore); + if (!ret) + return slpc_set_param(slpc, + SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ, + slpc->min_freq); + } else { + ret = slpc_unset_param(slpc, + SLPC_PARAM_IGNORE_EFFICIENT_FREQUENCY); + if (!ret) + return slpc_unset_param(slpc, + SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ); + } + + return ret; +} + +static int slpc_use_fused_rp0(struct intel_guc_slpc *slpc) +{ + /* Force SLPC to use platform rp0 */ + return slpc_set_param(slpc, + SLPC_PARAM_GLOBAL_MAX_GT_UNSLICE_FREQ_MHZ, + slpc->rp0_freq); +} + +static void slpc_get_rp_values(struct intel_guc_slpc *slpc) +{ + u32 rp_state_cap; + + rp_state_cap = intel_uncore_read(slpc_to_gt(slpc)->uncore, + GEN6_RP_STATE_CAP); + + slpc->rp0_freq = REG_FIELD_GET(RP0_CAP_MASK, rp_state_cap) * + GT_FREQUENCY_MULTIPLIER; + slpc->rp1_freq = REG_FIELD_GET(RP1_CAP_MASK, rp_state_cap) * + GT_FREQUENCY_MULTIPLIER; + slpc->min_freq = REG_FIELD_GET(RPN_CAP_MASK, rp_state_cap) * + GT_FREQUENCY_MULTIPLIER; +} + +/* + * intel_guc_slpc_enable() - Start SLPC + * @slpc: pointer to intel_guc_slpc. + * + * SLPC is enabled by setting up the shared data structure and + * sending a reset event to GuC SLPC. Initial data is set up in + * intel_guc_slpc_init. Here we send the reset event. We do + * not currently need a slpc_disable since this is taken care + * of automatically when a reset/suspend occurs and the GuC + * CTB is destroyed. + * + * Return: 0 on success, non-zero error code on failure.
+ */ +int intel_guc_slpc_enable(struct intel_guc_slpc *slpc) +{ + struct drm_i915_private *i915 = slpc_to_i915(slpc); + int ret; + + GEM_BUG_ON(!slpc->vma); + + slpc_shared_data_reset(slpc->vaddr); + + ret = slpc_reset(slpc); + if (unlikely(ret < 0)) { + drm_err(&i915->drm, "SLPC Reset event returned (%pe)\n", + ERR_PTR(ret)); + return ret; + } + + ret = slpc_query_task_state(slpc); + if (unlikely(ret < 0)) + return ret; + + intel_guc_pm_intrmsk_enable(&i915->gt); + + slpc_get_rp_values(slpc); + + /* Ignore efficient freq and set min to platform min */ + ret = slpc_ignore_eff_freq(slpc, true); + if (unlikely(ret)) { + drm_err(&i915->drm, "Failed to set SLPC min to RPn (%pe)\n", + ERR_PTR(ret)); + return ret; + } + + /* Set SLPC max limit to RP0 */ + ret = slpc_use_fused_rp0(slpc); + if (unlikely(ret)) { + drm_err(&i915->drm, "Failed to set SLPC max to RP0 (%pe)\n", + ERR_PTR(ret)); + return ret; + } + + /* Revert SLPC min/max to softlimits if necessary */ + ret = slpc_set_softlimits(slpc); + if (unlikely(ret)) { + drm_err(&i915->drm, "Failed to set SLPC softlimits (%pe)\n", + ERR_PTR(ret)); + return ret; + } + + return 0; +} + +int intel_guc_slpc_print_info(struct intel_guc_slpc *slpc, struct drm_printer *p) +{ + struct drm_i915_private *i915 = slpc_to_i915(slpc); + struct slpc_shared_data *data = slpc->vaddr; + struct slpc_task_state_data *slpc_tasks; + intel_wakeref_t wakeref; + int ret = 0; + + GEM_BUG_ON(!slpc->vma); + + with_intel_runtime_pm(&i915->runtime_pm, wakeref) { + ret = slpc_query_task_state(slpc); + + if (!ret) { + slpc_tasks = &data->task_state_data; + + drm_printf(p, "\tSLPC state: %s\n", slpc_get_state_string(slpc)); + drm_printf(p, "\tGTPERF task active: %s\n", + yesno(slpc_tasks->status & SLPC_GTPERF_TASK_ENABLED)); + drm_printf(p, "\tMax freq: %u MHz\n", + slpc_decode_max_freq(slpc)); + drm_printf(p, "\tMin freq: %u MHz\n", + slpc_decode_min_freq(slpc)); + } + } + + return ret; +} + +void intel_guc_slpc_fini(struct intel_guc_slpc *slpc) +{ + if (!slpc->vma) + return; + + i915_vma_unpin_and_release(&slpc->vma, I915_VMA_RELEASE_MAP); +} diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h new file mode 100644 index 000000000000..e45054d5b9b4 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef _INTEL_GUC_SLPC_H_ +#define _INTEL_GUC_SLPC_H_ + +#include "intel_guc_submission.h" +#include "intel_guc_slpc_types.h" + +struct intel_gt; +struct drm_printer; + +static inline bool intel_guc_slpc_is_supported(struct intel_guc *guc) +{ + return guc->slpc.supported; +} + +static inline bool intel_guc_slpc_is_wanted(struct intel_guc *guc) +{ + return guc->slpc.selected; +} + +static inline bool intel_guc_slpc_is_used(struct intel_guc *guc) +{ + return intel_guc_submission_is_used(guc) && intel_guc_slpc_is_wanted(guc); +} + +void intel_guc_slpc_init_early(struct intel_guc_slpc *slpc); + +int intel_guc_slpc_init(struct intel_guc_slpc *slpc); +int intel_guc_slpc_enable(struct intel_guc_slpc *slpc); +void intel_guc_slpc_fini(struct intel_guc_slpc *slpc); +int intel_guc_slpc_set_max_freq(struct intel_guc_slpc *slpc, u32 val); +int intel_guc_slpc_set_min_freq(struct intel_guc_slpc *slpc, u32 val); +int intel_guc_slpc_get_max_freq(struct intel_guc_slpc *slpc, u32 *val); +int intel_guc_slpc_get_min_freq(struct intel_guc_slpc *slpc, u32 *val); +int intel_guc_slpc_print_info(struct intel_guc_slpc *slpc, struct drm_printer 
*p); +void intel_guc_pm_intrmsk_enable(struct intel_gt *gt); + +#endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h new file mode 100644 index 000000000000..41d13527666f --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef _INTEL_GUC_SLPC_TYPES_H_ +#define _INTEL_GUC_SLPC_TYPES_H_ + +#include <linux/types.h> + +#define SLPC_RESET_TIMEOUT_MS 5 + +struct intel_guc_slpc { + struct i915_vma *vma; + struct slpc_shared_data *vaddr; + bool supported; + bool selected; + + /* platform frequency limits */ + u32 min_freq; + u32 rp0_freq; + u32 rp1_freq; + + /* frequency softlimits */ + u32 min_freq_softlimit; + u32 max_freq_softlimit; +}; + +#endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 7c8ff9792f7b..87d8dc8f51b9 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -10,10 +10,13 @@ #include "gt/intel_breadcrumbs.h" #include "gt/intel_context.h" #include "gt/intel_engine_pm.h" +#include "gt/intel_engine_heartbeat.h" #include "gt/intel_gt.h" #include "gt/intel_gt_irq.h" #include "gt/intel_gt_pm.h" +#include "gt/intel_gt_requests.h" #include "gt/intel_lrc.h" +#include "gt/intel_lrc_reg.h" #include "gt/intel_mocs.h" #include "gt/intel_ring.h" @@ -58,244 +61,705 @@ * */ +/* GuC Virtual Engine */ +struct guc_virtual_engine { + struct intel_engine_cs base; + struct intel_context context; +}; + +static struct intel_context * +guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count); + #define GUC_REQUEST_SIZE 64 /* bytes */ -static inline struct i915_priolist *to_priolist(struct rb_node *rb) +/* + * Below is a set of functions which control the GuC scheduling state which do + * not require a lock as all state transitions are mutually exclusive. i.e. It + * is not possible for the context pinning code and submission, for the same + * context, to be executing simultaneously. We still need an atomic as it is + * possible for some of the bits to change at the same time though.
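To make that locking rule concrete, here is a minimal C11 model of such a lock-free flag word (flag names abbreviated, not the driver's definitions): each state is one bit in a single atomic, and fetch_or/fetch_and keep concurrent updates to different bits from losing each other, which is why an atomic is still needed even though the transitions themselves are mutually exclusive.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define STATE_ENABLED        (1u << 0)
#define STATE_PENDING_ENABLE (1u << 1)
#define STATE_REGISTERED     (1u << 2)

static atomic_uint sched_state;              /* zero-initialised flag word */

static void set_flag(unsigned int flag)  { atomic_fetch_or(&sched_state, flag); }
static void clr_flag(unsigned int flag)  { atomic_fetch_and(&sched_state, ~flag); }
static bool test_flag(unsigned int flag) { return atomic_load(&sched_state) & flag; }

int main(void)
{
        set_flag(STATE_REGISTERED);
        set_flag(STATE_ENABLED);
        clr_flag(STATE_ENABLED);             /* does not disturb STATE_REGISTERED */
        printf("registered=%d enabled=%d\n",
               test_flag(STATE_REGISTERED), test_flag(STATE_ENABLED));
        return 0;
}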
+ */ +#define SCHED_STATE_NO_LOCK_ENABLED BIT(0) +#define SCHED_STATE_NO_LOCK_PENDING_ENABLE BIT(1) +#define SCHED_STATE_NO_LOCK_REGISTERED BIT(2) +static inline bool context_enabled(struct intel_context *ce) { - return rb_entry(rb, struct i915_priolist, node); + return (atomic_read(&ce->guc_sched_state_no_lock) & + SCHED_STATE_NO_LOCK_ENABLED); } -static struct guc_stage_desc *__get_stage_desc(struct intel_guc *guc, u32 id) +static inline void set_context_enabled(struct intel_context *ce) { - struct guc_stage_desc *base = guc->stage_desc_pool_vaddr; + atomic_or(SCHED_STATE_NO_LOCK_ENABLED, &ce->guc_sched_state_no_lock); +} - return &base[id]; +static inline void clr_context_enabled(struct intel_context *ce) +{ + atomic_and((u32)~SCHED_STATE_NO_LOCK_ENABLED, + &ce->guc_sched_state_no_lock); } -static int guc_stage_desc_pool_create(struct intel_guc *guc) +static inline bool context_pending_enable(struct intel_context *ce) { - u32 size = PAGE_ALIGN(sizeof(struct guc_stage_desc) * - GUC_MAX_STAGE_DESCRIPTORS); + return (atomic_read(&ce->guc_sched_state_no_lock) & + SCHED_STATE_NO_LOCK_PENDING_ENABLE); +} - return intel_guc_allocate_and_map_vma(guc, size, &guc->stage_desc_pool, - &guc->stage_desc_pool_vaddr); +static inline void set_context_pending_enable(struct intel_context *ce) +{ + atomic_or(SCHED_STATE_NO_LOCK_PENDING_ENABLE, + &ce->guc_sched_state_no_lock); +} + +static inline void clr_context_pending_enable(struct intel_context *ce) +{ + atomic_and((u32)~SCHED_STATE_NO_LOCK_PENDING_ENABLE, + &ce->guc_sched_state_no_lock); } -static void guc_stage_desc_pool_destroy(struct intel_guc *guc) +static inline bool context_registered(struct intel_context *ce) { - i915_vma_unpin_and_release(&guc->stage_desc_pool, I915_VMA_RELEASE_MAP); + return (atomic_read(&ce->guc_sched_state_no_lock) & + SCHED_STATE_NO_LOCK_REGISTERED); +} + +static inline void set_context_registered(struct intel_context *ce) +{ + atomic_or(SCHED_STATE_NO_LOCK_REGISTERED, + &ce->guc_sched_state_no_lock); +} + +static inline void clr_context_registered(struct intel_context *ce) +{ + atomic_and((u32)~SCHED_STATE_NO_LOCK_REGISTERED, + &ce->guc_sched_state_no_lock); } /* - * Initialise/clear the stage descriptor shared with the GuC firmware. - * - * This descriptor tells the GuC where (in GGTT space) to find the important - * data structures related to work submission (process descriptor, write queue, - * etc). + * Below is a set of functions which control the GuC scheduling state which + * require a lock, aside from the special case where the functions are called + * from guc_lrc_desc_pin(). In that case it isn't possible for any other code + * path to be executing on the context. 
*/ -static void guc_stage_desc_init(struct intel_guc *guc) +#define SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER BIT(0) +#define SCHED_STATE_DESTROYED BIT(1) +#define SCHED_STATE_PENDING_DISABLE BIT(2) +#define SCHED_STATE_BANNED BIT(3) +#define SCHED_STATE_BLOCKED_SHIFT 4 +#define SCHED_STATE_BLOCKED BIT(SCHED_STATE_BLOCKED_SHIFT) +#define SCHED_STATE_BLOCKED_MASK (0xfff << SCHED_STATE_BLOCKED_SHIFT) +static inline void init_sched_state(struct intel_context *ce) +{ + /* Only should be called from guc_lrc_desc_pin() */ + atomic_set(&ce->guc_sched_state_no_lock, 0); + ce->guc_state.sched_state = 0; +} + +static inline bool +context_wait_for_deregister_to_register(struct intel_context *ce) +{ + return ce->guc_state.sched_state & + SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER; +} + +static inline void +set_context_wait_for_deregister_to_register(struct intel_context *ce) { - struct guc_stage_desc *desc; + /* Only should be called from guc_lrc_desc_pin() without lock */ + ce->guc_state.sched_state |= + SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER; +} - /* we only use 1 stage desc, so hardcode it to 0 */ - desc = __get_stage_desc(guc, 0); - memset(desc, 0, sizeof(*desc)); +static inline void +clr_context_wait_for_deregister_to_register(struct intel_context *ce) +{ + lockdep_assert_held(&ce->guc_state.lock); + ce->guc_state.sched_state &= + ~SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER; +} - desc->attribute = GUC_STAGE_DESC_ATTR_ACTIVE | - GUC_STAGE_DESC_ATTR_KERNEL; +static inline bool +context_destroyed(struct intel_context *ce) +{ + return ce->guc_state.sched_state & SCHED_STATE_DESTROYED; +} - desc->stage_id = 0; - desc->priority = GUC_CLIENT_PRIORITY_KMD_NORMAL; +static inline void +set_context_destroyed(struct intel_context *ce) +{ + lockdep_assert_held(&ce->guc_state.lock); + ce->guc_state.sched_state |= SCHED_STATE_DESTROYED; +} - desc->wq_size = GUC_WQ_SIZE; +static inline bool context_pending_disable(struct intel_context *ce) +{ + return ce->guc_state.sched_state & SCHED_STATE_PENDING_DISABLE; } -static void guc_stage_desc_fini(struct intel_guc *guc) +static inline void set_context_pending_disable(struct intel_context *ce) { - struct guc_stage_desc *desc; + lockdep_assert_held(&ce->guc_state.lock); + ce->guc_state.sched_state |= SCHED_STATE_PENDING_DISABLE; +} - desc = __get_stage_desc(guc, 0); - memset(desc, 0, sizeof(*desc)); +static inline void clr_context_pending_disable(struct intel_context *ce) +{ + lockdep_assert_held(&ce->guc_state.lock); + ce->guc_state.sched_state &= ~SCHED_STATE_PENDING_DISABLE; } -static void guc_add_request(struct intel_guc *guc, struct i915_request *rq) +static inline bool context_banned(struct intel_context *ce) { - /* Leaving stub as this function will be used in future patches */ + return ce->guc_state.sched_state & SCHED_STATE_BANNED; } -/* - * When we're doing submissions using regular execlists backend, writing to - * ELSP from CPU side is enough to make sure that writes to ringbuffer pages - * pinned in mappable aperture portion of GGTT are visible to command streamer. - * Writes done by GuC on our behalf are not guaranteeing such ordering, - * therefore, to ensure the flush, we're issuing a POSTING READ. 
- */ -static void flush_ggtt_writes(struct i915_vma *vma) +static inline void set_context_banned(struct intel_context *ce) { - if (i915_vma_is_map_and_fenceable(vma)) - intel_uncore_posting_read_fw(vma->vm->gt->uncore, - GUC_STATUS); + lockdep_assert_held(&ce->guc_state.lock); + ce->guc_state.sched_state |= SCHED_STATE_BANNED; } -static void guc_submit(struct intel_engine_cs *engine, - struct i915_request **out, - struct i915_request **end) +static inline void clr_context_banned(struct intel_context *ce) { - struct intel_guc *guc = &engine->gt->uc.guc; + lockdep_assert_held(&ce->guc_state.lock); + ce->guc_state.sched_state &= ~SCHED_STATE_BANNED; +} - do { - struct i915_request *rq = *out++; +static inline u32 context_blocked(struct intel_context *ce) +{ + return (ce->guc_state.sched_state & SCHED_STATE_BLOCKED_MASK) >> + SCHED_STATE_BLOCKED_SHIFT; +} - flush_ggtt_writes(rq->ring->vma); - guc_add_request(guc, rq); - } while (out != end); +static inline void incr_context_blocked(struct intel_context *ce) +{ + lockdep_assert_held(&ce->engine->sched_engine->lock); + lockdep_assert_held(&ce->guc_state.lock); + + ce->guc_state.sched_state += SCHED_STATE_BLOCKED; + + GEM_BUG_ON(!context_blocked(ce)); /* Overflow check */ } -static inline int rq_prio(const struct i915_request *rq) +static inline void decr_context_blocked(struct intel_context *ce) { - return rq->sched.attr.priority; + lockdep_assert_held(&ce->engine->sched_engine->lock); + lockdep_assert_held(&ce->guc_state.lock); + + GEM_BUG_ON(!context_blocked(ce)); /* Underflow check */ + + ce->guc_state.sched_state -= SCHED_STATE_BLOCKED; } -static struct i915_request *schedule_in(struct i915_request *rq, int idx) +static inline bool context_guc_id_invalid(struct intel_context *ce) { - trace_i915_request_in(rq, idx); + return ce->guc_id == GUC_INVALID_LRC_ID; +} + +static inline void set_context_guc_id_invalid(struct intel_context *ce) +{ + ce->guc_id = GUC_INVALID_LRC_ID; +} + +static inline struct intel_guc *ce_to_guc(struct intel_context *ce) +{ + return &ce->engine->gt->uc.guc; +} + +static inline struct i915_priolist *to_priolist(struct rb_node *rb) +{ + return rb_entry(rb, struct i915_priolist, node); +} + +static struct guc_lrc_desc *__get_lrc_desc(struct intel_guc *guc, u32 index) +{ + struct guc_lrc_desc *base = guc->lrc_desc_pool_vaddr; + + GEM_BUG_ON(index >= GUC_MAX_LRC_DESCRIPTORS); + + return &base[index]; +} + +static inline struct intel_context *__get_context(struct intel_guc *guc, u32 id) +{ + struct intel_context *ce = xa_load(&guc->context_lookup, id); + + GEM_BUG_ON(id >= GUC_MAX_LRC_DESCRIPTORS); + + return ce; +} + +static int guc_lrc_desc_pool_create(struct intel_guc *guc) +{ + u32 size; + int ret; + + size = PAGE_ALIGN(sizeof(struct guc_lrc_desc) * + GUC_MAX_LRC_DESCRIPTORS); + ret = intel_guc_allocate_and_map_vma(guc, size, &guc->lrc_desc_pool, + (void **)&guc->lrc_desc_pool_vaddr); + if (ret) + return ret; + + return 0; +} + +static void guc_lrc_desc_pool_destroy(struct intel_guc *guc) +{ + guc->lrc_desc_pool_vaddr = NULL; + i915_vma_unpin_and_release(&guc->lrc_desc_pool, I915_VMA_RELEASE_MAP); +} + +static inline bool guc_submission_initialized(struct intel_guc *guc) +{ + return !!guc->lrc_desc_pool_vaddr; +} + +static inline void reset_lrc_desc(struct intel_guc *guc, u32 id) +{ + if (likely(guc_submission_initialized(guc))) { + struct guc_lrc_desc *desc = __get_lrc_desc(guc, id); + unsigned long flags; + + memset(desc, 0, sizeof(*desc)); + + /* + * xarray API doesn't have xa_erase_irqsave wrapper, so calling + * the 
lower level functions directly. + */ + xa_lock_irqsave(&guc->context_lookup, flags); + __xa_erase(&guc->context_lookup, id); + xa_unlock_irqrestore(&guc->context_lookup, flags); + } +} + +static inline bool lrc_desc_registered(struct intel_guc *guc, u32 id) +{ + return __get_context(guc, id); +} + +static inline void set_lrc_desc_registered(struct intel_guc *guc, u32 id, + struct intel_context *ce) +{ + unsigned long flags; /* - * Currently we are not tracking the rq->context being inflight - * (ce->inflight = rq->engine). It is only used by the execlists - * backend at the moment, a similar counting strategy would be - * required if we generalise the inflight tracking. + * xarray API doesn't have xa_save_irqsave wrapper, so calling the + * lower level functions directly. */ + xa_lock_irqsave(&guc->context_lookup, flags); + __xa_store(&guc->context_lookup, id, ce, GFP_ATOMIC); + xa_unlock_irqrestore(&guc->context_lookup, flags); +} + +static int guc_submission_send_busy_loop(struct intel_guc *guc, + const u32 *action, + u32 len, + u32 g2h_len_dw, + bool loop) +{ + int err; + + err = intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop); + + if (!err && g2h_len_dw) + atomic_inc(&guc->outstanding_submission_g2h); + + return err; +} + +int intel_guc_wait_for_pending_msg(struct intel_guc *guc, + atomic_t *wait_var, + bool interruptible, + long timeout) +{ + const int state = interruptible ? + TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE; + DEFINE_WAIT(wait); - __intel_gt_pm_get(rq->engine->gt); - return i915_request_get(rq); + might_sleep(); + GEM_BUG_ON(timeout < 0); + + if (!atomic_read(wait_var)) + return 0; + + if (!timeout) + return -ETIME; + + for (;;) { + prepare_to_wait(&guc->ct.wq, &wait, state); + + if (!atomic_read(wait_var)) + break; + + if (signal_pending_state(state, current)) { + timeout = -EINTR; + break; + } + + if (!timeout) { + timeout = -ETIME; + break; + } + + timeout = io_schedule_timeout(timeout); + } + finish_wait(&guc->ct.wq, &wait); + + return (timeout < 0) ? timeout : 0; } -static void schedule_out(struct i915_request *rq) +int intel_guc_wait_for_idle(struct intel_guc *guc, long timeout) { - trace_i915_request_out(rq); + if (!intel_uc_uses_guc_submission(&guc_to_gt(guc)->uc)) + return 0; - intel_gt_pm_put_async(rq->engine->gt); - i915_request_put(rq); + return intel_guc_wait_for_pending_msg(guc, + &guc->outstanding_submission_g2h, + true, timeout); } -static void __guc_dequeue(struct intel_engine_cs *engine) +static int guc_lrc_desc_pin(struct intel_context *ce, bool loop); + +static int guc_add_request(struct intel_guc *guc, struct i915_request *rq) { - struct intel_engine_execlists * const execlists = &engine->execlists; - struct i915_request **first = execlists->inflight; - struct i915_request ** const last_port = first + execlists->port_mask; - struct i915_request *last = first[0]; - struct i915_request **port; - bool submit = false; - struct rb_node *rb; + int err = 0; + struct intel_context *ce = rq->context; + u32 action[3]; + int len = 0; + u32 g2h_len_dw = 0; + bool enabled; - lockdep_assert_held(&engine->active.lock); + /* + * Corner case where requests were sitting in the priority list or a + * request resubmitted after the context was banned. 
+ */ + if (unlikely(intel_context_is_banned(ce))) { + i915_request_put(i915_request_mark_eio(rq)); + intel_engine_signal_breadcrumbs(ce->engine); + goto out; + } - if (last) { - if (*++first) - return; + GEM_BUG_ON(!atomic_read(&ce->guc_id_ref)); + GEM_BUG_ON(context_guc_id_invalid(ce)); - last = NULL; + /* + * Corner case where the GuC firmware was blown away and reloaded while + * this context was pinned. + */ + if (unlikely(!lrc_desc_registered(guc, ce->guc_id))) { + err = guc_lrc_desc_pin(ce, false); + if (unlikely(err)) + goto out; } /* - * We write directly into the execlists->inflight queue and don't use - * the execlists->pending queue, as we don't have a distinct switch - * event. + * The request / context will be run on the hardware when scheduling + * gets enabled in the unblock. */ - port = first; - while ((rb = rb_first_cached(&execlists->queue))) { + if (unlikely(context_blocked(ce))) + goto out; + + enabled = context_enabled(ce); + + if (!enabled) { + action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET; + action[len++] = ce->guc_id; + action[len++] = GUC_CONTEXT_ENABLE; + set_context_pending_enable(ce); + intel_context_get(ce); + g2h_len_dw = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET; + } else { + action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT; + action[len++] = ce->guc_id; + } + + err = intel_guc_send_nb(guc, action, len, g2h_len_dw); + if (!enabled && !err) { + trace_intel_context_sched_enable(ce); + atomic_inc(&guc->outstanding_submission_g2h); + set_context_enabled(ce); + } else if (!enabled) { + clr_context_pending_enable(ce); + intel_context_put(ce); + } + if (likely(!err)) + trace_i915_request_guc_submit(rq); + +out: + return err; +} + +static inline void guc_set_lrc_tail(struct i915_request *rq) +{ + rq->context->lrc_reg_state[CTX_RING_TAIL] = + intel_ring_set_tail(rq->ring, rq->tail); +} + +static inline int rq_prio(const struct i915_request *rq) +{ + return rq->sched.attr.priority; +} + +static int guc_dequeue_one_context(struct intel_guc *guc) +{ + struct i915_sched_engine * const sched_engine = guc->sched_engine; + struct i915_request *last = NULL; + bool submit = false; + struct rb_node *rb; + int ret; + + lockdep_assert_held(&sched_engine->lock); + + if (guc->stalled_request) { + submit = true; + last = guc->stalled_request; + goto resubmit; + } + + while ((rb = rb_first_cached(&sched_engine->queue))) { struct i915_priolist *p = to_priolist(rb); struct i915_request *rq, *rn; priolist_for_each_request_consume(rq, rn, p) { - if (last && rq->context != last->context) { - if (port == last_port) - goto done; - - *port = schedule_in(last, - port - execlists->inflight); - port++; - } + if (last && rq->context != last->context) + goto done; list_del_init(&rq->sched.link); + __i915_request_submit(rq); - submit = true; + + trace_i915_request_in(rq, 0); last = rq; + submit = true; } - rb_erase_cached(&p->node, &execlists->queue); + rb_erase_cached(&p->node, &sched_engine->queue); i915_priolist_free(p); } done: - execlists->queue_priority_hint = - rb ? 
to_priolist(rb)->priority : INT_MIN; if (submit) { - *port = schedule_in(last, port - execlists->inflight); - *++port = NULL; - guc_submit(engine, first, port); + guc_set_lrc_tail(last); +resubmit: + ret = guc_add_request(guc, last); + if (unlikely(ret == -EPIPE)) + goto deadlk; + else if (ret == -EBUSY) { + tasklet_schedule(&sched_engine->tasklet); + guc->stalled_request = last; + return false; + } } - execlists->active = execlists->inflight; + + guc->stalled_request = NULL; + return submit; + +deadlk: + sched_engine->tasklet.callback = NULL; + tasklet_disable_nosync(&sched_engine->tasklet); + return false; } static void guc_submission_tasklet(struct tasklet_struct *t) { - struct intel_engine_cs * const engine = - from_tasklet(engine, t, execlists.tasklet); - struct intel_engine_execlists * const execlists = &engine->execlists; - struct i915_request **port, *rq; + struct i915_sched_engine *sched_engine = + from_tasklet(sched_engine, t, tasklet); unsigned long flags; + bool loop; - spin_lock_irqsave(&engine->active.lock, flags); - - for (port = execlists->inflight; (rq = *port); port++) { - if (!i915_request_completed(rq)) - break; + spin_lock_irqsave(&sched_engine->lock, flags); - schedule_out(rq); - } - if (port != execlists->inflight) { - int idx = port - execlists->inflight; - int rem = ARRAY_SIZE(execlists->inflight) - idx; - memmove(execlists->inflight, port, rem * sizeof(*port)); - } + do { + loop = guc_dequeue_one_context(sched_engine->private_data); + } while (loop); - __guc_dequeue(engine); + i915_sched_engine_reset_on_empty(sched_engine); - spin_unlock_irqrestore(&engine->active.lock, flags); + spin_unlock_irqrestore(&sched_engine->lock, flags); } static void cs_irq_handler(struct intel_engine_cs *engine, u16 iir) { - if (iir & GT_RENDER_USER_INTERRUPT) { + if (iir & GT_RENDER_USER_INTERRUPT) intel_engine_signal_breadcrumbs(engine); - tasklet_hi_schedule(&engine->execlists.tasklet); +} + +static void __guc_context_destroy(struct intel_context *ce); +static void release_guc_id(struct intel_guc *guc, struct intel_context *ce); +static void guc_signal_context_fence(struct intel_context *ce); +static void guc_cancel_context_requests(struct intel_context *ce); +static void guc_blocked_fence_complete(struct intel_context *ce); + +static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc) +{ + struct intel_context *ce; + unsigned long index, flags; + bool pending_disable, pending_enable, deregister, destroyed, banned; + + xa_for_each(&guc->context_lookup, index, ce) { + /* Flush context */ + spin_lock_irqsave(&ce->guc_state.lock, flags); + spin_unlock_irqrestore(&ce->guc_state.lock, flags); + + /* + * Once we are at this point submission_disabled() is guaranteed + * to be visible to all callers who set the below flags (see above + * flush and flushes in reset_prepare). If submission_disabled() + * is set, the caller shouldn't set these flags. + */ + + destroyed = context_destroyed(ce); + pending_enable = context_pending_enable(ce); + pending_disable = context_pending_disable(ce); + deregister = context_wait_for_deregister_to_register(ce); + banned = context_banned(ce); + init_sched_state(ce); + + if (pending_enable || destroyed || deregister) { + atomic_dec(&guc->outstanding_submission_g2h); + if (deregister) + guc_signal_context_fence(ce); + if (destroyed) { + release_guc_id(guc, ce); + __guc_context_destroy(ce); + } + if (pending_enable || deregister) + intel_context_put(ce); + } + + /* Not mutually exclusive with the above if statement.
*/ + if (pending_disable) { + guc_signal_context_fence(ce); + if (banned) { + guc_cancel_context_requests(ce); + intel_engine_signal_breadcrumbs(ce->engine); + } + intel_context_sched_disable_unpin(ce); + atomic_dec(&guc->outstanding_submission_g2h); + spin_lock_irqsave(&ce->guc_state.lock, flags); + guc_blocked_fence_complete(ce); + spin_unlock_irqrestore(&ce->guc_state.lock, flags); + + intel_context_put(ce); + } } } -static void guc_reset_prepare(struct intel_engine_cs *engine) +static inline bool +submission_disabled(struct intel_guc *guc) +{ + struct i915_sched_engine * const sched_engine = guc->sched_engine; + + return unlikely(!sched_engine || + !__tasklet_is_enabled(&sched_engine->tasklet)); +} + +static void disable_submission(struct intel_guc *guc) { - struct intel_engine_execlists * const execlists = &engine->execlists; + struct i915_sched_engine * const sched_engine = guc->sched_engine; + + if (__tasklet_is_enabled(&sched_engine->tasklet)) { + GEM_BUG_ON(!guc->ct.enabled); + __tasklet_disable_sync_once(&sched_engine->tasklet); + sched_engine->tasklet.callback = NULL; + } +} + +static void enable_submission(struct intel_guc *guc) +{ + struct i915_sched_engine * const sched_engine = guc->sched_engine; + unsigned long flags; - ENGINE_TRACE(engine, "\n"); + spin_lock_irqsave(&guc->sched_engine->lock, flags); + sched_engine->tasklet.callback = guc_submission_tasklet; + wmb(); /* Make sure callback visible */ + if (!__tasklet_is_enabled(&sched_engine->tasklet) && + __tasklet_enable(&sched_engine->tasklet)) { + GEM_BUG_ON(!guc->ct.enabled); + + /* And kick in case we missed a new request submission. */ + tasklet_hi_schedule(&sched_engine->tasklet); + } + spin_unlock_irqrestore(&guc->sched_engine->lock, flags); +} + +static void guc_flush_submissions(struct intel_guc *guc) +{ + struct i915_sched_engine * const sched_engine = guc->sched_engine; + unsigned long flags; + + spin_lock_irqsave(&sched_engine->lock, flags); + spin_unlock_irqrestore(&sched_engine->lock, flags); +} + +void intel_guc_submission_reset_prepare(struct intel_guc *guc) +{ + int i; + + if (unlikely(!guc_submission_initialized(guc))) { + /* Reset called during driver load? GuC not yet initialised! */ + return; + } + + intel_gt_park_heartbeats(guc_to_gt(guc)); + disable_submission(guc); + guc->interrupts.disable(guc); + + /* Flush IRQ handler */ + spin_lock_irq(&guc_to_gt(guc)->irq_lock); + spin_unlock_irq(&guc_to_gt(guc)->irq_lock); + + guc_flush_submissions(guc); /* - * Prevent request submission to the hardware until we have - * completed the reset in i915_gem_reset_finish(). If a request - * is completed by one engine, it may then queue a request - * to a second via its execlists->tasklet *just* as we are - * calling engine->init_hw() and also writing the ELSP. - * Turning off the execlists->tasklet until the reset is over - * prevents the race. + * Handle any outstanding G2Hs before reset. Call IRQ handler directly + * each pass as interrupts have been disabled. We always scrub for + * outstanding G2H as it is possible for outstanding_submission_g2h to + * be incremented after the context state update. 
*/ - __tasklet_disable_sync_once(&execlists->tasklet); + for (i = 0; i < 4 && atomic_read(&guc->outstanding_submission_g2h); ++i) { + intel_guc_to_host_event_handler(guc); +#define wait_for_reset(guc, wait_var) \ + intel_guc_wait_for_pending_msg(guc, wait_var, false, (HZ / 20)) + do { + wait_for_reset(guc, &guc->outstanding_submission_g2h); + } while (!list_empty(&guc->ct.requests.incoming)); + } + scrub_guc_desc_for_outstanding_g2h(guc); +} + +static struct intel_engine_cs * +guc_virtual_get_sibling(struct intel_engine_cs *ve, unsigned int sibling) +{ + struct intel_engine_cs *engine; + intel_engine_mask_t tmp, mask = ve->mask; + unsigned int num_siblings = 0; + + for_each_engine_masked(engine, ve->gt, mask, tmp) + if (num_siblings++ == sibling) + return engine; + + return NULL; +} + +static inline struct intel_engine_cs * +__context_to_physical_engine(struct intel_context *ce) +{ + struct intel_engine_cs *engine = ce->engine; + + if (intel_engine_is_virtual(engine)) + engine = guc_virtual_get_sibling(engine, 0); + + return engine; } -static void guc_reset_state(struct intel_context *ce, - struct intel_engine_cs *engine, - u32 head, - bool scrub) +static void guc_reset_state(struct intel_context *ce, u32 head, bool scrub) { + struct intel_engine_cs *engine = __context_to_physical_engine(ce); + + if (intel_context_is_banned(ce)) + return; + GEM_BUG_ON(!intel_context_is_pinned(ce)); /* @@ -313,37 +777,132 @@ static void guc_reset_state(struct intel_context *ce, lrc_update_regs(ce, engine, head); } -static void guc_reset_rewind(struct intel_engine_cs *engine, bool stalled) +static void guc_reset_nop(struct intel_engine_cs *engine) { - struct intel_engine_execlists * const execlists = &engine->execlists; - struct i915_request *rq; +} + +static void guc_rewind_nop(struct intel_engine_cs *engine, bool stalled) +{ +} + +static void +__unwind_incomplete_requests(struct intel_context *ce) +{ + struct i915_request *rq, *rn; + struct list_head *pl; + int prio = I915_PRIORITY_INVALID; + struct i915_sched_engine * const sched_engine = + ce->engine->sched_engine; unsigned long flags; - spin_lock_irqsave(&engine->active.lock, flags); + spin_lock_irqsave(&sched_engine->lock, flags); + spin_lock(&ce->guc_active.lock); + list_for_each_entry_safe(rq, rn, + &ce->guc_active.requests, + sched.link) { + if (i915_request_completed(rq)) + continue; - /* Push back any incomplete requests for replay after the reset. */ - rq = execlists_unwind_incomplete_requests(execlists); - if (!rq) - goto out_unlock; + list_del_init(&rq->sched.link); + spin_unlock(&ce->guc_active.lock); + + __i915_request_unsubmit(rq); + + /* Push the request back into the queue for later resubmission. */ + GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID); + if (rq_prio(rq) != prio) { + prio = rq_prio(rq); + pl = i915_sched_lookup_priolist(sched_engine, prio); + } + GEM_BUG_ON(i915_sched_engine_is_empty(sched_engine)); + + list_add_tail(&rq->sched.link, pl); + set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); + + spin_lock(&ce->guc_active.lock); + } + spin_unlock(&ce->guc_active.lock); + spin_unlock_irqrestore(&sched_engine->lock, flags); +} + +static void __guc_reset_context(struct intel_context *ce, bool stalled) +{ + struct i915_request *rq; + u32 head; + + intel_context_get(ce); + + /* + * GuC will implicitly mark the context as non-schedulable + * when it sends the reset notification. Make sure our state + * reflects this change. The context will be marked enabled + * on resubmission. 
+ */ + clr_context_enabled(ce); + + rq = intel_context_find_active_request(ce); + if (!rq) { + head = ce->ring->tail; + stalled = false; + goto out_replay; + } if (!i915_request_started(rq)) stalled = false; + GEM_BUG_ON(i915_active_is_idle(&ce->active)); + head = intel_ring_wrap(ce->ring, rq->head); __i915_request_reset(rq, stalled); - guc_reset_state(rq->context, engine, rq->head, stalled); -out_unlock: - spin_unlock_irqrestore(&engine->active.lock, flags); +out_replay: + guc_reset_state(ce, head, stalled); + __unwind_incomplete_requests(ce); + intel_context_put(ce); +} + +void intel_guc_submission_reset(struct intel_guc *guc, bool stalled) +{ + struct intel_context *ce; + unsigned long index; + + if (unlikely(!guc_submission_initialized(guc))) { + /* Reset called during driver load? GuC not yet initialised! */ + return; + } + + xa_for_each(&guc->context_lookup, index, ce) + if (intel_context_is_pinned(ce)) + __guc_reset_context(ce, stalled); + + /* GuC is blown away, drop all references to contexts */ + xa_destroy(&guc->context_lookup); } -static void guc_reset_cancel(struct intel_engine_cs *engine) +static void guc_cancel_context_requests(struct intel_context *ce) +{ + struct i915_sched_engine *sched_engine = ce_to_guc(ce)->sched_engine; + struct i915_request *rq; + unsigned long flags; + + /* Mark all executing requests as skipped. */ + spin_lock_irqsave(&sched_engine->lock, flags); + spin_lock(&ce->guc_active.lock); + list_for_each_entry(rq, &ce->guc_active.requests, sched.link) + i915_request_put(i915_request_mark_eio(rq)); + spin_unlock(&ce->guc_active.lock); + spin_unlock_irqrestore(&sched_engine->lock, flags); +} + +static void +guc_cancel_sched_engine_requests(struct i915_sched_engine *sched_engine) { - struct intel_engine_execlists * const execlists = &engine->execlists; struct i915_request *rq, *rn; struct rb_node *rb; unsigned long flags; - ENGINE_TRACE(engine, "\n"); + /* Can be called during boot if GuC fails to load */ + if (!sched_engine) + return; /* * Before we call engine->cancel_requests(), we should have exclusive @@ -359,47 +918,67 @@ static void guc_reset_cancel(struct intel_engine_cs *engine) * submission's irq state, we also wish to remind ourselves that * it is irq state.) */ - spin_lock_irqsave(&engine->active.lock, flags); - - /* Mark all executing requests as skipped. */ - list_for_each_entry(rq, &engine->active.requests, sched.link) { - i915_request_set_error_once(rq, -EIO); - i915_request_mark_complete(rq); - } + spin_lock_irqsave(&sched_engine->lock, flags); /* Flush the queued requests to the timeline list (for retiring). 
*/ - while ((rb = rb_first_cached(&execlists->queue))) { + while ((rb = rb_first_cached(&sched_engine->queue))) { struct i915_priolist *p = to_priolist(rb); priolist_for_each_request_consume(rq, rn, p) { list_del_init(&rq->sched.link); + __i915_request_submit(rq); - dma_fence_set_error(&rq->fence, -EIO); - i915_request_mark_complete(rq); + + i915_request_put(i915_request_mark_eio(rq)); } - rb_erase_cached(&p->node, &execlists->queue); + rb_erase_cached(&p->node, &sched_engine->queue); i915_priolist_free(p); } /* Remaining _unready_ requests will be nop'ed when submitted */ - execlists->queue_priority_hint = INT_MIN; - execlists->queue = RB_ROOT_CACHED; + sched_engine->queue_priority_hint = INT_MIN; + sched_engine->queue = RB_ROOT_CACHED; - spin_unlock_irqrestore(&engine->active.lock, flags); + spin_unlock_irqrestore(&sched_engine->lock, flags); } -static void guc_reset_finish(struct intel_engine_cs *engine) +void intel_guc_submission_cancel_requests(struct intel_guc *guc) { - struct intel_engine_execlists * const execlists = &engine->execlists; + struct intel_context *ce; + unsigned long index; - if (__tasklet_enable(&execlists->tasklet)) - /* And kick in case we missed a new request submission. */ - tasklet_hi_schedule(&execlists->tasklet); + xa_for_each(&guc->context_lookup, index, ce) + if (intel_context_is_pinned(ce)) + guc_cancel_context_requests(ce); + + guc_cancel_sched_engine_requests(guc->sched_engine); - ENGINE_TRACE(engine, "depth->%d\n", - atomic_read(&execlists->tasklet.count)); + /* GuC is blown away, drop all references to contexts */ + xa_destroy(&guc->context_lookup); +} + +void intel_guc_submission_reset_finish(struct intel_guc *guc) +{ + /* Reset called during driver load or during wedge? */ + if (unlikely(!guc_submission_initialized(guc) || + test_bit(I915_WEDGED, &guc_to_gt(guc)->reset.flags))) { + return; + } + + /* + * Technically possible for either of these values to be non-zero here, + * but very unlikely + harmless. Regardless let's add a warn so we can + * see in CI if this happens frequently / a precursor to taking down the + * machine. + */ + GEM_WARN_ON(atomic_read(&guc->outstanding_submission_g2h)); + atomic_set(&guc->outstanding_submission_g2h, 0); + + intel_guc_global_policies_update(guc); + enable_submission(guc); + intel_gt_unpark_heartbeats(guc_to_gt(guc)); } /* @@ -410,43 +989,986 @@ int intel_guc_submission_init(struct intel_guc *guc) { int ret; - if (guc->stage_desc_pool) + if (guc->lrc_desc_pool) return 0; - ret = guc_stage_desc_pool_create(guc); + ret = guc_lrc_desc_pool_create(guc); if (ret) return ret; /* * Keep static analysers happy, let them know that we allocated the * vma after testing that it didn't exist earlier. 
*/ - GEM_BUG_ON(!guc->stage_desc_pool); + GEM_BUG_ON(!guc->lrc_desc_pool); + + xa_init_flags(&guc->context_lookup, XA_FLAGS_LOCK_IRQ); + + spin_lock_init(&guc->contexts_lock); + INIT_LIST_HEAD(&guc->guc_id_list); + ida_init(&guc->guc_ids); return 0; } void intel_guc_submission_fini(struct intel_guc *guc) { - if (guc->stage_desc_pool) { - guc_stage_desc_pool_destroy(guc); + if (!guc->lrc_desc_pool) + return; + + guc_lrc_desc_pool_destroy(guc); + i915_sched_engine_put(guc->sched_engine); +} + +static inline void queue_request(struct i915_sched_engine *sched_engine, + struct i915_request *rq, + int prio) +{ + GEM_BUG_ON(!list_empty(&rq->sched.link)); + list_add_tail(&rq->sched.link, + i915_sched_lookup_priolist(sched_engine, prio)); + set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); +} + +static int guc_bypass_tasklet_submit(struct intel_guc *guc, + struct i915_request *rq) +{ + int ret; + + __i915_request_submit(rq); + + trace_i915_request_in(rq, 0); + + guc_set_lrc_tail(rq); + ret = guc_add_request(guc, rq); + if (ret == -EBUSY) + guc->stalled_request = rq; + + if (unlikely(ret == -EPIPE)) + disable_submission(guc); + + return ret; +} + +static void guc_submit_request(struct i915_request *rq) +{ + struct i915_sched_engine *sched_engine = rq->engine->sched_engine; + struct intel_guc *guc = &rq->engine->gt->uc.guc; + unsigned long flags; + + /* Will be called from irq-context when using foreign fences. */ + spin_lock_irqsave(&sched_engine->lock, flags); + + if (submission_disabled(guc) || guc->stalled_request || + !i915_sched_engine_is_empty(sched_engine)) + queue_request(sched_engine, rq, rq_prio(rq)); + else if (guc_bypass_tasklet_submit(guc, rq) == -EBUSY) + tasklet_hi_schedule(&sched_engine->tasklet); + + spin_unlock_irqrestore(&sched_engine->lock, flags); +} + +static int new_guc_id(struct intel_guc *guc) +{ + return ida_simple_get(&guc->guc_ids, 0, + GUC_MAX_LRC_DESCRIPTORS, GFP_KERNEL | + __GFP_RETRY_MAYFAIL | __GFP_NOWARN); +} + +static void __release_guc_id(struct intel_guc *guc, struct intel_context *ce) +{ + if (!context_guc_id_invalid(ce)) { + ida_simple_remove(&guc->guc_ids, ce->guc_id); + reset_lrc_desc(guc, ce->guc_id); + set_context_guc_id_invalid(ce); } + if (!list_empty(&ce->guc_id_link)) + list_del_init(&ce->guc_id_link); } -static int guc_context_alloc(struct intel_context *ce) +static void release_guc_id(struct intel_guc *guc, struct intel_context *ce) { - return lrc_alloc(ce, ce->engine); + unsigned long flags; + + spin_lock_irqsave(&guc->contexts_lock, flags); + __release_guc_id(guc, ce); + spin_unlock_irqrestore(&guc->contexts_lock, flags); +} + +static int steal_guc_id(struct intel_guc *guc) +{ + struct intel_context *ce; + int guc_id; + + lockdep_assert_held(&guc->contexts_lock); + + if (!list_empty(&guc->guc_id_list)) { + ce = list_first_entry(&guc->guc_id_list, + struct intel_context, + guc_id_link); + + GEM_BUG_ON(atomic_read(&ce->guc_id_ref)); + GEM_BUG_ON(context_guc_id_invalid(ce)); + + list_del_init(&ce->guc_id_link); + guc_id = ce->guc_id; + clr_context_registered(ce); + set_context_guc_id_invalid(ce); + return guc_id; + } else { + return -EAGAIN; + } +} + +static int assign_guc_id(struct intel_guc *guc, u16 *out) +{ + int ret; + + lockdep_assert_held(&guc->contexts_lock); + + ret = new_guc_id(guc); + if (unlikely(ret < 0)) { + ret = steal_guc_id(guc); + if (ret < 0) + return ret; + } + + *out = ret; + return 0; +} + +#define PIN_GUC_ID_TRIES 4 +static int pin_guc_id(struct intel_guc *guc, struct intel_context *ce) +{ + int ret = 0; + unsigned long flags, 
tries = PIN_GUC_ID_TRIES; + + GEM_BUG_ON(atomic_read(&ce->guc_id_ref)); + +try_again: + spin_lock_irqsave(&guc->contexts_lock, flags); + + if (context_guc_id_invalid(ce)) { + ret = assign_guc_id(guc, &ce->guc_id); + if (ret) + goto out_unlock; + ret = 1; /* Indicates newly assigned guc_id */ + } + if (!list_empty(&ce->guc_id_link)) + list_del_init(&ce->guc_id_link); + atomic_inc(&ce->guc_id_ref); + +out_unlock: + spin_unlock_irqrestore(&guc->contexts_lock, flags); + + /* + * -EAGAIN indicates no guc_ids are available, let's retire any + * outstanding requests to see if that frees up a guc_id. If the first + * retire didn't help, insert a sleep with the timeslice duration before + * attempting to retire more requests. Double the sleep period each + * subsequent pass before finally giving up. The sleep period has a max of + * 100ms and a minimum of 1ms. + */ + if (ret == -EAGAIN && --tries) { + if (PIN_GUC_ID_TRIES - tries > 1) { + unsigned int timeslice_shifted = + ce->engine->props.timeslice_duration_ms << + (PIN_GUC_ID_TRIES - tries - 2); + unsigned int max = min_t(unsigned int, 100, + timeslice_shifted); + + msleep(max_t(unsigned int, max, 1)); + } + intel_gt_retire_requests(guc_to_gt(guc)); + goto try_again; + } + + return ret; +} + +static void unpin_guc_id(struct intel_guc *guc, struct intel_context *ce) +{ + unsigned long flags; + + GEM_BUG_ON(atomic_read(&ce->guc_id_ref) < 0); + + if (unlikely(context_guc_id_invalid(ce))) + return; + + spin_lock_irqsave(&guc->contexts_lock, flags); + if (!context_guc_id_invalid(ce) && list_empty(&ce->guc_id_link) && + !atomic_read(&ce->guc_id_ref)) + list_add_tail(&ce->guc_id_link, &guc->guc_id_list); + spin_unlock_irqrestore(&guc->contexts_lock, flags); +} + +static int __guc_action_register_context(struct intel_guc *guc, + u32 guc_id, + u32 offset, + bool loop) +{ + u32 action[] = { + INTEL_GUC_ACTION_REGISTER_CONTEXT, + guc_id, + offset, + }; + + return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), + 0, loop); +} + +static int register_context(struct intel_context *ce, bool loop) +{ + struct intel_guc *guc = ce_to_guc(ce); + u32 offset = intel_guc_ggtt_offset(guc, guc->lrc_desc_pool) + + ce->guc_id * sizeof(struct guc_lrc_desc); + int ret; + + trace_intel_context_register(ce); + + ret = __guc_action_register_context(guc, ce->guc_id, offset, loop); + if (likely(!ret)) + set_context_registered(ce); + + return ret; +} + +static int __guc_action_deregister_context(struct intel_guc *guc, + u32 guc_id, + bool loop) +{ + u32 action[] = { + INTEL_GUC_ACTION_DEREGISTER_CONTEXT, + guc_id, + }; + + return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), + G2H_LEN_DW_DEREGISTER_CONTEXT, + loop); +} + +static int deregister_context(struct intel_context *ce, u32 guc_id, bool loop) +{ + struct intel_guc *guc = ce_to_guc(ce); + + trace_intel_context_deregister(ce); + + return __guc_action_deregister_context(guc, guc_id, loop); +} + +static intel_engine_mask_t adjust_engine_mask(u8 class, intel_engine_mask_t mask) +{ + switch (class) { + case RENDER_CLASS: + return mask >> RCS0; + case VIDEO_ENHANCEMENT_CLASS: + return mask >> VECS0; + case VIDEO_DECODE_CLASS: + return mask >> VCS0; + case COPY_ENGINE_CLASS: + return mask >> BCS0; + default: + MISSING_CASE(class); + return 0; + } +} + +static void guc_context_policy_init(struct intel_engine_cs *engine, + struct guc_lrc_desc *desc) +{ + desc->policy_flags = 0; + + if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION) + desc->policy_flags |= CONTEXT_POLICY_FLAG_PREEMPT_TO_IDLE; + + /* 
NB: For both of these, zero means disabled. */ + desc->execution_quantum = engine->props.timeslice_duration_ms * 1000; + desc->preemption_timeout = engine->props.preempt_timeout_ms * 1000; +} + +static inline u8 map_i915_prio_to_guc_prio(int prio); + +static int guc_lrc_desc_pin(struct intel_context *ce, bool loop) +{ + struct intel_engine_cs *engine = ce->engine; + struct intel_runtime_pm *runtime_pm = engine->uncore->rpm; + struct intel_guc *guc = &engine->gt->uc.guc; + u32 desc_idx = ce->guc_id; + struct guc_lrc_desc *desc; + const struct i915_gem_context *ctx; + int prio = I915_CONTEXT_DEFAULT_PRIORITY; + bool context_registered; + intel_wakeref_t wakeref; + int ret = 0; + + GEM_BUG_ON(!engine->mask); + + /* + * Ensure LRC + CT vmas are in the same region as the write barrier is done + * based on the CT vma region. + */ + GEM_BUG_ON(i915_gem_object_is_lmem(guc->ct.vma->obj) != + i915_gem_object_is_lmem(ce->ring->vma->obj)); + + context_registered = lrc_desc_registered(guc, desc_idx); + + rcu_read_lock(); + ctx = rcu_dereference(ce->gem_context); + if (ctx) + prio = ctx->sched.priority; + rcu_read_unlock(); + + reset_lrc_desc(guc, desc_idx); + set_lrc_desc_registered(guc, desc_idx, ce); + + desc = __get_lrc_desc(guc, desc_idx); + desc->engine_class = engine_class_to_guc_class(engine->class); + desc->engine_submit_mask = adjust_engine_mask(engine->class, + engine->mask); + desc->hw_context_desc = ce->lrc.lrca; + ce->guc_prio = map_i915_prio_to_guc_prio(prio); + desc->priority = ce->guc_prio; + desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD; + guc_context_policy_init(engine, desc); + init_sched_state(ce); + + /* + * The context_lookup xarray is used to determine if the hardware + * context is currently registered. There are two cases in which it + * could be registered: either the guc_id has been stolen from another + * context or the lrc descriptor address of this context has changed. In + * either case the context needs to be deregistered with the GuC before + * registering this context. + */ + if (context_registered) { + trace_intel_context_steal_guc_id(ce); + if (!loop) { + set_context_wait_for_deregister_to_register(ce); + intel_context_get(ce); + } else { + bool disabled; + unsigned long flags; + + /* Seal race with Reset */ + spin_lock_irqsave(&ce->guc_state.lock, flags); + disabled = submission_disabled(guc); + if (likely(!disabled)) { + set_context_wait_for_deregister_to_register(ce); + intel_context_get(ce); + } + spin_unlock_irqrestore(&ce->guc_state.lock, flags); + if (unlikely(disabled)) { + reset_lrc_desc(guc, desc_idx); + return 0; /* Will get registered later */ + } + } + + /* + * If stealing the guc_id, this ce has the same guc_id as the + * context whose guc_id was stolen. 
+ */ + with_intel_runtime_pm(runtime_pm, wakeref) + ret = deregister_context(ce, ce->guc_id, loop); + if (unlikely(ret == -EBUSY)) { + clr_context_wait_for_deregister_to_register(ce); + intel_context_put(ce); + } else if (unlikely(ret == -ENODEV)) { + ret = 0; /* Will get registered later */ + } + } else { + with_intel_runtime_pm(runtime_pm, wakeref) + ret = register_context(ce, loop); + if (unlikely(ret == -EBUSY)) + reset_lrc_desc(guc, desc_idx); + else if (unlikely(ret == -ENODEV)) + ret = 0; /* Will get registered later */ + } + + return ret; +} + +static int __guc_context_pre_pin(struct intel_context *ce, + struct intel_engine_cs *engine, + struct i915_gem_ww_ctx *ww, + void **vaddr) +{ + return lrc_pre_pin(ce, engine, ww, vaddr); +} + +static int __guc_context_pin(struct intel_context *ce, + struct intel_engine_cs *engine, + void *vaddr) +{ + if (i915_ggtt_offset(ce->state) != + (ce->lrc.lrca & CTX_GTT_ADDRESS_MASK)) + set_bit(CONTEXT_LRCA_DIRTY, &ce->flags); + + /* + * GuC context gets pinned in guc_request_alloc. See that function for + * explanation of why. + */ + + return lrc_pin(ce, engine, vaddr); } static int guc_context_pre_pin(struct intel_context *ce, struct i915_gem_ww_ctx *ww, void **vaddr) { - return lrc_pre_pin(ce, ce->engine, ww, vaddr); + return __guc_context_pre_pin(ce, ce->engine, ww, vaddr); } static int guc_context_pin(struct intel_context *ce, void *vaddr) { - return lrc_pin(ce, ce->engine, vaddr); + return __guc_context_pin(ce, ce->engine, vaddr); +} + +static void guc_context_unpin(struct intel_context *ce) +{ + struct intel_guc *guc = ce_to_guc(ce); + + unpin_guc_id(guc, ce); + lrc_unpin(ce); +} + +static void guc_context_post_unpin(struct intel_context *ce) +{ + lrc_post_unpin(ce); +} + +static void __guc_context_sched_enable(struct intel_guc *guc, + struct intel_context *ce) +{ + u32 action[] = { + INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET, + ce->guc_id, + GUC_CONTEXT_ENABLE + }; + + trace_intel_context_sched_enable(ce); + + guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), + G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, true); +} + +static void __guc_context_sched_disable(struct intel_guc *guc, + struct intel_context *ce, + u16 guc_id) +{ + u32 action[] = { + INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET, + guc_id, /* ce->guc_id not stable */ + GUC_CONTEXT_DISABLE + }; + + GEM_BUG_ON(guc_id == GUC_INVALID_LRC_ID); + + trace_intel_context_sched_disable(ce); + + guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), + G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, true); +} + +static void guc_blocked_fence_complete(struct intel_context *ce) +{ + lockdep_assert_held(&ce->guc_state.lock); + + if (!i915_sw_fence_done(&ce->guc_blocked)) + i915_sw_fence_complete(&ce->guc_blocked); +} + +static void guc_blocked_fence_reinit(struct intel_context *ce) +{ + lockdep_assert_held(&ce->guc_state.lock); + GEM_BUG_ON(!i915_sw_fence_done(&ce->guc_blocked)); + + /* + * This fence is always complete unless a pending schedule disable is + * outstanding. We arm the fence here and complete it when we receive + * the pending schedule disable complete message. 
+ */ + i915_sw_fence_fini(&ce->guc_blocked); + i915_sw_fence_reinit(&ce->guc_blocked); + i915_sw_fence_await(&ce->guc_blocked); + i915_sw_fence_commit(&ce->guc_blocked); +} + +static u16 prep_context_pending_disable(struct intel_context *ce) +{ + lockdep_assert_held(&ce->guc_state.lock); + + set_context_pending_disable(ce); + clr_context_enabled(ce); + guc_blocked_fence_reinit(ce); + intel_context_get(ce); + + return ce->guc_id; +} + +static struct i915_sw_fence *guc_context_block(struct intel_context *ce) +{ + struct intel_guc *guc = ce_to_guc(ce); + struct i915_sched_engine *sched_engine = ce->engine->sched_engine; + unsigned long flags; + struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm; + intel_wakeref_t wakeref; + u16 guc_id; + bool enabled; + + spin_lock_irqsave(&ce->guc_state.lock, flags); + + /* + * Sync with submission path, increment before below changes to context + * state. + */ + spin_lock(&sched_engine->lock); + incr_context_blocked(ce); + spin_unlock(&sched_engine->lock); + + enabled = context_enabled(ce); + if (unlikely(!enabled || submission_disabled(guc))) { + if (enabled) + clr_context_enabled(ce); + spin_unlock_irqrestore(&ce->guc_state.lock, flags); + return &ce->guc_blocked; + } + + /* + * We add +2 here as the schedule disable complete CTB handler calls + * intel_context_sched_disable_unpin (-2 to pin_count). + */ + atomic_add(2, &ce->pin_count); + + guc_id = prep_context_pending_disable(ce); + + spin_unlock_irqrestore(&ce->guc_state.lock, flags); + + with_intel_runtime_pm(runtime_pm, wakeref) + __guc_context_sched_disable(guc, ce, guc_id); + + return &ce->guc_blocked; +} + +static void guc_context_unblock(struct intel_context *ce) +{ + struct intel_guc *guc = ce_to_guc(ce); + struct i915_sched_engine *sched_engine = ce->engine->sched_engine; + unsigned long flags; + struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm; + intel_wakeref_t wakeref; + bool enable; + + GEM_BUG_ON(context_enabled(ce)); + + spin_lock_irqsave(&ce->guc_state.lock, flags); + + if (unlikely(submission_disabled(guc) || + !intel_context_is_pinned(ce) || + context_pending_disable(ce) || + context_blocked(ce) > 1)) { + enable = false; + } else { + enable = true; + set_context_pending_enable(ce); + set_context_enabled(ce); + intel_context_get(ce); + } + + /* + * Sync with submission path, decrement after above changes to context + * state. 
+ */ + spin_lock(&sched_engine->lock); + decr_context_blocked(ce); + spin_unlock(&sched_engine->lock); + + spin_unlock_irqrestore(&ce->guc_state.lock, flags); + + if (enable) { + with_intel_runtime_pm(runtime_pm, wakeref) + __guc_context_sched_enable(guc, ce); + } +} + +static void guc_context_cancel_request(struct intel_context *ce, + struct i915_request *rq) +{ + if (i915_sw_fence_signaled(&rq->submit)) { + struct i915_sw_fence *fence = guc_context_block(ce); + + i915_sw_fence_wait(fence); + if (!i915_request_completed(rq)) { + __i915_request_skip(rq); + guc_reset_state(ce, intel_ring_wrap(ce->ring, rq->head), + true); + } + guc_context_unblock(ce); + } +} + +static void __guc_context_set_preemption_timeout(struct intel_guc *guc, + u16 guc_id, + u32 preemption_timeout) +{ + u32 action[] = { + INTEL_GUC_ACTION_SET_CONTEXT_PREEMPTION_TIMEOUT, + guc_id, + preemption_timeout + }; + + intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true); +} + +static void guc_context_ban(struct intel_context *ce, struct i915_request *rq) +{ + struct intel_guc *guc = ce_to_guc(ce); + struct intel_runtime_pm *runtime_pm = + &ce->engine->gt->i915->runtime_pm; + intel_wakeref_t wakeref; + unsigned long flags; + + guc_flush_submissions(guc); + + spin_lock_irqsave(&ce->guc_state.lock, flags); + set_context_banned(ce); + + if (submission_disabled(guc) || + (!context_enabled(ce) && !context_pending_disable(ce))) { + spin_unlock_irqrestore(&ce->guc_state.lock, flags); + + guc_cancel_context_requests(ce); + intel_engine_signal_breadcrumbs(ce->engine); + } else if (!context_pending_disable(ce)) { + u16 guc_id; + + /* + * We add +2 here as the schedule disable complete CTB handler + * calls intel_context_sched_disable_unpin (-2 to pin_count). + */ + atomic_add(2, &ce->pin_count); + + guc_id = prep_context_pending_disable(ce); + spin_unlock_irqrestore(&ce->guc_state.lock, flags); + + /* + * In addition to disabling scheduling, set the preemption + * timeout to the minimum value (1 us) so the banned context + * gets kicked off the HW ASAP. + */ + with_intel_runtime_pm(runtime_pm, wakeref) { + __guc_context_set_preemption_timeout(guc, guc_id, 1); + __guc_context_sched_disable(guc, ce, guc_id); + } + } else { + if (!context_guc_id_invalid(ce)) + with_intel_runtime_pm(runtime_pm, wakeref) + __guc_context_set_preemption_timeout(guc, + ce->guc_id, + 1); + spin_unlock_irqrestore(&ce->guc_state.lock, flags); + } +} + +static void guc_context_sched_disable(struct intel_context *ce) +{ + struct intel_guc *guc = ce_to_guc(ce); + unsigned long flags; + struct intel_runtime_pm *runtime_pm = &ce->engine->gt->i915->runtime_pm; + intel_wakeref_t wakeref; + u16 guc_id; + bool enabled; + + if (submission_disabled(guc) || context_guc_id_invalid(ce) || + !lrc_desc_registered(guc, ce->guc_id)) { + clr_context_enabled(ce); + goto unpin; + } + + if (!context_enabled(ce)) + goto unpin; + + spin_lock_irqsave(&ce->guc_state.lock, flags); + + /* + * We have to check if the context has been disabled by another thread. + * We also have to check if the context has been pinned again as another + * pin operation is allowed to pass this function. Checking the pin + * count, within ce->guc_state.lock, synchronizes this function with + * guc_request_alloc ensuring a request doesn't slip through the + * 'context_pending_disable' fence. Checking within the spin lock (can't + * sleep) ensures another process doesn't pin this context and generate + * a request before we set the 'context_pending_disable' flag here. 
+ */ + enabled = context_enabled(ce); + if (unlikely(!enabled || submission_disabled(guc))) { + if (enabled) + clr_context_enabled(ce); + spin_unlock_irqrestore(&ce->guc_state.lock, flags); + goto unpin; + } + if (unlikely(atomic_add_unless(&ce->pin_count, -2, 2))) { + spin_unlock_irqrestore(&ce->guc_state.lock, flags); + return; + } + guc_id = prep_context_pending_disable(ce); + + spin_unlock_irqrestore(&ce->guc_state.lock, flags); + + with_intel_runtime_pm(runtime_pm, wakeref) + __guc_context_sched_disable(guc, ce, guc_id); + + return; +unpin: + intel_context_sched_disable_unpin(ce); +} + +static inline void guc_lrc_desc_unpin(struct intel_context *ce) +{ + struct intel_guc *guc = ce_to_guc(ce); + + GEM_BUG_ON(!lrc_desc_registered(guc, ce->guc_id)); + GEM_BUG_ON(ce != __get_context(guc, ce->guc_id)); + GEM_BUG_ON(context_enabled(ce)); + + clr_context_registered(ce); + deregister_context(ce, ce->guc_id, true); +} + +static void __guc_context_destroy(struct intel_context *ce) +{ + GEM_BUG_ON(ce->guc_prio_count[GUC_CLIENT_PRIORITY_KMD_HIGH] || + ce->guc_prio_count[GUC_CLIENT_PRIORITY_HIGH] || + ce->guc_prio_count[GUC_CLIENT_PRIORITY_KMD_NORMAL] || + ce->guc_prio_count[GUC_CLIENT_PRIORITY_NORMAL]); + + lrc_fini(ce); + intel_context_fini(ce); + + if (intel_engine_is_virtual(ce->engine)) { + struct guc_virtual_engine *ve = + container_of(ce, typeof(*ve), context); + + if (ve->base.breadcrumbs) + intel_breadcrumbs_put(ve->base.breadcrumbs); + + kfree(ve); + } else { + intel_context_free(ce); + } +} + +static void guc_context_destroy(struct kref *kref) +{ + struct intel_context *ce = container_of(kref, typeof(*ce), ref); + struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm; + struct intel_guc *guc = ce_to_guc(ce); + intel_wakeref_t wakeref; + unsigned long flags; + bool disabled; + + /* + * If the guc_id is invalid this context has been stolen and we can free + * it immediately. Also can be freed immediately if the context is not + * registered with the GuC or the GuC is in the middle of a reset. + */ + if (context_guc_id_invalid(ce)) { + __guc_context_destroy(ce); + return; + } else if (submission_disabled(guc) || + !lrc_desc_registered(guc, ce->guc_id)) { + release_guc_id(guc, ce); + __guc_context_destroy(ce); + return; + } + + /* + * We have to acquire the context spinlock and check guc_id again, if it + * is valid it hasn't been stolen and needs to be deregistered. We + * delete this context from the list of unpinned guc_ids available to + * steal to seal a race with guc_lrc_desc_pin(). When the G2H CTB + * returns indicating this context has been deregistered the guc_id is + * returned to the pool of available guc_ids. + */ + spin_lock_irqsave(&guc->contexts_lock, flags); + if (context_guc_id_invalid(ce)) { + spin_unlock_irqrestore(&guc->contexts_lock, flags); + __guc_context_destroy(ce); + return; + } + + if (!list_empty(&ce->guc_id_link)) + list_del_init(&ce->guc_id_link); + spin_unlock_irqrestore(&guc->contexts_lock, flags); + + /* Seal race with Reset */ + spin_lock_irqsave(&ce->guc_state.lock, flags); + disabled = submission_disabled(guc); + if (likely(!disabled)) + set_context_destroyed(ce); + spin_unlock_irqrestore(&ce->guc_state.lock, flags); + if (unlikely(disabled)) { + release_guc_id(guc, ce); + __guc_context_destroy(ce); + return; + } + + /* + * We defer GuC context deregistration until the context is destroyed + * in order to save on CTBs. 
With this optimization ideally we only need + * 1 CTB to register the context during the first pin and 1 CTB to + * deregister the context when the context is destroyed. Without this + * optimization, a CTB would be needed every pin & unpin. + * + * XXX: Need to acquire the runtime wakeref as this can be triggered + * from context_free_worker when runtime wakeref is not held. + * guc_lrc_desc_unpin requires the runtime as a GuC register is written + * in H2G CTB to deregister the context. A future patch may defer this + * H2G CTB if the runtime wakeref is zero. + */ + with_intel_runtime_pm(runtime_pm, wakeref) + guc_lrc_desc_unpin(ce); +} + +static int guc_context_alloc(struct intel_context *ce) +{ + return lrc_alloc(ce, ce->engine); +} + +static void guc_context_set_prio(struct intel_guc *guc, + struct intel_context *ce, + u8 prio) +{ + u32 action[] = { + INTEL_GUC_ACTION_SET_CONTEXT_PRIORITY, + ce->guc_id, + prio, + }; + + GEM_BUG_ON(prio < GUC_CLIENT_PRIORITY_KMD_HIGH || + prio > GUC_CLIENT_PRIORITY_NORMAL); + + if (ce->guc_prio == prio || submission_disabled(guc) || + !context_registered(ce)) + return; + + guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true); + + ce->guc_prio = prio; + trace_intel_context_set_prio(ce); +} + +static inline u8 map_i915_prio_to_guc_prio(int prio) +{ + if (prio == I915_PRIORITY_NORMAL) + return GUC_CLIENT_PRIORITY_KMD_NORMAL; + else if (prio < I915_PRIORITY_NORMAL) + return GUC_CLIENT_PRIORITY_NORMAL; + else if (prio < I915_PRIORITY_DISPLAY) + return GUC_CLIENT_PRIORITY_HIGH; + else + return GUC_CLIENT_PRIORITY_KMD_HIGH; +} + +static inline void add_context_inflight_prio(struct intel_context *ce, + u8 guc_prio) +{ + lockdep_assert_held(&ce->guc_active.lock); + GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_prio_count)); + + ++ce->guc_prio_count[guc_prio]; + + /* Overflow protection */ + GEM_WARN_ON(!ce->guc_prio_count[guc_prio]); +} + +static inline void sub_context_inflight_prio(struct intel_context *ce, + u8 guc_prio) +{ + lockdep_assert_held(&ce->guc_active.lock); + GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_prio_count)); + + /* Underflow protection */ + GEM_WARN_ON(!ce->guc_prio_count[guc_prio]); + + --ce->guc_prio_count[guc_prio]; +} + +static inline void update_context_prio(struct intel_context *ce) +{ + struct intel_guc *guc = &ce->engine->gt->uc.guc; + int i; + + BUILD_BUG_ON(GUC_CLIENT_PRIORITY_KMD_HIGH != 0); + BUILD_BUG_ON(GUC_CLIENT_PRIORITY_KMD_HIGH > GUC_CLIENT_PRIORITY_NORMAL); + + lockdep_assert_held(&ce->guc_active.lock); + + for (i = 0; i < ARRAY_SIZE(ce->guc_prio_count); ++i) { + if (ce->guc_prio_count[i]) { + guc_context_set_prio(guc, ce, i); + break; + } + } +} + +static inline bool new_guc_prio_higher(u8 old_guc_prio, u8 new_guc_prio) +{ + /* Lower value is higher priority */ + return new_guc_prio < old_guc_prio; +} + +static void add_to_context(struct i915_request *rq) +{ + struct intel_context *ce = rq->context; + u8 new_guc_prio = map_i915_prio_to_guc_prio(rq_prio(rq)); + + GEM_BUG_ON(rq->guc_prio == GUC_PRIO_FINI); + + spin_lock(&ce->guc_active.lock); + list_move_tail(&rq->sched.link, &ce->guc_active.requests); + + if (rq->guc_prio == GUC_PRIO_INIT) { + rq->guc_prio = new_guc_prio; + add_context_inflight_prio(ce, rq->guc_prio); + } else if (new_guc_prio_higher(rq->guc_prio, new_guc_prio)) { + sub_context_inflight_prio(ce, rq->guc_prio); + rq->guc_prio = new_guc_prio; + add_context_inflight_prio(ce, rq->guc_prio); + } + update_context_prio(ce); + + spin_unlock(&ce->guc_active.lock); +} + +static void 
guc_prio_fini(struct i915_request *rq, struct intel_context *ce) +{ + lockdep_assert_held(&ce->guc_active.lock); + + if (rq->guc_prio != GUC_PRIO_INIT && + rq->guc_prio != GUC_PRIO_FINI) { + sub_context_inflight_prio(ce, rq->guc_prio); + update_context_prio(ce); + } + rq->guc_prio = GUC_PRIO_FINI; +} + +static void remove_from_context(struct i915_request *rq) +{ + struct intel_context *ce = rq->context; + + spin_lock_irq(&ce->guc_active.lock); + + list_del_init(&rq->sched.link); + clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); + + /* Prevent further __await_execution() registering a cb, then flush */ + set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags); + + guc_prio_fini(rq, ce); + + spin_unlock_irq(&ce->guc_active.lock); + + atomic_dec(&ce->guc_id_ref); + i915_request_notify_execute_cb_imm(rq); } static const struct intel_context_ops guc_context_ops = { @@ -454,28 +1976,71 @@ static const struct intel_context_ops guc_context_ops = { .pre_pin = guc_context_pre_pin, .pin = guc_context_pin, - .unpin = lrc_unpin, - .post_unpin = lrc_post_unpin, + .unpin = guc_context_unpin, + .post_unpin = guc_context_post_unpin, + + .ban = guc_context_ban, + + .cancel_request = guc_context_cancel_request, .enter = intel_context_enter_engine, .exit = intel_context_exit_engine, + .sched_disable = guc_context_sched_disable, + .reset = lrc_reset, - .destroy = lrc_destroy, + .destroy = guc_context_destroy, + + .create_virtual = guc_create_virtual, }; -static int guc_request_alloc(struct i915_request *request) +static void __guc_signal_context_fence(struct intel_context *ce) { + struct i915_request *rq; + + lockdep_assert_held(&ce->guc_state.lock); + + if (!list_empty(&ce->guc_state.fences)) + trace_intel_context_fence_release(ce); + + list_for_each_entry(rq, &ce->guc_state.fences, guc_fence_link) + i915_sw_fence_complete(&rq->submit); + + INIT_LIST_HEAD(&ce->guc_state.fences); +} + +static void guc_signal_context_fence(struct intel_context *ce) +{ + unsigned long flags; + + spin_lock_irqsave(&ce->guc_state.lock, flags); + clr_context_wait_for_deregister_to_register(ce); + __guc_signal_context_fence(ce); + spin_unlock_irqrestore(&ce->guc_state.lock, flags); +} + +static bool context_needs_register(struct intel_context *ce, bool new_guc_id) +{ + return (new_guc_id || test_bit(CONTEXT_LRCA_DIRTY, &ce->flags) || + !lrc_desc_registered(ce_to_guc(ce), ce->guc_id)) && + !submission_disabled(ce_to_guc(ce)); +} + +static int guc_request_alloc(struct i915_request *rq) +{ + struct intel_context *ce = rq->context; + struct intel_guc *guc = ce_to_guc(ce); + unsigned long flags; int ret; - GEM_BUG_ON(!intel_context_is_pinned(request->context)); + GEM_BUG_ON(!intel_context_is_pinned(rq->context)); /* * Flush enough space to reduce the likelihood of waiting after * we start building the request - in which case we will just * have to repeat work. */ - request->reserved_space += GUC_REQUEST_SIZE; + rq->reserved_space += GUC_REQUEST_SIZE; /* * Note that after this point, we have committed to using @@ -486,40 +2051,232 @@ static int guc_request_alloc(struct i915_request *request) */ /* Unconditionally invalidate GPU caches and TLBs. 
*/ - ret = request->engine->emit_flush(request, EMIT_INVALIDATE); + ret = rq->engine->emit_flush(rq, EMIT_INVALIDATE); if (ret) return ret; - request->reserved_space -= GUC_REQUEST_SIZE; + rq->reserved_space -= GUC_REQUEST_SIZE; + + /* + * Call pin_guc_id here rather than in the pinning step as with + * dma_resv, contexts can be repeatedly pinned / unpinned, thrashing the + * guc_ids and creating horrible race conditions. This is especially bad + * when guc_ids are being stolen due to over subscription. By the time + * this function is reached, it is guaranteed that the guc_id will be + * persistent until the generated request is retired, thus sealing these + * race conditions. It is still safe to fail here if guc_ids are + * exhausted and return -EAGAIN to the user indicating that they can try + * again in the future. + * + * There is no need for a lock here as the timeline mutex ensures at + * most one context can be executing this code path at once. The + * guc_id_ref is incremented once for every request in flight and + * decremented on each retire. When it is zero, a lock around the + * increment (in pin_guc_id) is needed to seal a race with unpin_guc_id. + */ + if (atomic_add_unless(&ce->guc_id_ref, 1, 0)) + goto out; + + ret = pin_guc_id(guc, ce); /* returns 1 if new guc_id assigned */ + if (unlikely(ret < 0)) + return ret; + if (context_needs_register(ce, !!ret)) { + ret = guc_lrc_desc_pin(ce, true); + if (unlikely(ret)) { /* unwind */ + if (ret == -EPIPE) { + disable_submission(guc); + goto out; /* GPU will be reset */ + } + atomic_dec(&ce->guc_id_ref); + unpin_guc_id(guc, ce); + return ret; + } + } + + clear_bit(CONTEXT_LRCA_DIRTY, &ce->flags); + +out: + /* + * We block all requests on this context if a G2H is pending for a + * schedule disable or context deregistration as the GuC will fail a + * schedule enable or context registration if either G2H is pending + * respectively. Once a G2H returns, the fence is released that is + * blocking these requests (see guc_signal_context_fence). + * + * We can safely check the below fields outside of the lock as it isn't + * possible for these fields to transition from being clear to set but + * the converse is possible, hence the need for the check within the lock. + */ + if (likely(!context_wait_for_deregister_to_register(ce) && + !context_pending_disable(ce))) + return 0; + + spin_lock_irqsave(&ce->guc_state.lock, flags); + if (context_wait_for_deregister_to_register(ce) || + context_pending_disable(ce)) { + i915_sw_fence_await(&rq->submit); + + list_add_tail(&rq->guc_fence_link, &ce->guc_state.fences); + } + spin_unlock_irqrestore(&ce->guc_state.lock, flags); + return 0; } -static inline void queue_request(struct intel_engine_cs *engine, - struct i915_request *rq, - int prio) +static int guc_virtual_context_pre_pin(struct intel_context *ce, + struct i915_gem_ww_ctx *ww, + void **vaddr) { - GEM_BUG_ON(!list_empty(&rq->sched.link)); - list_add_tail(&rq->sched.link, - i915_sched_lookup_priolist(engine, prio)); - set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); + struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); + + return __guc_context_pre_pin(ce, engine, ww, vaddr); } -static void guc_submit_request(struct i915_request *rq) +static int guc_virtual_context_pin(struct intel_context *ce, void *vaddr) { - struct intel_engine_cs *engine = rq->engine; - unsigned long flags; + struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); - /* Will be called from irq-context when using foreign fences. 
*/ - spin_lock_irqsave(&engine->active.lock, flags); + return __guc_context_pin(ce, engine, vaddr); +} - queue_request(engine, rq, rq_prio(rq)); +static void guc_virtual_context_enter(struct intel_context *ce) +{ + intel_engine_mask_t tmp, mask = ce->engine->mask; + struct intel_engine_cs *engine; - GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)); - GEM_BUG_ON(list_empty(&rq->sched.link)); + for_each_engine_masked(engine, ce->engine->gt, mask, tmp) + intel_engine_pm_get(engine); - tasklet_hi_schedule(&engine->execlists.tasklet); + intel_timeline_enter(ce->timeline); +} + +static void guc_virtual_context_exit(struct intel_context *ce) +{ + intel_engine_mask_t tmp, mask = ce->engine->mask; + struct intel_engine_cs *engine; - spin_unlock_irqrestore(&engine->active.lock, flags); + for_each_engine_masked(engine, ce->engine->gt, mask, tmp) + intel_engine_pm_put(engine); + + intel_timeline_exit(ce->timeline); +} + +static int guc_virtual_context_alloc(struct intel_context *ce) +{ + struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); + + return lrc_alloc(ce, engine); +} + +static const struct intel_context_ops virtual_guc_context_ops = { + .alloc = guc_virtual_context_alloc, + + .pre_pin = guc_virtual_context_pre_pin, + .pin = guc_virtual_context_pin, + .unpin = guc_context_unpin, + .post_unpin = guc_context_post_unpin, + + .ban = guc_context_ban, + + .cancel_request = guc_context_cancel_request, + + .enter = guc_virtual_context_enter, + .exit = guc_virtual_context_exit, + + .sched_disable = guc_context_sched_disable, + + .destroy = guc_context_destroy, + + .get_sibling = guc_virtual_get_sibling, +}; + +static bool +guc_irq_enable_breadcrumbs(struct intel_breadcrumbs *b) +{ + struct intel_engine_cs *sibling; + intel_engine_mask_t tmp, mask = b->engine_mask; + bool result = false; + + for_each_engine_masked(sibling, b->irq_engine->gt, mask, tmp) + result |= intel_engine_irq_enable(sibling); + + return result; +} + +static void +guc_irq_disable_breadcrumbs(struct intel_breadcrumbs *b) +{ + struct intel_engine_cs *sibling; + intel_engine_mask_t tmp, mask = b->engine_mask; + + for_each_engine_masked(sibling, b->irq_engine->gt, mask, tmp) + intel_engine_irq_disable(sibling); +} + +static void guc_init_breadcrumbs(struct intel_engine_cs *engine) +{ + int i; + + /* + * In GuC submission mode we do not know which physical engine a request + * will be scheduled on, this creates a problem because the breadcrumb + * interrupt is per physical engine. To work around this we attach + * requests and direct all breadcrumb interrupts to the first instance + * of an engine per class. In addition all breadcrumb interrupts are + * enabled / disabled across an engine class in unison. 
+ */ + for (i = 0; i < MAX_ENGINE_INSTANCE; ++i) { + struct intel_engine_cs *sibling = + engine->gt->engine_class[engine->class][i]; + + if (sibling) { + if (engine->breadcrumbs != sibling->breadcrumbs) { + intel_breadcrumbs_put(engine->breadcrumbs); + engine->breadcrumbs = + intel_breadcrumbs_get(sibling->breadcrumbs); + } + break; + } + } + + if (engine->breadcrumbs) { + engine->breadcrumbs->engine_mask |= engine->mask; + engine->breadcrumbs->irq_enable = guc_irq_enable_breadcrumbs; + engine->breadcrumbs->irq_disable = guc_irq_disable_breadcrumbs; + } +} + +static void guc_bump_inflight_request_prio(struct i915_request *rq, + int prio) +{ + struct intel_context *ce = rq->context; + u8 new_guc_prio = map_i915_prio_to_guc_prio(prio); + + /* Short circuit function */ + if (prio < I915_PRIORITY_NORMAL || + rq->guc_prio == GUC_PRIO_FINI || + (rq->guc_prio != GUC_PRIO_INIT && + !new_guc_prio_higher(rq->guc_prio, new_guc_prio))) + return; + + spin_lock(&ce->guc_active.lock); + if (rq->guc_prio != GUC_PRIO_FINI) { + if (rq->guc_prio != GUC_PRIO_INIT) + sub_context_inflight_prio(ce, rq->guc_prio); + rq->guc_prio = new_guc_prio; + add_context_inflight_prio(ce, rq->guc_prio); + update_context_prio(ce); + } + spin_unlock(&ce->guc_active.lock); +} + +static void guc_retire_inflight_request_prio(struct i915_request *rq) +{ + struct intel_context *ce = rq->context; + + spin_lock(&ce->guc_active.lock); + guc_prio_fini(rq, ce); + spin_unlock(&ce->guc_active.lock); } static void sanitize_hwsp(struct intel_engine_cs *engine) @@ -588,21 +2345,68 @@ static int guc_resume(struct intel_engine_cs *engine) return 0; } +static bool guc_sched_engine_disabled(struct i915_sched_engine *sched_engine) +{ + return !sched_engine->tasklet.callback; +} + static void guc_set_default_submission(struct intel_engine_cs *engine) { engine->submit_request = guc_submit_request; } +static inline void guc_kernel_context_pin(struct intel_guc *guc, + struct intel_context *ce) +{ + if (context_guc_id_invalid(ce)) + pin_guc_id(guc, ce); + guc_lrc_desc_pin(ce, true); +} + +static inline void guc_init_lrc_mapping(struct intel_guc *guc) +{ + struct intel_gt *gt = guc_to_gt(guc); + struct intel_engine_cs *engine; + enum intel_engine_id id; + + /* make sure all descriptors are clean... */ + xa_destroy(&guc->context_lookup); + + /* + * Some contexts might have been pinned before we enabled GuC + * submission, so we need to add them to the GuC bookkeeping. + * Also, after a reset of the GuC we want to make sure that the + * information shared with GuC is properly reset. The kernel LRCs are + * not attached to the gem_context, so they need to be added separately. + * + * Note: we purposefully do not check the return of guc_lrc_desc_pin, + * because that function can only fail if a reset is just starting. This + * is at the end of reset so presumably another reset isn't happening + * and even if it did this code would be run again. 
+ */ + + for_each_engine(engine, gt, id) + if (engine->kernel_context) + guc_kernel_context_pin(guc, engine->kernel_context); +} + static void guc_release(struct intel_engine_cs *engine) { engine->sanitize = NULL; /* no longer in control, nothing to sanitize */ - tasklet_kill(&engine->execlists.tasklet); - intel_engine_cleanup_common(engine); lrc_fini_wa_ctx(engine); } +static void virtual_guc_bump_serial(struct intel_engine_cs *engine) +{ + struct intel_engine_cs *e; + intel_engine_mask_t tmp, mask = engine->mask; + + for_each_engine_masked(e, engine->gt, mask, tmp) + e->serial++; +} + static void guc_default_vfuncs(struct intel_engine_cs *engine) { /* Default vfuncs which can be overridden by each engine. */ @@ -611,13 +2415,15 @@ static void guc_default_vfuncs(struct intel_engine_cs *engine) engine->cops = &guc_context_ops; engine->request_alloc = guc_request_alloc; + engine->add_active_request = add_to_context; + engine->remove_active_request = remove_from_context; - engine->schedule = i915_schedule; + engine->sched_engine->schedule = i915_schedule; - engine->reset.prepare = guc_reset_prepare; - engine->reset.rewind = guc_reset_rewind; - engine->reset.cancel = guc_reset_cancel; - engine->reset.finish = guc_reset_finish; + engine->reset.prepare = guc_reset_nop; + engine->reset.rewind = guc_rewind_nop; + engine->reset.cancel = guc_reset_nop; + engine->reset.finish = guc_reset_nop; engine->emit_flush = gen8_emit_flush_xcs; engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb; @@ -629,13 +2435,13 @@ static void guc_default_vfuncs(struct intel_engine_cs *engine) engine->set_default_submission = guc_set_default_submission; engine->flags |= I915_ENGINE_HAS_PREEMPTION; + engine->flags |= I915_ENGINE_HAS_TIMESLICES; /* * TODO: GuC supports timeslicing and semaphores as well, but they're * handled by the firmware so some minor tweaks are required before * enabling. * - * engine->flags |= I915_ENGINE_HAS_TIMESLICES; * engine->flags |= I915_ENGINE_HAS_SEMAPHORES; */ @@ -666,9 +2472,21 @@ static inline void guc_default_irqs(struct intel_engine_cs *engine) intel_engine_set_irq_handler(engine, cs_irq_handler); } +static void guc_sched_engine_destroy(struct kref *kref) +{ + struct i915_sched_engine *sched_engine = + container_of(kref, typeof(*sched_engine), ref); + struct intel_guc *guc = sched_engine->private_data; + + guc->sched_engine = NULL; + tasklet_kill(&sched_engine->tasklet); /* flush the callback */ + kfree(sched_engine); +} + int intel_guc_submission_setup(struct intel_engine_cs *engine) { struct drm_i915_private *i915 = engine->i915; + struct intel_guc *guc = &engine->gt->uc.guc; /* * The setup relies on several assumptions (e.g. 
irqs always enabled) @@ -676,10 +2494,28 @@ int intel_guc_submission_setup(struct intel_engine_cs *engine) */ GEM_BUG_ON(GRAPHICS_VER(i915) < 11); - tasklet_setup(&engine->execlists.tasklet, guc_submission_tasklet); + if (!guc->sched_engine) { + guc->sched_engine = i915_sched_engine_create(ENGINE_VIRTUAL); + if (!guc->sched_engine) + return -ENOMEM; + + guc->sched_engine->schedule = i915_schedule; + guc->sched_engine->disabled = guc_sched_engine_disabled; + guc->sched_engine->private_data = guc; + guc->sched_engine->destroy = guc_sched_engine_destroy; + guc->sched_engine->bump_inflight_request_prio = + guc_bump_inflight_request_prio; + guc->sched_engine->retire_inflight_request_prio = + guc_retire_inflight_request_prio; + tasklet_setup(&guc->sched_engine->tasklet, + guc_submission_tasklet); + } + i915_sched_engine_put(engine->sched_engine); + engine->sched_engine = i915_sched_engine_get(guc->sched_engine); guc_default_vfuncs(engine); guc_default_irqs(engine); + guc_init_breadcrumbs(engine); if (engine->class == RENDER_CLASS) rcs_submission_override(engine); @@ -695,18 +2531,19 @@ int intel_guc_submission_setup(struct intel_engine_cs *engine) void intel_guc_submission_enable(struct intel_guc *guc) { - guc_stage_desc_init(guc); + guc_init_lrc_mapping(guc); } void intel_guc_submission_disable(struct intel_guc *guc) { - struct intel_gt *gt = guc_to_gt(guc); - - GEM_BUG_ON(gt->awake); /* GT should be parked first */ - /* Note: By the time we're here, GuC may have already been reset */ +} - guc_stage_desc_fini(guc); +static bool __guc_submission_supported(struct intel_guc *guc) +{ + /* GuC submission is unavailable for pre-Gen11 */ + return intel_guc_is_supported(guc) && + GRAPHICS_VER(guc_to_gt(guc)->i915) >= 11; } static bool __guc_submission_selected(struct intel_guc *guc) @@ -721,5 +2558,481 @@ static bool __guc_submission_selected(struct intel_guc *guc) void intel_guc_submission_init_early(struct intel_guc *guc) { + guc->submission_supported = __guc_submission_supported(guc); guc->submission_selected = __guc_submission_selected(guc); } + +static inline struct intel_context * +g2h_context_lookup(struct intel_guc *guc, u32 desc_idx) +{ + struct intel_context *ce; + + if (unlikely(desc_idx >= GUC_MAX_LRC_DESCRIPTORS)) { + drm_err(&guc_to_gt(guc)->i915->drm, + "Invalid desc_idx %u", desc_idx); + return NULL; + } + + ce = __get_context(guc, desc_idx); + if (unlikely(!ce)) { + drm_err(&guc_to_gt(guc)->i915->drm, + "Context is NULL, desc_idx %u", desc_idx); + return NULL; + } + + return ce; +} + +static void decr_outstanding_submission_g2h(struct intel_guc *guc) +{ + if (atomic_dec_and_test(&guc->outstanding_submission_g2h)) + wake_up_all(&guc->ct.wq); +} + +int intel_guc_deregister_done_process_msg(struct intel_guc *guc, + const u32 *msg, + u32 len) +{ + struct intel_context *ce; + u32 desc_idx = msg[0]; + + if (unlikely(len < 1)) { + drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len); + return -EPROTO; + } + + ce = g2h_context_lookup(guc, desc_idx); + if (unlikely(!ce)) + return -EPROTO; + + trace_intel_context_deregister_done(ce); + + if (context_wait_for_deregister_to_register(ce)) { + struct intel_runtime_pm *runtime_pm = + &ce->engine->gt->i915->runtime_pm; + intel_wakeref_t wakeref; + + /* + * Previous owner of this guc_id has been deregistered, now safe to + * register this context. 
+ */ + with_intel_runtime_pm(runtime_pm, wakeref) + register_context(ce, true); + guc_signal_context_fence(ce); + intel_context_put(ce); + } else if (context_destroyed(ce)) { + /* Context has been destroyed */ + release_guc_id(guc, ce); + __guc_context_destroy(ce); + } + + decr_outstanding_submission_g2h(guc); + + return 0; +} + +int intel_guc_sched_done_process_msg(struct intel_guc *guc, + const u32 *msg, + u32 len) +{ + struct intel_context *ce; + unsigned long flags; + u32 desc_idx = msg[0]; + + if (unlikely(len < 2)) { + drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len); + return -EPROTO; + } + + ce = g2h_context_lookup(guc, desc_idx); + if (unlikely(!ce)) + return -EPROTO; + + if (unlikely(context_destroyed(ce) || + (!context_pending_enable(ce) && + !context_pending_disable(ce)))) { + drm_err(&guc_to_gt(guc)->i915->drm, + "Bad context sched_state 0x%x, 0x%x, desc_idx %u", + atomic_read(&ce->guc_sched_state_no_lock), + ce->guc_state.sched_state, desc_idx); + return -EPROTO; + } + + trace_intel_context_sched_done(ce); + + if (context_pending_enable(ce)) { + clr_context_pending_enable(ce); + } else if (context_pending_disable(ce)) { + bool banned; + + /* + * Unpin must be done before __guc_signal_context_fence, + * otherwise a race exists between the requests getting + * submitted + retired before this unpin completes resulting in + * the pin_count going to zero and the context still being + * enabled. + */ + intel_context_sched_disable_unpin(ce); + + spin_lock_irqsave(&ce->guc_state.lock, flags); + banned = context_banned(ce); + clr_context_banned(ce); + clr_context_pending_disable(ce); + __guc_signal_context_fence(ce); + guc_blocked_fence_complete(ce); + spin_unlock_irqrestore(&ce->guc_state.lock, flags); + + if (banned) { + guc_cancel_context_requests(ce); + intel_engine_signal_breadcrumbs(ce->engine); + } + } + + decr_outstanding_submission_g2h(guc); + intel_context_put(ce); + + return 0; +} + +static void capture_error_state(struct intel_guc *guc, + struct intel_context *ce) +{ + struct intel_gt *gt = guc_to_gt(guc); + struct drm_i915_private *i915 = gt->i915; + struct intel_engine_cs *engine = __context_to_physical_engine(ce); + intel_wakeref_t wakeref; + + intel_engine_set_hung_context(engine, ce); + with_intel_runtime_pm(&i915->runtime_pm, wakeref) + i915_capture_error_state(gt, engine->mask); + atomic_inc(&i915->gpu_error.reset_engine_count[engine->uabi_class]); +} + +static void guc_context_replay(struct intel_context *ce) +{ + struct i915_sched_engine *sched_engine = ce->engine->sched_engine; + + __guc_reset_context(ce, true); + tasklet_hi_schedule(&sched_engine->tasklet); +} + +static void guc_handle_context_reset(struct intel_guc *guc, + struct intel_context *ce) +{ + trace_intel_context_reset(ce); + + if (likely(!intel_context_is_banned(ce))) { + capture_error_state(guc, ce); + guc_context_replay(ce); + } +} + +int intel_guc_context_reset_process_msg(struct intel_guc *guc, + const u32 *msg, u32 len) +{ + struct intel_context *ce; + int desc_idx; + + if (unlikely(len != 1)) { + drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len); + return -EPROTO; + } + + desc_idx = msg[0]; + ce = g2h_context_lookup(guc, desc_idx); + if (unlikely(!ce)) + return -EPROTO; + + guc_handle_context_reset(guc, ce); + + return 0; +} + +static struct intel_engine_cs * +guc_lookup_engine(struct intel_guc *guc, u8 guc_class, u8 instance) +{ + struct intel_gt *gt = guc_to_gt(guc); + u8 engine_class = guc_class_to_engine_class(guc_class); + + /* Class index is checked in class 
converter */ + GEM_BUG_ON(instance > MAX_ENGINE_INSTANCE); + + return gt->engine_class[engine_class][instance]; +} + +int intel_guc_engine_failure_process_msg(struct intel_guc *guc, + const u32 *msg, u32 len) +{ + struct intel_engine_cs *engine; + u8 guc_class, instance; + u32 reason; + + if (unlikely(len != 3)) { + drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len); + return -EPROTO; + } + + guc_class = msg[0]; + instance = msg[1]; + reason = msg[2]; + + engine = guc_lookup_engine(guc, guc_class, instance); + if (unlikely(!engine)) { + drm_err(&guc_to_gt(guc)->i915->drm, + "Invalid engine %d:%d", guc_class, instance); + return -EPROTO; + } + + intel_gt_handle_error(guc_to_gt(guc), engine->mask, + I915_ERROR_CAPTURE, + "GuC failed to reset %s (reason=0x%08x)\n", + engine->name, reason); + + return 0; +} + +void intel_guc_find_hung_context(struct intel_engine_cs *engine) +{ + struct intel_guc *guc = &engine->gt->uc.guc; + struct intel_context *ce; + struct i915_request *rq; + unsigned long index; + + /* Reset called during driver load? GuC not yet initialised! */ + if (unlikely(!guc_submission_initialized(guc))) + return; + + xa_for_each(&guc->context_lookup, index, ce) { + if (!intel_context_is_pinned(ce)) + continue; + + if (intel_engine_is_virtual(ce->engine)) { + if (!(ce->engine->mask & engine->mask)) + continue; + } else { + if (ce->engine != engine) + continue; + } + + list_for_each_entry(rq, &ce->guc_active.requests, sched.link) { + if (i915_test_request_state(rq) != I915_REQUEST_ACTIVE) + continue; + + intel_engine_set_hung_context(engine, ce); + + /* Can only cope with one hang at a time... */ + return; + } + } +} + +void intel_guc_dump_active_requests(struct intel_engine_cs *engine, + struct i915_request *hung_rq, + struct drm_printer *m) +{ + struct intel_guc *guc = &engine->gt->uc.guc; + struct intel_context *ce; + unsigned long index; + unsigned long flags; + + /* Reset called during driver load? GuC not yet initialised! 
*/ + if (unlikely(!guc_submission_initialized(guc))) + return; + + xa_for_each(&guc->context_lookup, index, ce) { + if (!intel_context_is_pinned(ce)) + continue; + + if (intel_engine_is_virtual(ce->engine)) { + if (!(ce->engine->mask & engine->mask)) + continue; + } else { + if (ce->engine != engine) + continue; + } + + spin_lock_irqsave(&ce->guc_active.lock, flags); + intel_engine_dump_active_requests(&ce->guc_active.requests, + hung_rq, m); + spin_unlock_irqrestore(&ce->guc_active.lock, flags); + } +} + +void intel_guc_submission_print_info(struct intel_guc *guc, + struct drm_printer *p) +{ + struct i915_sched_engine *sched_engine = guc->sched_engine; + struct rb_node *rb; + unsigned long flags; + + if (!sched_engine) + return; + + drm_printf(p, "GuC Number Outstanding Submission G2H: %u\n", + atomic_read(&guc->outstanding_submission_g2h)); + drm_printf(p, "GuC tasklet count: %u\n\n", + atomic_read(&sched_engine->tasklet.count)); + + spin_lock_irqsave(&sched_engine->lock, flags); + drm_printf(p, "Requests in GuC submit tasklet:\n"); + for (rb = rb_first_cached(&sched_engine->queue); rb; rb = rb_next(rb)) { + struct i915_priolist *pl = to_priolist(rb); + struct i915_request *rq; + + priolist_for_each_request(rq, pl) + drm_printf(p, "guc_id=%u, seqno=%llu\n", + rq->context->guc_id, + rq->fence.seqno); + } + spin_unlock_irqrestore(&sched_engine->lock, flags); + drm_printf(p, "\n"); +} + +static inline void guc_log_context_priority(struct drm_printer *p, + struct intel_context *ce) +{ + int i; + + drm_printf(p, "\t\tPriority: %d\n", + ce->guc_prio); + drm_printf(p, "\t\tNumber Requests (lower index == higher priority)\n"); + for (i = GUC_CLIENT_PRIORITY_KMD_HIGH; + i < GUC_CLIENT_PRIORITY_NUM; ++i) { + drm_printf(p, "\t\tNumber requests in priority band[%d]: %d\n", + i, ce->guc_prio_count[i]); + } + drm_printf(p, "\n"); +} + +void intel_guc_submission_print_context_info(struct intel_guc *guc, + struct drm_printer *p) +{ + struct intel_context *ce; + unsigned long index; + + xa_for_each(&guc->context_lookup, index, ce) { + drm_printf(p, "GuC lrc descriptor %u:\n", ce->guc_id); + drm_printf(p, "\tHW Context Desc: 0x%08x\n", ce->lrc.lrca); + drm_printf(p, "\t\tLRC Head: Internal %u, Memory %u\n", + ce->ring->head, + ce->lrc_reg_state[CTX_RING_HEAD]); + drm_printf(p, "\t\tLRC Tail: Internal %u, Memory %u\n", + ce->ring->tail, + ce->lrc_reg_state[CTX_RING_TAIL]); + drm_printf(p, "\t\tContext Pin Count: %u\n", + atomic_read(&ce->pin_count)); + drm_printf(p, "\t\tGuC ID Ref Count: %u\n", + atomic_read(&ce->guc_id_ref)); + drm_printf(p, "\t\tSchedule State: 0x%x, 0x%x\n\n", + ce->guc_state.sched_state, + atomic_read(&ce->guc_sched_state_no_lock)); + + guc_log_context_priority(p, ce); + } +} + +static struct intel_context * +guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count) +{ + struct guc_virtual_engine *ve; + struct intel_guc *guc; + unsigned int n; + int err; + + ve = kzalloc(sizeof(*ve), GFP_KERNEL); + if (!ve) + return ERR_PTR(-ENOMEM); + + guc = &siblings[0]->gt->uc.guc; + + ve->base.i915 = siblings[0]->i915; + ve->base.gt = siblings[0]->gt; + ve->base.uncore = siblings[0]->uncore; + ve->base.id = -1; + + ve->base.uabi_class = I915_ENGINE_CLASS_INVALID; + ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL; + ve->base.uabi_instance = I915_ENGINE_CLASS_INVALID_VIRTUAL; + ve->base.saturated = ALL_ENGINES; + + snprintf(ve->base.name, sizeof(ve->base.name), "virtual"); + + ve->base.sched_engine = i915_sched_engine_get(guc->sched_engine); + + ve->base.cops = 
&virtual_guc_context_ops; + ve->base.request_alloc = guc_request_alloc; + ve->base.bump_serial = virtual_guc_bump_serial; + + ve->base.submit_request = guc_submit_request; + + ve->base.flags = I915_ENGINE_IS_VIRTUAL; + + intel_context_init(&ve->context, &ve->base); + + for (n = 0; n < count; n++) { + struct intel_engine_cs *sibling = siblings[n]; + + GEM_BUG_ON(!is_power_of_2(sibling->mask)); + if (sibling->mask & ve->base.mask) { + DRM_DEBUG("duplicate %s entry in load balancer\n", + sibling->name); + err = -EINVAL; + goto err_put; + } + + ve->base.mask |= sibling->mask; + + if (n != 0 && ve->base.class != sibling->class) { + DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n", + sibling->class, ve->base.class); + err = -EINVAL; + goto err_put; + } else if (n == 0) { + ve->base.class = sibling->class; + ve->base.uabi_class = sibling->uabi_class; + snprintf(ve->base.name, sizeof(ve->base.name), + "v%dx%d", ve->base.class, count); + ve->base.context_size = sibling->context_size; + + ve->base.add_active_request = + sibling->add_active_request; + ve->base.remove_active_request = + sibling->remove_active_request; + ve->base.emit_bb_start = sibling->emit_bb_start; + ve->base.emit_flush = sibling->emit_flush; + ve->base.emit_init_breadcrumb = + sibling->emit_init_breadcrumb; + ve->base.emit_fini_breadcrumb = + sibling->emit_fini_breadcrumb; + ve->base.emit_fini_breadcrumb_dw = + sibling->emit_fini_breadcrumb_dw; + ve->base.breadcrumbs = + intel_breadcrumbs_get(sibling->breadcrumbs); + + ve->base.flags |= sibling->flags; + + ve->base.props.timeslice_duration_ms = + sibling->props.timeslice_duration_ms; + ve->base.props.preempt_timeout_ms = + sibling->props.preempt_timeout_ms; + } + } + + return &ve->context; + +err_put: + intel_context_put(&ve->context); + return ERR_PTR(err); +} + +bool intel_guc_virtual_engine_has_heartbeat(const struct intel_engine_cs *ve) +{ + struct intel_engine_cs *engine; + intel_engine_mask_t tmp, mask = ve->mask; + + for_each_engine_masked(engine, ve->gt, mask, tmp) + if (READ_ONCE(engine->props.heartbeat_interval_ms)) + return true; + + return false; +} diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h index 3f7005018939..c7ef44fa0c36 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h @@ -10,6 +10,7 @@ #include "intel_guc.h" +struct drm_printer; struct intel_engine_cs; void intel_guc_submission_init_early(struct intel_guc *guc); @@ -20,11 +21,24 @@ void intel_guc_submission_fini(struct intel_guc *guc); int intel_guc_preempt_work_create(struct intel_guc *guc); void intel_guc_preempt_work_destroy(struct intel_guc *guc); int intel_guc_submission_setup(struct intel_engine_cs *engine); +void intel_guc_submission_print_info(struct intel_guc *guc, + struct drm_printer *p); +void intel_guc_submission_print_context_info(struct intel_guc *guc, + struct drm_printer *p); +void intel_guc_dump_active_requests(struct intel_engine_cs *engine, + struct i915_request *hung_rq, + struct drm_printer *m); + +bool intel_guc_virtual_engine_has_heartbeat(const struct intel_engine_cs *ve); + +int intel_guc_wait_for_pending_msg(struct intel_guc *guc, + atomic_t *wait_var, + bool interruptible, + long timeout); static inline bool intel_guc_submission_is_supported(struct intel_guc *guc) { - /* XXX: GuC submission is unavailable for now */ - return false; + return guc->submission_supported; } static inline bool intel_guc_submission_is_wanted(struct 
intel_guc *guc) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index 6d8b9233214e..b104fb7607eb 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -34,8 +34,14 @@ static void uc_expand_default_options(struct intel_uc *uc) return; } - /* Default: enable HuC authentication only */ - i915->params.enable_guc = ENABLE_GUC_LOAD_HUC; + /* Intermediate platforms are HuC authentication only */ + if (IS_DG1(i915) || IS_ALDERLAKE_S(i915)) { + i915->params.enable_guc = ENABLE_GUC_LOAD_HUC; + return; + } + + /* Default: enable HuC authentication and GuC submission */ + i915->params.enable_guc = ENABLE_GUC_LOAD_HUC | ENABLE_GUC_SUBMISSION; } /* Reset GuC providing us with fresh state for both GuC and HuC. @@ -69,16 +75,18 @@ static void __confirm_options(struct intel_uc *uc) struct drm_i915_private *i915 = uc_to_gt(uc)->i915; drm_dbg(&i915->drm, - "enable_guc=%d (guc:%s submission:%s huc:%s)\n", + "enable_guc=%d (guc:%s submission:%s huc:%s slpc:%s)\n", i915->params.enable_guc, yesno(intel_uc_wants_guc(uc)), yesno(intel_uc_wants_guc_submission(uc)), - yesno(intel_uc_wants_huc(uc))); + yesno(intel_uc_wants_huc(uc)), + yesno(intel_uc_wants_guc_slpc(uc))); if (i915->params.enable_guc == 0) { GEM_BUG_ON(intel_uc_wants_guc(uc)); GEM_BUG_ON(intel_uc_wants_guc_submission(uc)); GEM_BUG_ON(intel_uc_wants_huc(uc)); + GEM_BUG_ON(intel_uc_wants_guc_slpc(uc)); return; } @@ -120,6 +128,11 @@ void intel_uc_init_early(struct intel_uc *uc) uc->ops = &uc_ops_off; } +void intel_uc_init_late(struct intel_uc *uc) +{ + intel_guc_init_late(&uc->guc); +} + void intel_uc_driver_late_release(struct intel_uc *uc) { } @@ -207,21 +220,6 @@ static void guc_handle_mmio_msg(struct intel_guc *guc) spin_unlock_irq(&guc->irq_lock); } -static void guc_reset_interrupts(struct intel_guc *guc) -{ - guc->interrupts.reset(guc); -} - -static void guc_enable_interrupts(struct intel_guc *guc) -{ - guc->interrupts.enable(guc); -} - -static void guc_disable_interrupts(struct intel_guc *guc) -{ - guc->interrupts.disable(guc); -} - static int guc_enable_communication(struct intel_guc *guc) { struct intel_gt *gt = guc_to_gt(guc); @@ -242,7 +240,7 @@ static int guc_enable_communication(struct intel_guc *guc) guc_get_mmio_msg(guc); guc_handle_mmio_msg(guc); - guc_enable_interrupts(guc); + intel_guc_enable_interrupts(guc); /* check for CT messages received before we enabled interrupts */ spin_lock_irq(>->irq_lock); @@ -265,7 +263,7 @@ static void guc_disable_communication(struct intel_guc *guc) */ guc_clear_mmio_msg(guc); - guc_disable_interrupts(guc); + intel_guc_disable_interrupts(guc); intel_guc_ct_disable(&guc->ct); @@ -323,9 +321,6 @@ static int __uc_init(struct intel_uc *uc) if (i915_inject_probe_failure(uc_to_gt(uc)->i915)) return -ENOMEM; - /* XXX: GuC submission is unavailable for now */ - GEM_BUG_ON(intel_uc_uses_guc_submission(uc)); - ret = intel_guc_init(guc); if (ret) return ret; @@ -463,7 +458,7 @@ static int __uc_init_hw(struct intel_uc *uc) if (ret) goto err_out; - guc_reset_interrupts(guc); + intel_guc_reset_interrupts(guc); /* WaEnableuKernelHeaderValidFix:skl */ /* WaEnableGuCBootHashCheckNotSet:skl,bxt,kbl */ @@ -505,12 +500,21 @@ static int __uc_init_hw(struct intel_uc *uc) if (intel_uc_uses_guc_submission(uc)) intel_guc_submission_enable(guc); + if (intel_uc_uses_guc_slpc(uc)) { + ret = intel_guc_slpc_enable(&guc->slpc); + if (ret) + goto err_submission; + } + drm_info(&i915->drm, "%s firmware %s version %u.%u %s:%s\n", 
intel_uc_fw_type_repr(INTEL_UC_FW_TYPE_GUC), guc->fw.path, guc->fw.major_ver_found, guc->fw.minor_ver_found, "submission", enableddisabled(intel_uc_uses_guc_submission(uc))); + drm_info(&i915->drm, "GuC SLPC: %s\n", + enableddisabled(intel_uc_uses_guc_slpc(uc))); + if (intel_uc_uses_huc(uc)) { drm_info(&i915->drm, "%s firmware %s version %u.%u %s:%s\n", intel_uc_fw_type_repr(INTEL_UC_FW_TYPE_HUC), @@ -525,6 +529,8 @@ static int __uc_init_hw(struct intel_uc *uc) /* * We've failed to load the firmware :( */ +err_submission: + intel_guc_submission_disable(guc); err_log_capture: __uc_capture_load_err_log(uc); err_out: @@ -565,23 +571,67 @@ void intel_uc_reset_prepare(struct intel_uc *uc) { struct intel_guc *guc = &uc->guc; - if (!intel_guc_is_ready(guc)) + uc->reset_in_progress = true; + + /* Nothing to do if GuC isn't supported */ + if (!intel_uc_supports_guc(uc)) return; + /* Firmware expected to be running when this function is called */ + if (!intel_guc_is_ready(guc)) + goto sanitize; + + if (intel_uc_uses_guc_submission(uc)) + intel_guc_submission_reset_prepare(guc); + +sanitize: __uc_sanitize(uc); } +void intel_uc_reset(struct intel_uc *uc, bool stalled) +{ + struct intel_guc *guc = &uc->guc; + + /* Firmware can not be running when this function is called */ + if (intel_uc_uses_guc_submission(uc)) + intel_guc_submission_reset(guc, stalled); +} + +void intel_uc_reset_finish(struct intel_uc *uc) +{ + struct intel_guc *guc = &uc->guc; + + uc->reset_in_progress = false; + + /* Firmware expected to be running when this function is called */ + if (intel_guc_is_fw_running(guc) && intel_uc_uses_guc_submission(uc)) + intel_guc_submission_reset_finish(guc); +} + +void intel_uc_cancel_requests(struct intel_uc *uc) +{ + struct intel_guc *guc = &uc->guc; + + /* Firmware can not be running when this function is called */ + if (intel_uc_uses_guc_submission(uc)) + intel_guc_submission_cancel_requests(guc); +} + void intel_uc_runtime_suspend(struct intel_uc *uc) { struct intel_guc *guc = &uc->guc; - int err; if (!intel_guc_is_ready(guc)) return; - err = intel_guc_suspend(guc); - if (err) - DRM_DEBUG_DRIVER("Failed to suspend GuC, err=%d", err); + /* + * Wait for any outstanding CTB before tearing down communication /w the + * GuC. + */ +#define OUTSTANDING_CTB_TIMEOUT_PERIOD (HZ / 5) + intel_guc_wait_for_pending_msg(guc, &guc->outstanding_submission_g2h, + false, OUTSTANDING_CTB_TIMEOUT_PERIOD); + GEM_WARN_ON(atomic_read(&guc->outstanding_submission_g2h)); guc_disable_communication(guc); } @@ -590,17 +640,22 @@ void intel_uc_suspend(struct intel_uc *uc) { struct intel_guc *guc = &uc->guc; intel_wakeref_t wakeref; + int err; if (!intel_guc_is_ready(guc)) return; - with_intel_runtime_pm(uc_to_gt(uc)->uncore->rpm, wakeref) - intel_uc_runtime_suspend(uc); + with_intel_runtime_pm(&uc_to_gt(uc)->i915->runtime_pm, wakeref) { + err = intel_guc_suspend(guc); + if (err) + DRM_DEBUG_DRIVER("Failed to suspend GuC, err=%d", err); + } } static int __uc_resume(struct intel_uc *uc, bool enable_communication) { struct intel_guc *guc = &uc->guc; + struct intel_gt *gt = guc_to_gt(guc); int err; if (!intel_guc_is_fw_running(guc)) @@ -612,6 +667,13 @@ static int __uc_resume(struct intel_uc *uc, bool enable_communication) if (enable_communication) guc_enable_communication(guc); + /* If we are only resuming GuC communication but not reloading + * GuC, we need to ensure the ARAT timer interrupt is enabled + * again. In case of GuC reload, it is enabled during SLPC enable. 
+ */ + if (enable_communication && intel_uc_uses_guc_slpc(uc)) + intel_guc_pm_intrmsk_enable(gt); + err = intel_guc_resume(guc); if (err) { DRM_DEBUG_DRIVER("Failed to resume GuC, err=%d", err); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.h b/drivers/gpu/drm/i915/gt/uc/intel_uc.h index 9c954c589edf..866b462821c0 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.h @@ -7,7 +7,9 @@ #define _INTEL_UC_H_ #include "intel_guc.h" +#include "intel_guc_rc.h" #include "intel_guc_submission.h" +#include "intel_guc_slpc.h" #include "intel_huc.h" #include "i915_params.h" @@ -30,13 +32,19 @@ struct intel_uc { /* Snapshot of GuC log from last failed load */ struct drm_i915_gem_object *load_err_log; + + bool reset_in_progress; }; void intel_uc_init_early(struct intel_uc *uc); +void intel_uc_init_late(struct intel_uc *uc); void intel_uc_driver_late_release(struct intel_uc *uc); void intel_uc_driver_remove(struct intel_uc *uc); void intel_uc_init_mmio(struct intel_uc *uc); void intel_uc_reset_prepare(struct intel_uc *uc); +void intel_uc_reset(struct intel_uc *uc, bool stalled); +void intel_uc_reset_finish(struct intel_uc *uc); +void intel_uc_cancel_requests(struct intel_uc *uc); void intel_uc_suspend(struct intel_uc *uc); void intel_uc_runtime_suspend(struct intel_uc *uc); int intel_uc_resume(struct intel_uc *uc); @@ -77,10 +85,17 @@ __uc_state_checker(x, func, uses, used) uc_state_checkers(guc, guc); uc_state_checkers(huc, huc); uc_state_checkers(guc, guc_submission); +uc_state_checkers(guc, guc_slpc); +uc_state_checkers(guc, guc_rc); #undef uc_state_checkers #undef __uc_state_checker +static inline int intel_uc_wait_for_idle(struct intel_uc *uc, long timeout) +{ + return intel_guc_wait_for_idle(&uc->guc, timeout); +} + #define intel_uc_ops_function(_NAME, _OPS, _TYPE, _RET) \ static inline _TYPE intel_uc_##_NAME(struct intel_uc *uc) \ { \ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index df647c9a8d56..3a16d08608a5 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -48,19 +48,20 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw, * firmware as TGL. 
*/ #define INTEL_UC_FIRMWARE_DEFS(fw_def, guc_def, huc_def) \ - fw_def(ALDERLAKE_S, 0, guc_def(tgl, 49, 0, 1), huc_def(tgl, 7, 5, 0)) \ - fw_def(ROCKETLAKE, 0, guc_def(tgl, 49, 0, 1), huc_def(tgl, 7, 5, 0)) \ - fw_def(TIGERLAKE, 0, guc_def(tgl, 49, 0, 1), huc_def(tgl, 7, 5, 0)) \ - fw_def(JASPERLAKE, 0, guc_def(ehl, 49, 0, 1), huc_def(ehl, 9, 0, 0)) \ - fw_def(ELKHARTLAKE, 0, guc_def(ehl, 49, 0, 1), huc_def(ehl, 9, 0, 0)) \ - fw_def(ICELAKE, 0, guc_def(icl, 49, 0, 1), huc_def(icl, 9, 0, 0)) \ - fw_def(COMETLAKE, 5, guc_def(cml, 49, 0, 1), huc_def(cml, 4, 0, 0)) \ - fw_def(COMETLAKE, 0, guc_def(kbl, 49, 0, 1), huc_def(kbl, 4, 0, 0)) \ - fw_def(COFFEELAKE, 0, guc_def(kbl, 49, 0, 1), huc_def(kbl, 4, 0, 0)) \ - fw_def(GEMINILAKE, 0, guc_def(glk, 49, 0, 1), huc_def(glk, 4, 0, 0)) \ - fw_def(KABYLAKE, 0, guc_def(kbl, 49, 0, 1), huc_def(kbl, 4, 0, 0)) \ - fw_def(BROXTON, 0, guc_def(bxt, 49, 0, 1), huc_def(bxt, 2, 0, 0)) \ - fw_def(SKYLAKE, 0, guc_def(skl, 49, 0, 1), huc_def(skl, 2, 0, 0)) + fw_def(ALDERLAKE_P, 0, guc_def(adlp, 62, 0, 3), huc_def(tgl, 7, 9, 3)) \ + fw_def(ALDERLAKE_S, 0, guc_def(tgl, 62, 0, 0), huc_def(tgl, 7, 9, 3)) \ + fw_def(ROCKETLAKE, 0, guc_def(tgl, 62, 0, 0), huc_def(tgl, 7, 9, 3)) \ + fw_def(TIGERLAKE, 0, guc_def(tgl, 62, 0, 0), huc_def(tgl, 7, 9, 3)) \ + fw_def(JASPERLAKE, 0, guc_def(ehl, 62, 0, 0), huc_def(ehl, 9, 0, 0)) \ + fw_def(ELKHARTLAKE, 0, guc_def(ehl, 62, 0, 0), huc_def(ehl, 9, 0, 0)) \ + fw_def(ICELAKE, 0, guc_def(icl, 62, 0, 0), huc_def(icl, 9, 0, 0)) \ + fw_def(COMETLAKE, 5, guc_def(cml, 62, 0, 0), huc_def(cml, 4, 0, 0)) \ + fw_def(COMETLAKE, 0, guc_def(kbl, 62, 0, 0), huc_def(kbl, 4, 0, 0)) \ + fw_def(COFFEELAKE, 0, guc_def(kbl, 62, 0, 0), huc_def(kbl, 4, 0, 0)) \ + fw_def(GEMINILAKE, 0, guc_def(glk, 62, 0, 0), huc_def(glk, 4, 0, 0)) \ + fw_def(KABYLAKE, 0, guc_def(kbl, 62, 0, 0), huc_def(kbl, 4, 0, 0)) \ + fw_def(BROXTON, 0, guc_def(bxt, 62, 0, 0), huc_def(bxt, 2, 0, 0)) \ + fw_def(SKYLAKE, 0, guc_def(skl, 62, 0, 0), huc_def(skl, 2, 0, 0)) #define __MAKE_UC_FW_PATH(prefix_, name_, major_, minor_, patch_) \ "i915/" \ diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index cc2c05e18206..e5c2fdfc20e3 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -1055,7 +1055,7 @@ static bool vgpu_ips_enabled(struct intel_vgpu *vgpu) { struct drm_i915_private *dev_priv = vgpu->gvt->gt->i915; - if (GRAPHICS_VER(dev_priv) == 9 || GRAPHICS_VER(dev_priv) == 10) { + if (GRAPHICS_VER(dev_priv) == 9) { u32 ips = vgpu_vreg_t(vgpu, GEN8_GAMW_ECO_DEV_RW_IA) & GAMW_ECO_ENABLE_64K_IPS_FIELD; diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 734c37c5e347..b56a8e37a3cd 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -1409,11 +1409,8 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu) intel_context_set_single_submission(ce); /* Max ring buffer size */ - if (!intel_uc_wants_guc_submission(&engine->gt->uc)) { - const unsigned int ring_size = 512 * SZ_4K; - - ce->ring = __intel_context_ring_size(ring_size); - } + if (!intel_uc_wants_guc_submission(&engine->gt->uc)) + ce->ring_size = SZ_2M; s->shadow[i] = ce; } diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c index b1aa1c482c32..3103c1e1fd14 100644 --- a/drivers/gpu/drm/i915/i915_active.c +++ b/drivers/gpu/drm/i915/i915_active.c @@ -13,7 +13,6 @@ #include "i915_drv.h" #include "i915_active.h" -#include "i915_globals.h" /* * Active refs memory 
management @@ -22,10 +21,7 @@ * they idle (when we know the active requests are inactive) and allocate the * nodes from a local slab cache to hopefully reduce the fragmentation. */ -static struct i915_global_active { - struct i915_global base; - struct kmem_cache *slab_cache; -} global; +static struct kmem_cache *slab_cache; struct active_node { struct rb_node node; @@ -174,7 +170,7 @@ __active_retire(struct i915_active *ref) /* Finally free the discarded timeline tree */ rbtree_postorder_for_each_entry_safe(it, n, &root, node) { GEM_BUG_ON(i915_active_fence_isset(&it->base)); - kmem_cache_free(global.slab_cache, it); + kmem_cache_free(slab_cache, it); } } @@ -322,7 +318,7 @@ active_instance(struct i915_active *ref, u64 idx) * XXX: We should preallocate this before i915_active_ref() is ever * called, but we cannot call into fs_reclaim() anyway, so use GFP_ATOMIC. */ - node = kmem_cache_alloc(global.slab_cache, GFP_ATOMIC); + node = kmem_cache_alloc(slab_cache, GFP_ATOMIC); if (!node) goto out; @@ -788,7 +784,7 @@ void i915_active_fini(struct i915_active *ref) mutex_destroy(&ref->mutex); if (ref->cache) - kmem_cache_free(global.slab_cache, ref->cache); + kmem_cache_free(slab_cache, ref->cache); } static inline bool is_idle_barrier(struct active_node *node, u64 idx) @@ -908,7 +904,7 @@ int i915_active_acquire_preallocate_barrier(struct i915_active *ref, node = reuse_idle_barrier(ref, idx); rcu_read_unlock(); if (!node) { - node = kmem_cache_alloc(global.slab_cache, GFP_KERNEL); + node = kmem_cache_alloc(slab_cache, GFP_KERNEL); if (!node) goto unwind; @@ -956,7 +952,7 @@ unwind: atomic_dec(&ref->count); intel_engine_pm_put(barrier_to_engine(node)); - kmem_cache_free(global.slab_cache, node); + kmem_cache_free(slab_cache, node); } return -ENOMEM; } @@ -1176,27 +1172,16 @@ struct i915_active *i915_active_create(void) #include "selftests/i915_active.c" #endif -static void i915_global_active_shrink(void) +void i915_active_module_exit(void) { - kmem_cache_shrink(global.slab_cache); + kmem_cache_destroy(slab_cache); } -static void i915_global_active_exit(void) +int __init i915_active_module_init(void) { - kmem_cache_destroy(global.slab_cache); -} - -static struct i915_global_active global = { { - .shrink = i915_global_active_shrink, - .exit = i915_global_active_exit, -} }; - -int __init i915_global_active_init(void) -{ - global.slab_cache = KMEM_CACHE(active_node, SLAB_HWCACHE_ALIGN); - if (!global.slab_cache) + slab_cache = KMEM_CACHE(active_node, SLAB_HWCACHE_ALIGN); + if (!slab_cache) return -ENOMEM; - i915_global_register(&global.base); return 0; } diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h index d0feda68b874..5fcdb0e2bc9e 100644 --- a/drivers/gpu/drm/i915/i915_active.h +++ b/drivers/gpu/drm/i915/i915_active.h @@ -247,4 +247,7 @@ static inline int __i915_request_await_exclusive(struct i915_request *rq, return err; } +void i915_active_module_exit(void); +int i915_active_module_init(void); + #endif /* _I915_ACTIVE_H_ */ diff --git a/drivers/gpu/drm/i915/i915_buddy.c b/drivers/gpu/drm/i915/i915_buddy.c new file mode 100644 index 000000000000..7b274c51cac0 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_buddy.c @@ -0,0 +1,421 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +#include <linux/kmemleak.h> + +#include "i915_buddy.h" + +#include "i915_gem.h" +#include "i915_utils.h" + +static struct kmem_cache *slab_blocks; + +static struct i915_buddy_block *i915_block_alloc(struct i915_buddy_mm *mm, + struct i915_buddy_block 
*parent, + unsigned int order, + u64 offset) +{ + struct i915_buddy_block *block; + + GEM_BUG_ON(order > I915_BUDDY_MAX_ORDER); + + block = kmem_cache_zalloc(slab_blocks, GFP_KERNEL); + if (!block) + return NULL; + + block->header = offset; + block->header |= order; + block->parent = parent; + + GEM_BUG_ON(block->header & I915_BUDDY_HEADER_UNUSED); + return block; +} + +static void i915_block_free(struct i915_buddy_mm *mm, + struct i915_buddy_block *block) +{ + kmem_cache_free(slab_blocks, block); +} + +static void mark_allocated(struct i915_buddy_block *block) +{ + block->header &= ~I915_BUDDY_HEADER_STATE; + block->header |= I915_BUDDY_ALLOCATED; + + list_del(&block->link); +} + +static void mark_free(struct i915_buddy_mm *mm, + struct i915_buddy_block *block) +{ + block->header &= ~I915_BUDDY_HEADER_STATE; + block->header |= I915_BUDDY_FREE; + + list_add(&block->link, + &mm->free_list[i915_buddy_block_order(block)]); +} + +static void mark_split(struct i915_buddy_block *block) +{ + block->header &= ~I915_BUDDY_HEADER_STATE; + block->header |= I915_BUDDY_SPLIT; + + list_del(&block->link); +} + +int i915_buddy_init(struct i915_buddy_mm *mm, u64 size, u64 chunk_size) +{ + unsigned int i; + u64 offset; + + if (size < chunk_size) + return -EINVAL; + + if (chunk_size < PAGE_SIZE) + return -EINVAL; + + if (!is_power_of_2(chunk_size)) + return -EINVAL; + + size = round_down(size, chunk_size); + + mm->size = size; + mm->chunk_size = chunk_size; + mm->max_order = ilog2(size) - ilog2(chunk_size); + + GEM_BUG_ON(mm->max_order > I915_BUDDY_MAX_ORDER); + + mm->free_list = kmalloc_array(mm->max_order + 1, + sizeof(struct list_head), + GFP_KERNEL); + if (!mm->free_list) + return -ENOMEM; + + for (i = 0; i <= mm->max_order; ++i) + INIT_LIST_HEAD(&mm->free_list[i]); + + mm->n_roots = hweight64(size); + + mm->roots = kmalloc_array(mm->n_roots, + sizeof(struct i915_buddy_block *), + GFP_KERNEL); + if (!mm->roots) + goto out_free_list; + + offset = 0; + i = 0; + + /* + * Split into power-of-two blocks, in case we are given a size that is + * not itself a power-of-two. 
+ */ + do { + struct i915_buddy_block *root; + unsigned int order; + u64 root_size; + + root_size = rounddown_pow_of_two(size); + order = ilog2(root_size) - ilog2(chunk_size); + + root = i915_block_alloc(mm, NULL, order, offset); + if (!root) + goto out_free_roots; + + mark_free(mm, root); + + GEM_BUG_ON(i > mm->max_order); + GEM_BUG_ON(i915_buddy_block_size(mm, root) < chunk_size); + + mm->roots[i] = root; + + offset += root_size; + size -= root_size; + i++; + } while (size); + + return 0; + +out_free_roots: + while (i--) + i915_block_free(mm, mm->roots[i]); + kfree(mm->roots); +out_free_list: + kfree(mm->free_list); + return -ENOMEM; +} + +void i915_buddy_fini(struct i915_buddy_mm *mm) +{ + int i; + + for (i = 0; i < mm->n_roots; ++i) { + GEM_WARN_ON(!i915_buddy_block_is_free(mm->roots[i])); + i915_block_free(mm, mm->roots[i]); + } + + kfree(mm->roots); + kfree(mm->free_list); +} + +static int split_block(struct i915_buddy_mm *mm, + struct i915_buddy_block *block) +{ + unsigned int block_order = i915_buddy_block_order(block) - 1; + u64 offset = i915_buddy_block_offset(block); + + GEM_BUG_ON(!i915_buddy_block_is_free(block)); + GEM_BUG_ON(!i915_buddy_block_order(block)); + + block->left = i915_block_alloc(mm, block, block_order, offset); + if (!block->left) + return -ENOMEM; + + block->right = i915_block_alloc(mm, block, block_order, + offset + (mm->chunk_size << block_order)); + if (!block->right) { + i915_block_free(mm, block->left); + return -ENOMEM; + } + + mark_free(mm, block->left); + mark_free(mm, block->right); + + mark_split(block); + + return 0; +} + +static struct i915_buddy_block * +get_buddy(struct i915_buddy_block *block) +{ + struct i915_buddy_block *parent; + + parent = block->parent; + if (!parent) + return NULL; + + if (parent->left == block) + return parent->right; + + return parent->left; +} + +static void __i915_buddy_free(struct i915_buddy_mm *mm, + struct i915_buddy_block *block) +{ + struct i915_buddy_block *parent; + + while ((parent = block->parent)) { + struct i915_buddy_block *buddy; + + buddy = get_buddy(block); + + if (!i915_buddy_block_is_free(buddy)) + break; + + list_del(&buddy->link); + + i915_block_free(mm, block); + i915_block_free(mm, buddy); + + block = parent; + } + + mark_free(mm, block); +} + +void i915_buddy_free(struct i915_buddy_mm *mm, + struct i915_buddy_block *block) +{ + GEM_BUG_ON(!i915_buddy_block_is_allocated(block)); + __i915_buddy_free(mm, block); +} + +void i915_buddy_free_list(struct i915_buddy_mm *mm, struct list_head *objects) +{ + struct i915_buddy_block *block, *on; + + list_for_each_entry_safe(block, on, objects, link) { + i915_buddy_free(mm, block); + cond_resched(); + } + INIT_LIST_HEAD(objects); +} + +/* + * Allocate power-of-two block. The order value here translates to: + * + * 0 = 2^0 * mm->chunk_size + * 1 = 2^1 * mm->chunk_size + * 2 = 2^2 * mm->chunk_size + * ... 
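Editorial aside, not part of the patch: a small worked example of the root splitting and order encoding implemented above. All values are hypothetical; only i915_buddy_init(), i915_buddy_fini() and the chunk/order relationship from this file are assumed (and 4K pages, so that SZ_4K is a valid chunk size).

static void example_buddy_roots(void)
{
	struct i915_buddy_mm mm;

	/* 768M is not a power of two, so hweight64() == 2 roots are created */
	if (i915_buddy_init(&mm, SZ_512M + SZ_256M, SZ_4K))
		return;

	/*
	 * roots[0] covers 512M: order = ilog2(SZ_512M) - ilog2(SZ_4K) = 17
	 * roots[1] covers 256M: order = ilog2(SZ_256M) - ilog2(SZ_4K) = 16
	 * and, in general, a block of order n spans chunk_size << n bytes.
	 */
	WARN_ON(mm.max_order != ilog2(SZ_512M + SZ_256M) - ilog2(SZ_4K));

	i915_buddy_fini(&mm);
}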
+ */ +struct i915_buddy_block * +i915_buddy_alloc(struct i915_buddy_mm *mm, unsigned int order) +{ + struct i915_buddy_block *block = NULL; + unsigned int i; + int err; + + for (i = order; i <= mm->max_order; ++i) { + block = list_first_entry_or_null(&mm->free_list[i], + struct i915_buddy_block, + link); + if (block) + break; + } + + if (!block) + return ERR_PTR(-ENOSPC); + + GEM_BUG_ON(!i915_buddy_block_is_free(block)); + + while (i != order) { + err = split_block(mm, block); + if (unlikely(err)) + goto out_free; + + /* Go low */ + block = block->left; + i--; + } + + mark_allocated(block); + kmemleak_update_trace(block); + return block; + +out_free: + if (i != order) + __i915_buddy_free(mm, block); + return ERR_PTR(err); +} + +static inline bool overlaps(u64 s1, u64 e1, u64 s2, u64 e2) +{ + return s1 <= e2 && e1 >= s2; +} + +static inline bool contains(u64 s1, u64 e1, u64 s2, u64 e2) +{ + return s1 <= s2 && e1 >= e2; +} + +/* + * Allocate range. Note that it's safe to chain together multiple alloc_ranges + * with the same blocks list. + * + * Intended for pre-allocating portions of the address space, for example to + * reserve a block for the initial framebuffer or similar, hence the expectation + * here is that i915_buddy_alloc() is still the main vehicle for + * allocations, so if that's not the case then the drm_mm range allocator is + * probably a much better fit, and so you should probably go use that instead. + */ +int i915_buddy_alloc_range(struct i915_buddy_mm *mm, + struct list_head *blocks, + u64 start, u64 size) +{ + struct i915_buddy_block *block; + struct i915_buddy_block *buddy; + LIST_HEAD(allocated); + LIST_HEAD(dfs); + u64 end; + int err; + int i; + + if (size < mm->chunk_size) + return -EINVAL; + + if (!IS_ALIGNED(size | start, mm->chunk_size)) + return -EINVAL; + + if (range_overflows(start, size, mm->size)) + return -EINVAL; + + for (i = 0; i < mm->n_roots; ++i) + list_add_tail(&mm->roots[i]->tmp_link, &dfs); + + end = start + size - 1; + + do { + u64 block_start; + u64 block_end; + + block = list_first_entry_or_null(&dfs, + struct i915_buddy_block, + tmp_link); + if (!block) + break; + + list_del(&block->tmp_link); + + block_start = i915_buddy_block_offset(block); + block_end = block_start + i915_buddy_block_size(mm, block) - 1; + + if (!overlaps(start, end, block_start, block_end)) + continue; + + if (i915_buddy_block_is_allocated(block)) { + err = -ENOSPC; + goto err_free; + } + + if (contains(start, end, block_start, block_end)) { + if (!i915_buddy_block_is_free(block)) { + err = -ENOSPC; + goto err_free; + } + + mark_allocated(block); + list_add_tail(&block->link, &allocated); + continue; + } + + if (!i915_buddy_block_is_split(block)) { + err = split_block(mm, block); + if (unlikely(err)) + goto err_undo; + } + + list_add(&block->right->tmp_link, &dfs); + list_add(&block->left->tmp_link, &dfs); + } while (1); + + list_splice_tail(&allocated, blocks); + return 0; + +err_undo: + /* + * We really don't want to leave around a bunch of split blocks, since + * bigger is better, so make sure we merge everything back before we + * free the allocated blocks. 
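Editorial usage sketch, not part of the patch: how a caller might drive the allocation API above. Per the locking note in i915_buddy.h further down, locking is the caller's responsibility, so the mutex and the helper name here are hypothetical; size is assumed to be a power-of-two multiple of chunk_size.

static struct i915_buddy_block *
example_alloc_one(struct i915_buddy_mm *mm, struct mutex *lock, u64 size)
{
	/* order 0 is one chunk; order n covers mm->chunk_size << n bytes */
	unsigned int order = ilog2(size) - ilog2(mm->chunk_size);
	struct i915_buddy_block *block;

	mutex_lock(lock);
	/* ERR_PTR(-ENOSPC) if no free block of this order (or larger) exists */
	block = i915_buddy_alloc(mm, order);
	mutex_unlock(lock);

	return block;
}

Blocks would go back through i915_buddy_free(), or i915_buddy_free_list() for a list built up by i915_buddy_alloc_range(), under the same caller-provided lock.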
+ */ + buddy = get_buddy(block); + if (buddy && + (i915_buddy_block_is_free(block) && + i915_buddy_block_is_free(buddy))) + __i915_buddy_free(mm, block); + +err_free: + i915_buddy_free_list(mm, &allocated); + return err; +} + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/i915_buddy.c" +#endif + +void i915_buddy_module_exit(void) +{ + kmem_cache_destroy(slab_blocks); +} + +int __init i915_buddy_module_init(void) +{ + slab_blocks = KMEM_CACHE(i915_buddy_block, 0); + if (!slab_blocks) + return -ENOMEM; + + return 0; +} diff --git a/drivers/gpu/drm/i915/i915_buddy.h b/drivers/gpu/drm/i915/i915_buddy.h new file mode 100644 index 000000000000..3940d632f208 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_buddy.h @@ -0,0 +1,135 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef __I915_BUDDY_H__ +#define __I915_BUDDY_H__ + +#include <linux/bitops.h> +#include <linux/list.h> +#include <linux/slab.h> + +struct i915_buddy_block { +#define I915_BUDDY_HEADER_OFFSET GENMASK_ULL(63, 12) +#define I915_BUDDY_HEADER_STATE GENMASK_ULL(11, 10) +#define I915_BUDDY_ALLOCATED (1 << 10) +#define I915_BUDDY_FREE (2 << 10) +#define I915_BUDDY_SPLIT (3 << 10) +/* Free to be used, if needed in the future */ +#define I915_BUDDY_HEADER_UNUSED GENMASK_ULL(9, 6) +#define I915_BUDDY_HEADER_ORDER GENMASK_ULL(5, 0) + u64 header; + + struct i915_buddy_block *left; + struct i915_buddy_block *right; + struct i915_buddy_block *parent; + + void *private; /* owned by creator */ + + /* + * While the block is allocated by the user through i915_buddy_alloc*, + * the user has ownership of the link, for example to maintain within + * a list, if so desired. As soon as the block is freed with + * i915_buddy_free* ownership is given back to the mm. + */ + struct list_head link; + struct list_head tmp_link; +}; + +/* Order-zero must be at least PAGE_SIZE */ +#define I915_BUDDY_MAX_ORDER (63 - PAGE_SHIFT) + +/* + * Binary Buddy System. + * + * Locking should be handled by the user, a simple mutex around + * i915_buddy_alloc* and i915_buddy_free* should suffice. + */ +struct i915_buddy_mm { + /* Maintain a free list for each order. */ + struct list_head *free_list; + + /* + * Maintain explicit binary tree(s) to track the allocation of the + * address space. This gives us a simple way of finding a buddy block + * and performing the potentially recursive merge step when freeing a + * block. Nodes are either allocated or free, in which case they will + * also exist on the respective free list. + */ + struct i915_buddy_block **roots; + + /* + * Anything from here is public, and remains static for the lifetime of + * the mm. Everything above is considered do-not-touch. 
+ */ + unsigned int n_roots; + unsigned int max_order; + + /* Must be at least PAGE_SIZE */ + u64 chunk_size; + u64 size; +}; + +static inline u64 +i915_buddy_block_offset(struct i915_buddy_block *block) +{ + return block->header & I915_BUDDY_HEADER_OFFSET; +} + +static inline unsigned int +i915_buddy_block_order(struct i915_buddy_block *block) +{ + return block->header & I915_BUDDY_HEADER_ORDER; +} + +static inline unsigned int +i915_buddy_block_state(struct i915_buddy_block *block) +{ + return block->header & I915_BUDDY_HEADER_STATE; +} + +static inline bool +i915_buddy_block_is_allocated(struct i915_buddy_block *block) +{ + return i915_buddy_block_state(block) == I915_BUDDY_ALLOCATED; +} + +static inline bool +i915_buddy_block_is_free(struct i915_buddy_block *block) +{ + return i915_buddy_block_state(block) == I915_BUDDY_FREE; +} + +static inline bool +i915_buddy_block_is_split(struct i915_buddy_block *block) +{ + return i915_buddy_block_state(block) == I915_BUDDY_SPLIT; +} + +static inline u64 +i915_buddy_block_size(struct i915_buddy_mm *mm, + struct i915_buddy_block *block) +{ + return mm->chunk_size << i915_buddy_block_order(block); +} + +int i915_buddy_init(struct i915_buddy_mm *mm, u64 size, u64 chunk_size); + +void i915_buddy_fini(struct i915_buddy_mm *mm); + +struct i915_buddy_block * +i915_buddy_alloc(struct i915_buddy_mm *mm, unsigned int order); + +int i915_buddy_alloc_range(struct i915_buddy_mm *mm, + struct list_head *blocks, + u64 start, u64 size); + +void i915_buddy_free(struct i915_buddy_mm *mm, struct i915_buddy_block *block); + +void i915_buddy_free_list(struct i915_buddy_mm *mm, struct list_head *objects); + +void i915_buddy_module_exit(void); +int i915_buddy_module_init(void); + +#endif diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index a3b4d99d64b9..e0403ce9ce69 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -1468,42 +1468,43 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine, * space. Parsing should be faster in some cases this way. 
*/ batch_end = cmd + batch_length / sizeof(*batch_end); - while (*cmd != MI_BATCH_BUFFER_END) { - u32 length = 1; - - if (*cmd != MI_NOOP) { /* MI_NOOP == 0 */ - desc = find_cmd(engine, *cmd, desc, &default_desc); - if (!desc) { - DRM_DEBUG("CMD: Unrecognized command: 0x%08X\n", *cmd); - ret = -EINVAL; - break; - } + do { + u32 length; - if (desc->flags & CMD_DESC_FIXED) - length = desc->length.fixed; - else - length = (*cmd & desc->length.mask) + LENGTH_BIAS; + if (*cmd == MI_BATCH_BUFFER_END) + break; - if ((batch_end - cmd) < length) { - DRM_DEBUG("CMD: Command length exceeds batch length: 0x%08X length=%u batchlen=%td\n", - *cmd, - length, - batch_end - cmd); - ret = -EINVAL; - break; - } + desc = find_cmd(engine, *cmd, desc, &default_desc); + if (!desc) { + DRM_DEBUG("CMD: Unrecognized command: 0x%08X\n", *cmd); + ret = -EINVAL; + break; + } - if (!check_cmd(engine, desc, cmd, length)) { - ret = -EACCES; - break; - } + if (desc->flags & CMD_DESC_FIXED) + length = desc->length.fixed; + else + length = (*cmd & desc->length.mask) + LENGTH_BIAS; - if (cmd_desc_is(desc, MI_BATCH_BUFFER_START)) { - ret = check_bbstart(cmd, offset, length, batch_length, - batch_addr, shadow_addr, - jump_whitelist); - break; - } + if ((batch_end - cmd) < length) { + DRM_DEBUG("CMD: Command length exceeds batch length: 0x%08X length=%u batchlen=%td\n", + *cmd, + length, + batch_end - cmd); + ret = -EINVAL; + break; + } + + if (!check_cmd(engine, desc, cmd, length)) { + ret = -EACCES; + break; + } + + if (cmd_desc_is(desc, MI_BATCH_BUFFER_START)) { + ret = check_bbstart(cmd, offset, length, batch_length, + batch_addr, shadow_addr, + jump_whitelist); + break; } if (!IS_ERR_OR_NULL(jump_whitelist)) @@ -1516,7 +1517,7 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine, ret = -EINVAL; break; } - } + } while (1); if (trampoline) { /* diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index cc745751ac53..44969f5dde50 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -538,20 +538,20 @@ static int i915_frequency_info(struct seq_file *m, void *unused) max_freq = (IS_GEN9_LP(dev_priv) ? rp_state_cap >> 0 : rp_state_cap >> 16) & 0xff; max_freq *= (IS_GEN9_BC(dev_priv) || - GRAPHICS_VER(dev_priv) >= 10 ? GEN9_FREQ_SCALER : 1); + GRAPHICS_VER(dev_priv) >= 11 ? GEN9_FREQ_SCALER : 1); seq_printf(m, "Lowest (RPN) frequency: %dMHz\n", intel_gpu_freq(rps, max_freq)); max_freq = (rp_state_cap & 0xff00) >> 8; max_freq *= (IS_GEN9_BC(dev_priv) || - GRAPHICS_VER(dev_priv) >= 10 ? GEN9_FREQ_SCALER : 1); + GRAPHICS_VER(dev_priv) >= 11 ? GEN9_FREQ_SCALER : 1); seq_printf(m, "Nominal (RP1) frequency: %dMHz\n", intel_gpu_freq(rps, max_freq)); max_freq = (IS_GEN9_LP(dev_priv) ? rp_state_cap >> 16 : rp_state_cap >> 0) & 0xff; max_freq *= (IS_GEN9_BC(dev_priv) || - GRAPHICS_VER(dev_priv) >= 10 ? GEN9_FREQ_SCALER : 1); + GRAPHICS_VER(dev_priv) >= 11 ? 
GEN9_FREQ_SCALER : 1); seq_printf(m, "Max non-overclocked (RP0) frequency: %dMHz\n", intel_gpu_freq(rps, max_freq)); seq_printf(m, "Max overclocked frequency: %dMHz\n", @@ -636,7 +636,7 @@ static int i915_swizzle_info(struct seq_file *m, void *data) intel_uncore_read16(uncore, C0DRB3_BW)); seq_printf(m, "C1DRB3 = 0x%04x\n", intel_uncore_read16(uncore, C1DRB3_BW)); - } else if (INTEL_GEN(dev_priv) >= 6) { + } else if (GRAPHICS_VER(dev_priv) >= 6) { seq_printf(m, "MAD_DIMM_C0 = 0x%08x\n", intel_uncore_read(uncore, MAD_DIMM_C0)); seq_printf(m, "MAD_DIMM_C1 = 0x%08x\n", diff --git a/drivers/gpu/drm/i915/i915_debugfs_params.c b/drivers/gpu/drm/i915/i915_debugfs_params.c index 4e2b077692cb..20424275d41e 100644 --- a/drivers/gpu/drm/i915/i915_debugfs_params.c +++ b/drivers/gpu/drm/i915/i915_debugfs_params.c @@ -6,9 +6,21 @@ #include <linux/kernel.h> #include "i915_debugfs_params.h" +#include "gt/intel_gt.h" +#include "gt/uc/intel_guc.h" #include "i915_drv.h" #include "i915_params.h" +#define MATCH_DEBUGFS_NODE_NAME(_file, _name) \ + (strcmp((_file)->f_path.dentry->d_name.name, (_name)) == 0) + +#define GET_I915(i915, name, ptr) \ + do { \ + struct i915_params *params; \ + params = container_of(((void *)(ptr)), typeof(*params), name); \ + (i915) = container_of(params, typeof(*(i915)), params); \ + } while (0) + /* int param */ static int i915_param_int_show(struct seq_file *m, void *data) { @@ -24,6 +36,16 @@ static int i915_param_int_open(struct inode *inode, struct file *file) return single_open(file, i915_param_int_show, inode->i_private); } +static int notify_guc(struct drm_i915_private *i915) +{ + int ret = 0; + + if (intel_uc_uses_guc_submission(&i915->gt.uc)) + ret = intel_guc_global_policies_update(&i915->gt.uc.guc); + + return ret; +} + static ssize_t i915_param_int_write(struct file *file, const char __user *ubuf, size_t len, loff_t *offp) @@ -81,8 +103,10 @@ static ssize_t i915_param_uint_write(struct file *file, const char __user *ubuf, size_t len, loff_t *offp) { + struct drm_i915_private *i915; struct seq_file *m = file->private_data; unsigned int *value = m->private; + unsigned int old = *value; int ret; ret = kstrtouint_from_user(ubuf, len, 0, value); @@ -95,6 +119,14 @@ static ssize_t i915_param_uint_write(struct file *file, *value = b; } + if (!ret && MATCH_DEBUGFS_NODE_NAME(file, "reset")) { + GET_I915(i915, reset, value); + + ret = notify_guc(i915); + if (ret) + *value = old; + } + return ret ?: len; } diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 73de45472f60..59fb4c710c8c 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -271,10 +271,11 @@ static void intel_detect_preproduction_hw(struct drm_i915_private *dev_priv) bool pre = false; pre |= IS_HSW_EARLY_SDV(dev_priv); - pre |= IS_SKL_REVID(dev_priv, 0, SKL_REVID_F0); - pre |= IS_BXT_REVID(dev_priv, 0, BXT_REVID_B_LAST); - pre |= IS_KBL_GT_STEP(dev_priv, 0, STEP_A0); - pre |= IS_GLK_REVID(dev_priv, 0, GLK_REVID_A2); + pre |= IS_SKYLAKE(dev_priv) && INTEL_REVID(dev_priv) < 0x6; + pre |= IS_BROXTON(dev_priv) && INTEL_REVID(dev_priv) < 0xA; + pre |= IS_KABYLAKE(dev_priv) && INTEL_REVID(dev_priv) < 0x1; + pre |= IS_GEMINILAKE(dev_priv) && INTEL_REVID(dev_priv) < 0x3; + pre |= IS_ICELAKE(dev_priv) && INTEL_REVID(dev_priv) < 0x7; if (pre) { drm_err(&dev_priv->drm, "This is a pre-production stepping. 
" @@ -561,7 +562,7 @@ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv) if (ret) goto err_perf; - ret = drm_aperture_remove_conflicting_pci_framebuffers(pdev, "inteldrmfb"); + ret = drm_aperture_remove_conflicting_pci_framebuffers(pdev, dev_priv->drm.driver); if (ret) goto err_ggtt; @@ -619,7 +620,9 @@ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv) intel_opregion_setup(dev_priv); - intel_pcode_init(dev_priv); + ret = intel_pcode_init(dev_priv); + if (ret) + goto err_msi; /* * Fill the dram structure to get the system dram info. This will be @@ -1230,6 +1233,10 @@ static int i915_drm_resume(struct drm_device *dev) disable_rpm_wakeref_asserts(&dev_priv->runtime_pm); + ret = intel_pcode_init(dev_priv); + if (ret) + return ret; + sanitize_gpu(dev_priv); ret = i915_ggtt_enable_hw(dev_priv); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index b30397b04529..005b1cec7007 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -202,6 +202,68 @@ struct drm_i915_file_private { struct rcu_head rcu; }; + /** @proto_context_lock: Guards all struct i915_gem_proto_context + * operations + * + * This not only guards @proto_context_xa, but is always held + * whenever we manipulate any struct i915_gem_proto_context, + * including finalizing it on first actual use of the GEM context. + * + * See i915_gem_proto_context. + */ + struct mutex proto_context_lock; + + /** @proto_context_xa: xarray of struct i915_gem_proto_context + * + * Historically, the context uAPI allowed for two methods of + * setting context parameters: SET_CONTEXT_PARAM and + * CONTEXT_CREATE_EXT_SETPARAM. The former is allowed to be called + * at any time while the later happens as part of + * GEM_CONTEXT_CREATE. Everything settable via one was settable + * via the other. While some params are fairly simple and setting + * them on a live context is harmless such as the context priority, + * others are far trickier such as the VM or the set of engines. + * In order to swap out the VM, for instance, we have to delay + * until all current in-flight work is complete, swap in the new + * VM, and then continue. This leads to a plethora of potential + * race conditions we'd really rather avoid. + * + * We have since disallowed setting these more complex parameters + * on active contexts. This works by delaying the creation of the + * actual context until after the client is done configuring it + * with SET_CONTEXT_PARAM. From the perspective of the client, it + * has the same u32 context ID the whole time. From the + * perspective of i915, however, it's a struct i915_gem_proto_context + * right up until the point where we attempt to do something which + * the proto-context can't handle. Then the struct i915_gem_context + * gets created. + * + * This is accomplished via a little xarray dance. When + * GEM_CONTEXT_CREATE is called, we create a struct + * i915_gem_proto_context, reserve a slot in @context_xa but leave + * it NULL, and place the proto-context in the corresponding slot + * in @proto_context_xa. Then, in i915_gem_context_lookup(), we + * first check @context_xa. If it's there, we return the struct + * i915_gem_context and we're done. If it's not, we look in + * @proto_context_xa and, if we find it there, we create the actual + * context and kill the proto-context. 
+ * + * In order for this dance to work properly, everything which ever + * touches a struct i915_gem_proto_context is guarded by + * @proto_context_lock, including context creation. Yes, this + * means context creation now takes a giant global lock but it + * can't really be helped and that should never be on any driver's + * fast-path anyway. + */ + struct xarray proto_context_xa; + + /** @context_xa: xarray of fully created i915_gem_context + * + * Write access to this xarray is guarded by @proto_context_lock. + * Otherwise, writers may race with finalize_create_context_locked(). + * + * See @proto_context_xa. + */ struct xarray context_xa; struct xarray vm_xa; @@ -270,8 +332,10 @@ struct drm_i915_display_funcs { int (*bw_calc_min_cdclk)(struct intel_atomic_state *state); int (*get_fifo_size)(struct drm_i915_private *dev_priv, enum i9xx_plane_id i9xx_plane); - int (*compute_pipe_wm)(struct intel_crtc_state *crtc_state); - int (*compute_intermediate_wm)(struct intel_crtc_state *crtc_state); + int (*compute_pipe_wm)(struct intel_atomic_state *state, + struct intel_crtc *crtc); + int (*compute_intermediate_wm)(struct intel_atomic_state *state, + struct intel_crtc *crtc); void (*initial_watermarks)(struct intel_atomic_state *state, struct intel_crtc *crtc); void (*atomic_update_watermarks)(struct intel_atomic_state *state, @@ -330,15 +394,6 @@ struct drm_i915_display_funcs { void (*read_luts)(struct intel_crtc_state *crtc_state); }; -enum i915_cache_level { - I915_CACHE_NONE = 0, - I915_CACHE_LLC, /* also used for snoopable memory on non-LLC */ - I915_CACHE_L3_LLC, /* gen7+, L3 sits between the domain specifc - caches, eg sampler/render caches, and the - large Last-Level-Cache. LLC is coherent with - the CPU, but L3 is only visible to the GPU. */ - I915_CACHE_WT, /* hsw:gt3e WriteThrough for scanouts */ -}; #define I915_COLOR_UNEVICTABLE (-1) /* a non-vma sharing the address space */ @@ -346,13 +401,14 @@ struct intel_fbc { /* This is always the inner lock when overlapping with struct_mutex and * it's the outer lock when overlapping with stolen_lock. */ struct mutex lock; - unsigned threshold; unsigned int possible_framebuffer_bits; unsigned int busy_bits; struct intel_crtc *crtc; struct drm_mm_node compressed_fb; - struct drm_mm_node *compressed_llb; + struct drm_mm_node compressed_llb; + + u8 limit; bool false_color; @@ -467,6 +523,7 @@ struct i915_drrs { #define QUIRK_PIN_SWIZZLED_PAGES (1<<5) #define QUIRK_INCREASE_T12_DELAY (1<<6) #define QUIRK_INCREASE_DDI_DISABLED_TIME (1<<7) +#define QUIRK_NO_PPS_BACKLIGHT_POWER_HOOK (1<<8) struct intel_fbdev; struct intel_fbc_work; @@ -552,7 +609,7 @@ struct i915_gem_mm { * notifier_lock for mmu notifiers, memory may not be allocated * while holding this lock. */ - spinlock_t notifier_lock; + rwlock_t notifier_lock; #endif /* shrinker accounting, also useful for userland debugging */ @@ -576,6 +633,9 @@ i915_fence_timeout(const struct drm_i915_private *i915) #define HAS_HW_SAGV_WM(i915) (DISPLAY_VER(i915) >= 13 && !IS_DGFX(i915)) +/* Amount of PSF GV points, BSpec precisely defines this */ +#define I915_NUM_PSF_GV_POINTS 3 + struct ddi_vbt_port_info { /* Non-NULL if port present. 
*/ struct intel_bios_encoder_data *devdata; @@ -1089,12 +1149,16 @@ struct drm_i915_private { INTEL_DRAM_LPDDR5, } type; u8 num_qgv_points; + u8 num_psf_gv_points; } dram_info; struct intel_bw_info { /* for each QGV point */ unsigned int deratedbw[I915_NUM_QGV_POINTS]; + /* for each PSF GV point */ + unsigned int psf_bw[I915_NUM_PSF_GV_POINTS]; u8 num_qgv_points; + u8 num_psf_gv_points; u8 num_planes; } max_bw[6]; @@ -1134,6 +1198,8 @@ struct drm_i915_private { /* For i915gm/i945gm vblank irq workaround */ u8 vblank_enabled; + bool irq_enabled; + /* perform PHY state sanity checks? */ bool chv_phy_assert[2]; @@ -1237,26 +1303,17 @@ static inline struct drm_i915_private *pdev_to_i915(struct pci_dev *pdev) #define INTEL_DEVID(dev_priv) (RUNTIME_INFO(dev_priv)->device_id) -/* - * Deprecated: this will be replaced by individual IP checks: - * GRAPHICS_VER(), MEDIA_VER() and DISPLAY_VER() - */ -#define INTEL_GEN(dev_priv) GRAPHICS_VER(dev_priv) -/* - * Deprecated: use IS_GRAPHICS_VER(), IS_MEDIA_VER() and IS_DISPLAY_VER() as - * appropriate. - */ -#define IS_GEN_RANGE(dev_priv, s, e) IS_GRAPHICS_VER(dev_priv, (s), (e)) -/* - * Deprecated: use GRAPHICS_VER(), MEDIA_VER() and DISPLAY_VER() as appropriate. - */ -#define IS_GEN(dev_priv, n) (GRAPHICS_VER(dev_priv) == (n)) +#define IP_VER(ver, rel) ((ver) << 8 | (rel)) #define GRAPHICS_VER(i915) (INTEL_INFO(i915)->graphics_ver) +#define GRAPHICS_VER_FULL(i915) IP_VER(INTEL_INFO(i915)->graphics_ver, \ + INTEL_INFO(i915)->graphics_rel) #define IS_GRAPHICS_VER(i915, from, until) \ (GRAPHICS_VER(i915) >= (from) && GRAPHICS_VER(i915) <= (until)) #define MEDIA_VER(i915) (INTEL_INFO(i915)->media_ver) +#define MEDIA_VER_FULL(i915) IP_VER(INTEL_INFO(i915)->media_ver, \ + INTEL_INFO(i915)->media_rel) #define IS_MEDIA_VER(i915, from, until) \ (MEDIA_VER(i915) >= (from) && MEDIA_VER(i915) <= (until)) @@ -1264,29 +1321,20 @@ static inline struct drm_i915_private *pdev_to_i915(struct pci_dev *pdev) #define IS_DISPLAY_VER(i915, from, until) \ (DISPLAY_VER(i915) >= (from) && DISPLAY_VER(i915) <= (until)) -#define REVID_FOREVER 0xff #define INTEL_REVID(dev_priv) (to_pci_dev((dev_priv)->drm.dev)->revision) #define HAS_DSB(dev_priv) (INTEL_INFO(dev_priv)->display.has_dsb) -/* - * Return true if revision is in range [since,until] inclusive. - * - * Use 0 for open-ended since, and REVID_FOREVER for open-ended until. 
- */ -#define IS_REVID(p, since, until) \ - (INTEL_REVID(p) >= (since) && INTEL_REVID(p) <= (until)) - #define INTEL_DISPLAY_STEP(__i915) (RUNTIME_INFO(__i915)->step.display_step) #define INTEL_GT_STEP(__i915) (RUNTIME_INFO(__i915)->step.gt_step) #define IS_DISPLAY_STEP(__i915, since, until) \ (drm_WARN_ON(&(__i915)->drm, INTEL_DISPLAY_STEP(__i915) == STEP_NONE), \ - INTEL_DISPLAY_STEP(__i915) >= (since) && INTEL_DISPLAY_STEP(__i915) <= (until)) + INTEL_DISPLAY_STEP(__i915) >= (since) && INTEL_DISPLAY_STEP(__i915) < (until)) #define IS_GT_STEP(__i915, since, until) \ (drm_WARN_ON(&(__i915)->drm, INTEL_GT_STEP(__i915) == STEP_NONE), \ - INTEL_GT_STEP(__i915) >= (since) && INTEL_GT_STEP(__i915) <= (until)) + INTEL_GT_STEP(__i915) >= (since) && INTEL_GT_STEP(__i915) < (until)) static __always_inline unsigned int __platform_mask_index(const struct intel_runtime_info *info, @@ -1385,7 +1433,7 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define IS_GEMINILAKE(dev_priv) IS_PLATFORM(dev_priv, INTEL_GEMINILAKE) #define IS_COFFEELAKE(dev_priv) IS_PLATFORM(dev_priv, INTEL_COFFEELAKE) #define IS_COMETLAKE(dev_priv) IS_PLATFORM(dev_priv, INTEL_COMETLAKE) -#define IS_CANNONLAKE(dev_priv) IS_PLATFORM(dev_priv, INTEL_CANNONLAKE) +#define IS_CANNONLAKE(dev_priv) 0 #define IS_ICELAKE(dev_priv) IS_PLATFORM(dev_priv, INTEL_ICELAKE) #define IS_JSL_EHL(dev_priv) (IS_PLATFORM(dev_priv, INTEL_JASPERLAKE) || \ IS_PLATFORM(dev_priv, INTEL_ELKHARTLAKE)) @@ -1394,6 +1442,12 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define IS_DG1(dev_priv) IS_PLATFORM(dev_priv, INTEL_DG1) #define IS_ALDERLAKE_S(dev_priv) IS_PLATFORM(dev_priv, INTEL_ALDERLAKE_S) #define IS_ALDERLAKE_P(dev_priv) IS_PLATFORM(dev_priv, INTEL_ALDERLAKE_P) +#define IS_XEHPSDV(dev_priv) IS_PLATFORM(dev_priv, INTEL_XEHPSDV) +#define IS_DG2(dev_priv) IS_PLATFORM(dev_priv, INTEL_DG2) +#define IS_DG2_G10(dev_priv) \ + IS_SUBPLATFORM(dev_priv, INTEL_DG2, INTEL_SUBPLATFORM_G10) +#define IS_DG2_G11(dev_priv) \ + IS_SUBPLATFORM(dev_priv, INTEL_DG2, INTEL_SUBPLATFORM_G11) #define IS_HSW_EARLY_SDV(dev_priv) (IS_HASWELL(dev_priv) && \ (INTEL_DEVID(dev_priv) & 0xFF00) == 0x0C00) #define IS_BDW_ULT(dev_priv) \ @@ -1445,8 +1499,6 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define IS_CML_GT2(dev_priv) (IS_COMETLAKE(dev_priv) && \ INTEL_INFO(dev_priv)->gt == 2) -#define IS_CNL_WITH_PORT_F(dev_priv) \ - IS_SUBPLATFORM(dev_priv, INTEL_CANNONLAKE, INTEL_SUBPLATFORM_PORTF) #define IS_ICL_WITH_PORT_F(dev_priv) \ IS_SUBPLATFORM(dev_priv, INTEL_ICELAKE, INTEL_SUBPLATFORM_PORTF) @@ -1456,60 +1508,17 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define IS_TGL_Y(dev_priv) \ IS_SUBPLATFORM(dev_priv, INTEL_TIGERLAKE, INTEL_SUBPLATFORM_ULX) -#define SKL_REVID_A0 0x0 -#define SKL_REVID_B0 0x1 -#define SKL_REVID_C0 0x2 -#define SKL_REVID_D0 0x3 -#define SKL_REVID_E0 0x4 -#define SKL_REVID_F0 0x5 -#define SKL_REVID_G0 0x6 -#define SKL_REVID_H0 0x7 - -#define IS_SKL_REVID(p, since, until) (IS_SKYLAKE(p) && IS_REVID(p, since, until)) - -#define BXT_REVID_A0 0x0 -#define BXT_REVID_A1 0x1 -#define BXT_REVID_B0 0x3 -#define BXT_REVID_B_LAST 0x8 -#define BXT_REVID_C0 0x9 - -#define IS_BXT_REVID(dev_priv, since, until) \ - (IS_BROXTON(dev_priv) && IS_REVID(dev_priv, since, until)) +#define IS_SKL_GT_STEP(p, since, until) (IS_SKYLAKE(p) && IS_GT_STEP(p, since, until)) #define IS_KBL_GT_STEP(dev_priv, since, until) \ (IS_KABYLAKE(dev_priv) && IS_GT_STEP(dev_priv, since, until)) #define IS_KBL_DISPLAY_STEP(dev_priv, since, until) \ (IS_KABYLAKE(dev_priv) 
&& IS_DISPLAY_STEP(dev_priv, since, until)) -#define GLK_REVID_A0 0x0 -#define GLK_REVID_A1 0x1 -#define GLK_REVID_A2 0x2 -#define GLK_REVID_B0 0x3 - -#define IS_GLK_REVID(dev_priv, since, until) \ - (IS_GEMINILAKE(dev_priv) && IS_REVID(dev_priv, since, until)) - -#define CNL_REVID_A0 0x0 -#define CNL_REVID_B0 0x1 -#define CNL_REVID_C0 0x2 - -#define IS_CNL_REVID(p, since, until) \ - (IS_CANNONLAKE(p) && IS_REVID(p, since, until)) - -#define ICL_REVID_A0 0x0 -#define ICL_REVID_A2 0x1 -#define ICL_REVID_B0 0x3 -#define ICL_REVID_B2 0x4 -#define ICL_REVID_C0 0x5 - -#define IS_ICL_REVID(p, since, until) \ - (IS_ICELAKE(p) && IS_REVID(p, since, until)) - -#define EHL_REVID_A0 0x0 -#define EHL_REVID_B0 0x1 - -#define IS_JSL_EHL_REVID(p, since, until) \ - (IS_JSL_EHL(p) && IS_REVID(p, since, until)) +#define IS_JSL_EHL_GT_STEP(p, since, until) \ + (IS_JSL_EHL(p) && IS_GT_STEP(p, since, until)) +#define IS_JSL_EHL_DISPLAY_STEP(p, since, until) \ + (IS_JSL_EHL(p) && IS_DISPLAY_STEP(p, since, until)) #define IS_TGL_DISPLAY_STEP(__i915, since, until) \ (IS_TIGERLAKE(__i915) && \ @@ -1523,18 +1532,13 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, (IS_TIGERLAKE(__i915) && !(IS_TGL_U(__i915) || IS_TGL_Y(__i915)) && \ IS_GT_STEP(__i915, since, until)) -#define RKL_REVID_A0 0x0 -#define RKL_REVID_B0 0x1 -#define RKL_REVID_C0 0x4 +#define IS_RKL_DISPLAY_STEP(p, since, until) \ + (IS_ROCKETLAKE(p) && IS_DISPLAY_STEP(p, since, until)) -#define IS_RKL_REVID(p, since, until) \ - (IS_ROCKETLAKE(p) && IS_REVID(p, since, until)) - -#define DG1_REVID_A0 0x0 -#define DG1_REVID_B0 0x1 - -#define IS_DG1_REVID(p, since, until) \ - (IS_DG1(p) && IS_REVID(p, since, until)) +#define IS_DG1_GT_STEP(p, since, until) \ + (IS_DG1(p) && IS_GT_STEP(p, since, until)) +#define IS_DG1_DISPLAY_STEP(p, since, until) \ + (IS_DG1(p) && IS_DISPLAY_STEP(p, since, until)) #define IS_ADLS_DISPLAY_STEP(__i915, since, until) \ (IS_ALDERLAKE_S(__i915) && \ @@ -1552,6 +1556,31 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, (IS_ALDERLAKE_P(__i915) && \ IS_GT_STEP(__i915, since, until)) +#define IS_XEHPSDV_GT_STEP(__i915, since, until) \ + (IS_XEHPSDV(__i915) && IS_GT_STEP(__i915, since, until)) + +/* + * DG2 hardware steppings are a bit unusual. The hardware design was forked + * to create two variants (G10 and G11) which have distinct workaround sets. + * The G11 fork of the DG2 design resets the GT stepping back to "A0" for its + * first iteration, even though it's more similar to a G10 B0 stepping in terms + * of functionality and workarounds. However the display stepping does not + * reset in the same manner --- a specific stepping like "B0" has a consistent + * meaning regardless of whether it belongs to a G10 or G11 DG2. + * + * TLDR: All GT workarounds and stepping-specific logic must be applied in + * relation to a specific subplatform (G10 or G11), whereas display workarounds + * and stepping-specific logic will be applied with a general DG2-wide stepping + * number. 
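For illustration, a minimal sketch of how the two DG2 stepping helpers defined just below might be consumed by a hypothetical workaround; the STEP_* bounds and the workaround bodies are assumptions, not part of the patch, and with the reworked IS_GT_STEP()/IS_DISPLAY_STEP() helpers above the "until" bound is exclusive, i.e. the range is [since, until):

static void example_dg2_workarounds(struct drm_i915_private *i915)
{
	/* GT-side logic is keyed to a specific variant's stepping. */
	if (IS_DG2_GT_STEP(i915, G10, STEP_A0, STEP_B0) ||
	    IS_DG2_GT_STEP(i915, G11, STEP_A0, STEP_B0)) {
		/* apply the hypothetical GT-specific workaround here */
	}

	/* Display-side logic uses the DG2-wide display stepping. */
	if (IS_DG2_DISP_STEP(i915, STEP_B0, STEP_FOREVER)) {
		/* apply the hypothetical display workaround here */
	}
}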
+ */ +#define IS_DG2_GT_STEP(__i915, variant, since, until) \ + (IS_SUBPLATFORM(__i915, INTEL_DG2, INTEL_SUBPLATFORM_##variant) && \ + IS_GT_STEP(__i915, since, until)) + +#define IS_DG2_DISP_STEP(__i915, since, until) \ + (IS_DG2(__i915) && \ + IS_DISPLAY_STEP(__i915, since, until)) + #define IS_LP(dev_priv) (INTEL_INFO(dev_priv)->is_lp) #define IS_GEN9_LP(dev_priv) (GRAPHICS_VER(dev_priv) == 9 && IS_LP(dev_priv)) #define IS_GEN9_BC(dev_priv) (GRAPHICS_VER(dev_priv) == 9 && !IS_LP(dev_priv)) @@ -1589,8 +1618,6 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define HAS_LOGICAL_RING_ELSQ(dev_priv) \ (INTEL_INFO(dev_priv)->has_logical_ring_elsq) -#define HAS_MASTER_UNIT_IRQ(dev_priv) (INTEL_INFO(dev_priv)->has_master_unit_irq) - #define HAS_EXECLISTS(dev_priv) HAS_LOGICAL_RING_CONTEXTS(dev_priv) #define INTEL_PPGTT(dev_priv) (INTEL_INFO(dev_priv)->ppgtt_type) @@ -1616,12 +1643,10 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, /* WaRsDisableCoarsePowerGating:skl,cnl */ #define NEEDS_WaRsDisableCoarsePowerGating(dev_priv) \ - (IS_CANNONLAKE(dev_priv) || \ - IS_SKL_GT3(dev_priv) || \ - IS_SKL_GT4(dev_priv)) + (IS_SKL_GT3(dev_priv) || IS_SKL_GT4(dev_priv)) #define HAS_GMBUS_IRQ(dev_priv) (GRAPHICS_VER(dev_priv) >= 4) -#define HAS_GMBUS_BURST_READ(dev_priv) (GRAPHICS_VER(dev_priv) >= 10 || \ +#define HAS_GMBUS_BURST_READ(dev_priv) (GRAPHICS_VER(dev_priv) >= 11 || \ IS_GEMINILAKE(dev_priv) || \ IS_KABYLAKE(dev_priv)) @@ -1641,6 +1666,7 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define HAS_DP_MST(dev_priv) (INTEL_INFO(dev_priv)->display.has_dp_mst) +#define HAS_CDCLK_CRAWL(dev_priv) (INTEL_INFO(dev_priv)->display.has_cdclk_crawl) #define HAS_DDI(dev_priv) (INTEL_INFO(dev_priv)->display.has_ddi) #define HAS_FPGA_DBG_UNCLAIMED(dev_priv) (INTEL_INFO(dev_priv)->display.has_fpga_dbg) #define HAS_PSR(dev_priv) (INTEL_INFO(dev_priv)->display.has_psr) @@ -1662,6 +1688,9 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define HAS_RUNTIME_PM(dev_priv) (INTEL_INFO(dev_priv)->has_runtime_pm) #define HAS_64BIT_RELOC(dev_priv) (INTEL_INFO(dev_priv)->has_64bit_reloc) +#define HAS_MSLICES(dev_priv) \ + (INTEL_INFO(dev_priv)->has_mslices) + #define HAS_IPC(dev_priv) (INTEL_INFO(dev_priv)->display.has_ipc) #define HAS_REGION(i915, i) (INTEL_INFO(i915)->memory_regions & (i)) @@ -1751,9 +1780,6 @@ void i915_gem_cleanup_userptr(struct drm_i915_private *dev_priv); void i915_gem_init_early(struct drm_i915_private *dev_priv); void i915_gem_cleanup_early(struct drm_i915_private *dev_priv); -struct intel_memory_region *i915_gem_shmem_setup(struct drm_i915_private *i915, - u16 type, u16 instance); - static inline void i915_gem_drain_freed_objects(struct drm_i915_private *i915) { /* @@ -1850,24 +1876,18 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev, struct dma_buf *i915_gem_prime_export(struct drm_gem_object *gem_obj, int flags); -static inline struct i915_gem_context * -__i915_gem_context_lookup_rcu(struct drm_i915_file_private *file_priv, u32 id) -{ - return xa_load(&file_priv->context_xa, id); -} - -static inline struct i915_gem_context * -i915_gem_context_lookup(struct drm_i915_file_private *file_priv, u32 id) +static inline struct i915_address_space * +i915_gem_vm_lookup(struct drm_i915_file_private *file_priv, u32 id) { - struct i915_gem_context *ctx; + struct i915_address_space *vm; rcu_read_lock(); - ctx = __i915_gem_context_lookup_rcu(file_priv, id); - if (ctx && !kref_get_unless_zero(&ctx->ref)) - ctx = NULL; + vm = xa_load(&file_priv->vm_xa, id); + if (vm 
&& !kref_get_unless_zero(&vm->ref)) + vm = NULL; rcu_read_unlock(); - return ctx; + return vm; } /* i915_gem_evict.c */ @@ -1934,8 +1954,8 @@ int remap_io_sg(struct vm_area_struct *vma, static inline int intel_hws_csb_write_index(struct drm_i915_private *i915) { - if (GRAPHICS_VER(i915) >= 10) - return CNL_HWS_CSB_WRITE_INDEX; + if (GRAPHICS_VER(i915) >= 11) + return ICL_HWS_CSB_WRITE_INDEX; else return I915_HWS_CSB_WRITE_INDEX; } diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 589388dec48a..590efc8b0265 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -469,12 +469,6 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data, if (ret != -ENODEV) goto out; - ret = -ENODEV; - if (obj->ops->pread) - ret = obj->ops->pread(obj, args); - if (ret != -ENODEV) - goto out; - ret = i915_gem_object_wait(obj, I915_WAIT_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); @@ -1005,8 +999,11 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data, } } - if (obj->mm.madv != __I915_MADV_PURGED) + if (obj->mm.madv != __I915_MADV_PURGED) { obj->mm.madv = args->madv; + if (obj->ops->adjust_lru) + obj->ops->adjust_lru(obj); + } if (i915_gem_object_has_pages(obj)) { unsigned long flags; @@ -1204,58 +1201,6 @@ int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file) return ret; } -void i915_gem_ww_ctx_init(struct i915_gem_ww_ctx *ww, bool intr) -{ - ww_acquire_init(&ww->ctx, &reservation_ww_class); - INIT_LIST_HEAD(&ww->obj_list); - ww->intr = intr; - ww->contended = NULL; -} - -static void i915_gem_ww_ctx_unlock_all(struct i915_gem_ww_ctx *ww) -{ - struct drm_i915_gem_object *obj; - - while ((obj = list_first_entry_or_null(&ww->obj_list, struct drm_i915_gem_object, obj_link))) { - list_del(&obj->obj_link); - i915_gem_object_unlock(obj); - } -} - -void i915_gem_ww_unlock_single(struct drm_i915_gem_object *obj) -{ - list_del(&obj->obj_link); - i915_gem_object_unlock(obj); -} - -void i915_gem_ww_ctx_fini(struct i915_gem_ww_ctx *ww) -{ - i915_gem_ww_ctx_unlock_all(ww); - WARN_ON(ww->contended); - ww_acquire_fini(&ww->ctx); -} - -int __must_check i915_gem_ww_ctx_backoff(struct i915_gem_ww_ctx *ww) -{ - int ret = 0; - - if (WARN_ON(!ww->contended)) - return -EINVAL; - - i915_gem_ww_ctx_unlock_all(ww); - if (ww->intr) - ret = dma_resv_lock_slow_interruptible(ww->contended->base.resv, &ww->ctx); - else - dma_resv_lock_slow(ww->contended->base.resv, &ww->ctx); - - if (!ret) - list_add_tail(&ww->contended->obj_link, &ww->obj_list); - - ww->contended = NULL; - - return ret; -} - #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/mock_gem_device.c" #include "selftests/i915_gem.c" diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h index 440c35f1abc9..d0752e5553db 100644 --- a/drivers/gpu/drm/i915/i915_gem.h +++ b/drivers/gpu/drm/i915/i915_gem.h @@ -123,16 +123,4 @@ static inline bool __tasklet_is_scheduled(struct tasklet_struct *t) return test_bit(TASKLET_STATE_SCHED, &t->state); } -struct i915_gem_ww_ctx { - struct ww_acquire_ctx ctx; - struct list_head obj_list; - bool intr; - struct drm_i915_gem_object *contended; -}; - -void i915_gem_ww_ctx_init(struct i915_gem_ww_ctx *ctx, bool intr); -void i915_gem_ww_ctx_fini(struct i915_gem_ww_ctx *ctx); -int __must_check i915_gem_ww_ctx_backoff(struct i915_gem_ww_ctx *ctx); -void i915_gem_ww_unlock_single(struct drm_i915_gem_object *obj); - #endif /* __I915_GEM_H__ */ diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index 
4d2d59a9942b..2b73ddb11c66 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -27,6 +27,7 @@ */ #include "gem/i915_gem_context.h" +#include "gt/intel_gt.h" #include "gt/intel_gt_requests.h" #include "i915_drv.h" diff --git a/drivers/gpu/drm/i915/i915_gem_ww.c b/drivers/gpu/drm/i915/i915_gem_ww.c new file mode 100644 index 000000000000..3f6ff139478e --- /dev/null +++ b/drivers/gpu/drm/i915/i915_gem_ww.c @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2020 Intel Corporation + */ +#include <linux/dma-resv.h> +#include "i915_gem_ww.h" +#include "gem/i915_gem_object.h" + +void i915_gem_ww_ctx_init(struct i915_gem_ww_ctx *ww, bool intr) +{ + ww_acquire_init(&ww->ctx, &reservation_ww_class); + INIT_LIST_HEAD(&ww->obj_list); + ww->intr = intr; + ww->contended = NULL; +} + +static void i915_gem_ww_ctx_unlock_all(struct i915_gem_ww_ctx *ww) +{ + struct drm_i915_gem_object *obj; + + while ((obj = list_first_entry_or_null(&ww->obj_list, struct drm_i915_gem_object, obj_link))) { + list_del(&obj->obj_link); + i915_gem_object_unlock(obj); + i915_gem_object_put(obj); + } +} + +void i915_gem_ww_unlock_single(struct drm_i915_gem_object *obj) +{ + list_del(&obj->obj_link); + i915_gem_object_unlock(obj); + i915_gem_object_put(obj); +} + +void i915_gem_ww_ctx_fini(struct i915_gem_ww_ctx *ww) +{ + i915_gem_ww_ctx_unlock_all(ww); + WARN_ON(ww->contended); + ww_acquire_fini(&ww->ctx); +} + +int __must_check i915_gem_ww_ctx_backoff(struct i915_gem_ww_ctx *ww) +{ + int ret = 0; + + if (WARN_ON(!ww->contended)) + return -EINVAL; + + i915_gem_ww_ctx_unlock_all(ww); + if (ww->intr) + ret = dma_resv_lock_slow_interruptible(ww->contended->base.resv, &ww->ctx); + else + dma_resv_lock_slow(ww->contended->base.resv, &ww->ctx); + + if (!ret) + list_add_tail(&ww->contended->obj_link, &ww->obj_list); + else + i915_gem_object_put(ww->contended); + + ww->contended = NULL; + + return ret; +} diff --git a/drivers/gpu/drm/i915/i915_gem_ww.h b/drivers/gpu/drm/i915/i915_gem_ww.h new file mode 100644 index 000000000000..f6b1a796667b --- /dev/null +++ b/drivers/gpu/drm/i915/i915_gem_ww.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2020 Intel Corporation + */ +#ifndef __I915_GEM_WW_H__ +#define __I915_GEM_WW_H__ + +#include <drm/drm_drv.h> + +struct i915_gem_ww_ctx { + struct ww_acquire_ctx ctx; + struct list_head obj_list; + struct drm_i915_gem_object *contended; + unsigned short intr; + unsigned short loop; +}; + +void i915_gem_ww_ctx_init(struct i915_gem_ww_ctx *ctx, bool intr); +void i915_gem_ww_ctx_fini(struct i915_gem_ww_ctx *ctx); +int __must_check i915_gem_ww_ctx_backoff(struct i915_gem_ww_ctx *ctx); +void i915_gem_ww_unlock_single(struct drm_i915_gem_object *obj); + +/* Internal functions used by the inlines! Don't use. 
*/ +static inline int __i915_gem_ww_fini(struct i915_gem_ww_ctx *ww, int err) +{ + ww->loop = 0; + if (err == -EDEADLK) { + err = i915_gem_ww_ctx_backoff(ww); + if (!err) + ww->loop = 1; + } + + if (!ww->loop) + i915_gem_ww_ctx_fini(ww); + + return err; +} + +static inline void +__i915_gem_ww_init(struct i915_gem_ww_ctx *ww, bool intr) +{ + i915_gem_ww_ctx_init(ww, intr); + ww->loop = 1; +} + +#define for_i915_gem_ww(_ww, _err, _intr) \ + for (__i915_gem_ww_init(_ww, _intr); (_ww)->loop; \ + _err = __i915_gem_ww_fini(_ww, _err)) + +#endif diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c index 24e18219eb50..77490cb5ff9c 100644 --- a/drivers/gpu/drm/i915/i915_getparam.c +++ b/drivers/gpu/drm/i915/i915_getparam.c @@ -15,7 +15,7 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data, struct pci_dev *pdev = to_pci_dev(dev->dev); const struct sseu_dev_info *sseu = &i915->gt.info.sseu; drm_i915_getparam_t *param = data; - int value; + int value = 0; switch (param->param) { case I915_PARAM_IRQ_ACTIVE: @@ -134,6 +134,7 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data, case I915_PARAM_HAS_EXEC_FENCE_ARRAY: case I915_PARAM_HAS_EXEC_SUBMIT_FENCE: case I915_PARAM_HAS_EXEC_TIMELINE_FENCES: + case I915_PARAM_HAS_USERPTR_PROBE: /* For the time being all of these are always true; * if some supported hardware does not have one of these * features this value needs to be provided from @@ -150,7 +151,9 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data, return -ENODEV; break; case I915_PARAM_SUBSLICE_MASK: - value = sseu->subslice_mask[0]; + /* Only copy bits from the first slice */ + memcpy(&value, sseu->subslice_mask, + min(sseu->ss_stride, (u8)sizeof(value))); if (!value) return -ENODEV; break; diff --git a/drivers/gpu/drm/i915/i915_globals.c b/drivers/gpu/drm/i915/i915_globals.c deleted file mode 100644 index 3acb0b6be284..000000000000 --- a/drivers/gpu/drm/i915/i915_globals.c +++ /dev/null @@ -1,160 +0,0 @@ -/* - * SPDX-License-Identifier: MIT - * - * Copyright © 2019 Intel Corporation - */ - -#include <linux/slab.h> -#include <linux/workqueue.h> - -#include "i915_active.h" -#include "gem/i915_gem_context.h" -#include "gem/i915_gem_object.h" -#include "i915_globals.h" -#include "i915_request.h" -#include "i915_scheduler.h" -#include "i915_vma.h" - -static LIST_HEAD(globals); - -static atomic_t active; -static atomic_t epoch; -static struct park_work { - struct delayed_work work; - struct rcu_head rcu; - unsigned long flags; -#define PENDING 0 - int epoch; -} park; - -static void i915_globals_shrink(void) -{ - struct i915_global *global; - - /* - * kmem_cache_shrink() discards empty slabs and reorders partially - * filled slabs to prioritise allocating from the mostly full slabs, - * with the aim of reducing fragmentation. 
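For illustration, a minimal usage sketch for the for_i915_gem_ww() helper added in i915_gem_ww.h above, assuming the existing i915_gem_object_lock(obj, &ww) helper; the loop body is a placeholder, and the macro retries on -EDEADLK via i915_gem_ww_ctx_backoff() and finishes the context otherwise:

static int example_lock_and_use(struct drm_i915_gem_object *obj)
{
	struct i915_gem_ww_ctx ww;
	int err = 0;

	for_i915_gem_ww(&ww, err, true) {
		err = i915_gem_object_lock(obj, &ww);
		if (err)
			continue; /* -EDEADLK backs off and retries */

		/* ... operate on the locked object here ... */
	}

	return err;
}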
- */ - list_for_each_entry(global, &globals, link) - global->shrink(); -} - -static void __i915_globals_grace(struct rcu_head *rcu) -{ - /* Ratelimit parking as shrinking is quite slow */ - schedule_delayed_work(&park.work, round_jiffies_up_relative(2 * HZ)); -} - -static void __i915_globals_queue_rcu(void) -{ - park.epoch = atomic_inc_return(&epoch); - if (!atomic_read(&active)) { - init_rcu_head(&park.rcu); - call_rcu(&park.rcu, __i915_globals_grace); - } -} - -static void __i915_globals_park(struct work_struct *work) -{ - destroy_rcu_head(&park.rcu); - - /* Confirm nothing woke up in the last grace period */ - if (park.epoch != atomic_read(&epoch)) { - __i915_globals_queue_rcu(); - return; - } - - clear_bit(PENDING, &park.flags); - i915_globals_shrink(); -} - -void __init i915_global_register(struct i915_global *global) -{ - GEM_BUG_ON(!global->shrink); - GEM_BUG_ON(!global->exit); - - list_add_tail(&global->link, &globals); -} - -static void __i915_globals_cleanup(void) -{ - struct i915_global *global, *next; - - list_for_each_entry_safe_reverse(global, next, &globals, link) - global->exit(); -} - -static __initconst int (* const initfn[])(void) = { - i915_global_active_init, - i915_global_context_init, - i915_global_gem_context_init, - i915_global_objects_init, - i915_global_request_init, - i915_global_scheduler_init, - i915_global_vma_init, -}; - -int __init i915_globals_init(void) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(initfn); i++) { - int err; - - err = initfn[i](); - if (err) { - __i915_globals_cleanup(); - return err; - } - } - - INIT_DELAYED_WORK(&park.work, __i915_globals_park); - return 0; -} - -void i915_globals_park(void) -{ - /* - * Defer shrinking the global slab caches (and other work) until - * after a RCU grace period has completed with no activity. This - * is to try and reduce the latency impact on the consumers caused - * by us shrinking the caches the same time as they are trying to - * allocate, with the assumption being that if we idle long enough - * for an RCU grace period to elapse since the last use, it is likely - * to be longer until we need the caches again. 
- */ - if (!atomic_dec_and_test(&active)) - return; - - /* Queue cleanup after the next RCU grace period has freed slabs */ - if (!test_and_set_bit(PENDING, &park.flags)) - __i915_globals_queue_rcu(); -} - -void i915_globals_unpark(void) -{ - atomic_inc(&epoch); - atomic_inc(&active); -} - -static void __i915_globals_flush(void) -{ - atomic_inc(&active); /* skip shrinking */ - - rcu_barrier(); /* wait for the work to be queued */ - flush_delayed_work(&park.work); - - atomic_dec(&active); -} - -void i915_globals_exit(void) -{ - GEM_BUG_ON(atomic_read(&active)); - - __i915_globals_flush(); - __i915_globals_cleanup(); - - /* And ensure that our DESTROY_BY_RCU slabs are truly destroyed */ - rcu_barrier(); -} diff --git a/drivers/gpu/drm/i915/i915_globals.h b/drivers/gpu/drm/i915/i915_globals.h deleted file mode 100644 index 2d199f411a4a..000000000000 --- a/drivers/gpu/drm/i915/i915_globals.h +++ /dev/null @@ -1,37 +0,0 @@ -/* - * SPDX-License-Identifier: MIT - * - * Copyright © 2019 Intel Corporation - */ - -#ifndef _I915_GLOBALS_H_ -#define _I915_GLOBALS_H_ - -#include <linux/types.h> - -typedef void (*i915_global_func_t)(void); - -struct i915_global { - struct list_head link; - - i915_global_func_t shrink; - i915_global_func_t exit; -}; - -void i915_global_register(struct i915_global *global); - -int i915_globals_init(void); -void i915_globals_park(void); -void i915_globals_unpark(void); -void i915_globals_exit(void); - -/* constructors */ -int i915_global_active_init(void); -int i915_global_context_init(void); -int i915_global_gem_context_init(void); -int i915_global_objects_init(void); -int i915_global_request_init(void); -int i915_global_scheduler_init(void); -int i915_global_vma_init(void); - -#endif /* _I915_GLOBALS_H_ */ diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 966664610c8c..9cf6ac575de1 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1048,7 +1048,7 @@ i915_vma_coredump_create(const struct intel_gt *gt, if (ret) break; } - } else if (i915_gem_object_is_lmem(vma->obj)) { + } else if (__i915_gem_object_is_lmem(vma->obj)) { struct intel_memory_region *mem = vma->obj->mm.region; dma_addr_t dma; @@ -1438,20 +1438,37 @@ capture_engine(struct intel_engine_cs *engine, { struct intel_engine_capture_vma *capture = NULL; struct intel_engine_coredump *ee; - struct i915_request *rq; + struct intel_context *ce; + struct i915_request *rq = NULL; unsigned long flags; ee = intel_engine_coredump_alloc(engine, GFP_KERNEL); if (!ee) return NULL; - spin_lock_irqsave(&engine->active.lock, flags); - rq = intel_engine_find_active_request(engine); + ce = intel_engine_get_hung_context(engine); + if (ce) { + intel_engine_clear_hung_context(engine); + rq = intel_context_find_active_request(ce); + if (!rq || !i915_request_started(rq)) + goto no_request_capture; + } else { + /* + * Getting here with GuC enabled means it is a forced error capture + * with no actual hang. So, no need to attempt the execlist search. 
+ */ + if (!intel_uc_uses_guc_submission(&engine->gt->uc)) { + spin_lock_irqsave(&engine->sched_engine->lock, flags); + rq = intel_engine_execlist_find_hung_request(engine); + spin_unlock_irqrestore(&engine->sched_engine->lock, + flags); + } + } if (rq) capture = intel_engine_coredump_add_request(ee, rq, ATOMIC_MAYFAIL); - spin_unlock_irqrestore(&engine->active.lock, flags); if (!capture) { +no_request_capture: kfree(ee); return NULL; } diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index c3816f5c6900..9bc4f4a8e12e 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -207,7 +207,7 @@ static void intel_hpd_init_pins(struct drm_i915_private *dev_priv) (!HAS_PCH_SPLIT(dev_priv) || HAS_PCH_NOP(dev_priv))) return; - if (HAS_PCH_DG1(dev_priv)) + if (INTEL_PCH_TYPE(dev_priv) >= PCH_DG1) hpd->pch_hpd = hpd_sde_dg1; else if (INTEL_PCH_TYPE(dev_priv) >= PCH_ICP) hpd->pch_hpd = hpd_icp; @@ -2297,11 +2297,10 @@ static u32 gen8_de_port_aux_mask(struct drm_i915_private *dev_priv) GEN9_AUX_CHANNEL_C | GEN9_AUX_CHANNEL_D; - if (IS_CNL_WITH_PORT_F(dev_priv) || DISPLAY_VER(dev_priv) == 11) - mask |= CNL_AUX_CHANNEL_F; - - if (DISPLAY_VER(dev_priv) == 11) + if (DISPLAY_VER(dev_priv) == 11) { + mask |= ICL_AUX_CHANNEL_F; mask |= ICL_AUX_CHANNEL_E; + } return mask; } @@ -2698,11 +2697,9 @@ gen11_display_irq_handler(struct drm_i915_private *i915) enable_rpm_wakeref_asserts(&i915->runtime_pm); } -static __always_inline irqreturn_t -__gen11_irq_handler(struct drm_i915_private * const i915, - u32 (*intr_disable)(void __iomem * const regs), - void (*intr_enable)(void __iomem * const regs)) +static irqreturn_t gen11_irq_handler(int irq, void *arg) { + struct drm_i915_private *i915 = arg; void __iomem * const regs = i915->uncore.regs; struct intel_gt *gt = &i915->gt; u32 master_ctl; @@ -2711,9 +2708,9 @@ __gen11_irq_handler(struct drm_i915_private * const i915, if (!intel_irqs_enabled(i915)) return IRQ_NONE; - master_ctl = intr_disable(regs); + master_ctl = gen11_master_intr_disable(regs); if (!master_ctl) { - intr_enable(regs); + gen11_master_intr_enable(regs); return IRQ_NONE; } @@ -2726,7 +2723,7 @@ __gen11_irq_handler(struct drm_i915_private * const i915, gu_misc_iir = gen11_gu_misc_irq_ack(gt, master_ctl); - intr_enable(regs); + gen11_master_intr_enable(regs); gen11_gu_misc_irq_handler(gt, gu_misc_iir); @@ -2735,51 +2732,69 @@ __gen11_irq_handler(struct drm_i915_private * const i915, return IRQ_HANDLED; } -static irqreturn_t gen11_irq_handler(int irq, void *arg) -{ - return __gen11_irq_handler(arg, - gen11_master_intr_disable, - gen11_master_intr_enable); -} - -static u32 dg1_master_intr_disable_and_ack(void __iomem * const regs) +static inline u32 dg1_master_intr_disable(void __iomem * const regs) { u32 val; /* First disable interrupts */ - raw_reg_write(regs, DG1_MSTR_UNIT_INTR, 0); + raw_reg_write(regs, DG1_MSTR_TILE_INTR, 0); /* Get the indication levels and ack the master unit */ - val = raw_reg_read(regs, DG1_MSTR_UNIT_INTR); + val = raw_reg_read(regs, DG1_MSTR_TILE_INTR); if (unlikely(!val)) return 0; - raw_reg_write(regs, DG1_MSTR_UNIT_INTR, val); - - /* - * Now with master disabled, get a sample of level indications - * for this interrupt and ack them right away - we keep GEN11_MASTER_IRQ - * out as this bit doesn't exist anymore for DG1 - */ - val = raw_reg_read(regs, GEN11_GFX_MSTR_IRQ) & ~GEN11_MASTER_IRQ; - if (unlikely(!val)) - return 0; - - raw_reg_write(regs, GEN11_GFX_MSTR_IRQ, val); + raw_reg_write(regs, DG1_MSTR_TILE_INTR, val); 
return val; } static inline void dg1_master_intr_enable(void __iomem * const regs) { - raw_reg_write(regs, DG1_MSTR_UNIT_INTR, DG1_MSTR_IRQ); + raw_reg_write(regs, DG1_MSTR_TILE_INTR, DG1_MSTR_IRQ); } static irqreturn_t dg1_irq_handler(int irq, void *arg) { - return __gen11_irq_handler(arg, - dg1_master_intr_disable_and_ack, - dg1_master_intr_enable); + struct drm_i915_private * const i915 = arg; + struct intel_gt *gt = &i915->gt; + void __iomem * const regs = i915->uncore.regs; + u32 master_tile_ctl, master_ctl; + u32 gu_misc_iir; + + if (!intel_irqs_enabled(i915)) + return IRQ_NONE; + + master_tile_ctl = dg1_master_intr_disable(regs); + if (!master_tile_ctl) { + dg1_master_intr_enable(regs); + return IRQ_NONE; + } + + /* FIXME: we only support tile 0 for now. */ + if (master_tile_ctl & DG1_MSTR_TILE(0)) { + master_ctl = raw_reg_read(regs, GEN11_GFX_MSTR_IRQ); + raw_reg_write(regs, GEN11_GFX_MSTR_IRQ, master_ctl); + } else { + DRM_ERROR("Tile not supported: 0x%08x\n", master_tile_ctl); + dg1_master_intr_enable(regs); + return IRQ_NONE; + } + + gen11_gt_irq_handler(gt, master_ctl); + + if (master_ctl & GEN11_DISPLAY_IRQ) + gen11_display_irq_handler(i915); + + gu_misc_iir = gen11_gu_misc_irq_ack(gt, master_ctl); + + dg1_master_intr_enable(regs); + + gen11_gu_misc_irq_handler(gt, gu_misc_iir); + + pmu_irq_stats(i915, IRQ_HANDLED); + + return IRQ_HANDLED; } /* Called from drm generic code, passed 'crtc' which @@ -2880,14 +2895,14 @@ static bool gen11_dsi_configure_te(struct intel_crtc *intel_crtc, return true; } -int bdw_enable_vblank(struct drm_crtc *crtc) +int bdw_enable_vblank(struct drm_crtc *_crtc) { - struct drm_i915_private *dev_priv = to_i915(crtc->dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - enum pipe pipe = intel_crtc->pipe; + struct intel_crtc *crtc = to_intel_crtc(_crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + enum pipe pipe = crtc->pipe; unsigned long irqflags; - if (gen11_dsi_configure_te(intel_crtc, true)) + if (gen11_dsi_configure_te(crtc, true)) return 0; spin_lock_irqsave(&dev_priv->irq_lock, irqflags); @@ -2898,7 +2913,7 @@ int bdw_enable_vblank(struct drm_crtc *crtc) * PSR is active as no frames are generated, so check only for PSR. 
*/ if (HAS_PSR(dev_priv)) - drm_crtc_vblank_restore(crtc); + drm_crtc_vblank_restore(&crtc->base); return 0; } @@ -2952,14 +2967,14 @@ void ilk_disable_vblank(struct drm_crtc *crtc) spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags); } -void bdw_disable_vblank(struct drm_crtc *crtc) +void bdw_disable_vblank(struct drm_crtc *_crtc) { - struct drm_i915_private *dev_priv = to_i915(crtc->dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - enum pipe pipe = intel_crtc->pipe; + struct intel_crtc *crtc = to_intel_crtc(_crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + enum pipe pipe = crtc->pipe; unsigned long irqflags; - if (gen11_dsi_configure_te(intel_crtc, false)) + if (gen11_dsi_configure_te(crtc, false)) return; spin_lock_irqsave(&dev_priv->irq_lock, irqflags); @@ -3146,10 +3161,20 @@ static void gen11_irq_reset(struct drm_i915_private *dev_priv) { struct intel_uncore *uncore = &dev_priv->uncore; - if (HAS_MASTER_UNIT_IRQ(dev_priv)) - dg1_master_intr_disable_and_ack(dev_priv->uncore.regs); - else - gen11_master_intr_disable(dev_priv->uncore.regs); + gen11_master_intr_disable(dev_priv->uncore.regs); + + gen11_gt_irq_reset(&dev_priv->gt); + gen11_display_irq_reset(dev_priv); + + GEN3_IRQ_RESET(uncore, GEN11_GU_MISC_); + GEN3_IRQ_RESET(uncore, GEN8_PCU_); +} + +static void dg1_irq_reset(struct drm_i915_private *dev_priv) +{ + struct intel_uncore *uncore = &dev_priv->uncore; + + dg1_master_intr_disable(dev_priv->uncore.regs); gen11_gt_irq_reset(&dev_priv->gt); gen11_display_irq_reset(dev_priv); @@ -3841,13 +3866,28 @@ static void gen11_irq_postinstall(struct drm_i915_private *dev_priv) GEN3_IRQ_INIT(uncore, GEN11_GU_MISC_, ~gu_misc_masked, gu_misc_masked); - if (HAS_MASTER_UNIT_IRQ(dev_priv)) { - dg1_master_intr_enable(uncore->regs); - intel_uncore_posting_read(&dev_priv->uncore, DG1_MSTR_UNIT_INTR); - } else { - gen11_master_intr_enable(uncore->regs); - intel_uncore_posting_read(&dev_priv->uncore, GEN11_GFX_MSTR_IRQ); + gen11_master_intr_enable(uncore->regs); + intel_uncore_posting_read(&dev_priv->uncore, GEN11_GFX_MSTR_IRQ); +} + +static void dg1_irq_postinstall(struct drm_i915_private *dev_priv) +{ + struct intel_uncore *uncore = &dev_priv->uncore; + u32 gu_misc_masked = GEN11_GU_MISC_GSE; + + gen11_gt_irq_postinstall(&dev_priv->gt); + + GEN3_IRQ_INIT(uncore, GEN11_GU_MISC_, ~gu_misc_masked, gu_misc_masked); + + if (HAS_DISPLAY(dev_priv)) { + icp_irq_postinstall(dev_priv); + gen8_de_irq_postinstall(dev_priv); + intel_uncore_write(&dev_priv->uncore, GEN11_DISPLAY_INT_CTL, + GEN11_DISPLAY_IRQ_ENABLE); } + + dg1_master_intr_enable(dev_priv->uncore.regs); + intel_uncore_posting_read(&dev_priv->uncore, DG1_MSTR_TILE_INTR); } static void cherryview_irq_postinstall(struct drm_i915_private *dev_priv) @@ -4386,9 +4426,9 @@ static irq_handler_t intel_irq_handler(struct drm_i915_private *dev_priv) else return i8xx_irq_handler; } else { - if (HAS_MASTER_UNIT_IRQ(dev_priv)) + if (GRAPHICS_VER_FULL(dev_priv) >= IP_VER(12, 10)) return dg1_irq_handler; - if (GRAPHICS_VER(dev_priv) >= 11) + else if (GRAPHICS_VER(dev_priv) >= 11) return gen11_irq_handler; else if (GRAPHICS_VER(dev_priv) >= 8) return gen8_irq_handler; @@ -4411,7 +4451,9 @@ static void intel_irq_reset(struct drm_i915_private *dev_priv) else i8xx_irq_reset(dev_priv); } else { - if (GRAPHICS_VER(dev_priv) >= 11) + if (GRAPHICS_VER_FULL(dev_priv) >= IP_VER(12, 10)) + dg1_irq_reset(dev_priv); + else if (GRAPHICS_VER(dev_priv) >= 11) gen11_irq_reset(dev_priv); else if (GRAPHICS_VER(dev_priv) >= 8) 
gen8_irq_reset(dev_priv); @@ -4434,7 +4476,9 @@ static void intel_irq_postinstall(struct drm_i915_private *dev_priv) else i8xx_irq_postinstall(dev_priv); } else { - if (GRAPHICS_VER(dev_priv) >= 11) + if (GRAPHICS_VER_FULL(dev_priv) >= IP_VER(12, 10)) + dg1_irq_postinstall(dev_priv); + else if (GRAPHICS_VER(dev_priv) >= 11) gen11_irq_postinstall(dev_priv); else if (GRAPHICS_VER(dev_priv) >= 8) gen8_irq_postinstall(dev_priv); @@ -4466,14 +4510,14 @@ int intel_irq_install(struct drm_i915_private *dev_priv) */ dev_priv->runtime_pm.irqs_enabled = true; - dev_priv->drm.irq_enabled = true; + dev_priv->irq_enabled = true; intel_irq_reset(dev_priv); ret = request_irq(irq, intel_irq_handler(dev_priv), IRQF_SHARED, DRIVER_NAME, dev_priv); if (ret < 0) { - dev_priv->drm.irq_enabled = false; + dev_priv->irq_enabled = false; return ret; } @@ -4499,10 +4543,10 @@ void intel_irq_uninstall(struct drm_i915_private *dev_priv) * intel_modeset_driver_remove() calling us out of sequence. * Would be nice if it didn't do that... */ - if (!dev_priv->drm.irq_enabled) + if (!dev_priv->irq_enabled) return; - dev_priv->drm.irq_enabled = false; + dev_priv->irq_enabled = false; intel_irq_reset(dev_priv); diff --git a/drivers/gpu/drm/i915/i915_module.c b/drivers/gpu/drm/i915/i915_module.c new file mode 100644 index 000000000000..d8b4482c69d0 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_module.c @@ -0,0 +1,124 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2021 Intel Corporation + */ + +#include <linux/console.h> + +#include "gem/i915_gem_context.h" +#include "gem/i915_gem_object.h" +#include "i915_active.h" +#include "i915_buddy.h" +#include "i915_params.h" +#include "i915_pci.h" +#include "i915_perf.h" +#include "i915_request.h" +#include "i915_scheduler.h" +#include "i915_selftest.h" +#include "i915_vma.h" + +static int i915_check_nomodeset(void) +{ + bool use_kms = true; + + /* + * Enable KMS by default, unless explicitly overriden by + * either the i915.modeset prarameter or by the + * vga_text_mode_force boot option. + */ + + if (i915_modparams.modeset == 0) + use_kms = false; + + if (vgacon_text_force() && i915_modparams.modeset == -1) + use_kms = false; + + if (!use_kms) { + /* Silently fail loading to not upset userspace. 
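For illustration, a hypothetical sketch of how one more module-scoped subsystem would plug into the init_funcs[] table introduced just below; the i915_example_subsys_* names are made up, and the table order defines both the init order and the reverse teardown order used on failure and in i915_exit():

static int i915_example_subsys_init(void)
{
	return 0; /* <0 = fail and unwind, >0 = stop without error, 0 = continue */
}

static void i915_example_subsys_exit(void)
{
}

/*
 * ...and one extra entry in init_funcs[]:
 *	{ .init = i915_example_subsys_init,
 *	  .exit = i915_example_subsys_exit },
 */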
*/ + DRM_DEBUG_DRIVER("KMS disabled.\n"); + return 1; + } + + return 0; +} + +static const struct { + int (*init)(void); + void (*exit)(void); +} init_funcs[] = { + { .init = i915_check_nomodeset }, + { .init = i915_active_module_init, + .exit = i915_active_module_exit }, + { .init = i915_buddy_module_init, + .exit = i915_buddy_module_exit }, + { .init = i915_context_module_init, + .exit = i915_context_module_exit }, + { .init = i915_gem_context_module_init, + .exit = i915_gem_context_module_exit }, + { .init = i915_objects_module_init, + .exit = i915_objects_module_exit }, + { .init = i915_request_module_init, + .exit = i915_request_module_exit }, + { .init = i915_scheduler_module_init, + .exit = i915_scheduler_module_exit }, + { .init = i915_vma_module_init, + .exit = i915_vma_module_exit }, + { .init = i915_mock_selftests }, + { .init = i915_pmu_init, + .exit = i915_pmu_exit }, + { .init = i915_register_pci_driver, + .exit = i915_unregister_pci_driver }, + { .init = i915_perf_sysctl_register, + .exit = i915_perf_sysctl_unregister }, +}; +static int init_progress; + +static int __init i915_init(void) +{ + int err, i; + + for (i = 0; i < ARRAY_SIZE(init_funcs); i++) { + err = init_funcs[i].init(); + if (err < 0) { + while (i--) { + if (init_funcs[i].exit) + init_funcs[i].exit(); + } + return err; + } else if (err > 0) { + /* + * Early-exit success is reserved for things which + * don't have an exit() function because we have no + * idea how far they got or how to partially tear + * them down. + */ + WARN_ON(init_funcs[i].exit); + break; + } + } + + init_progress = i; + + return 0; +} + +static void __exit i915_exit(void) +{ + int i; + + for (i = init_progress - 1; i >= 0; i--) { + GEM_BUG_ON(i >= ARRAY_SIZE(init_funcs)); + if (init_funcs[i].exit) + init_funcs[i].exit(); + } +} + +module_init(i915_init); +module_exit(i915_exit); + +MODULE_AUTHOR("Tungsten Graphics, Inc."); +MODULE_AUTHOR("Intel Corporation"); + +MODULE_DESCRIPTION(DRIVER_DESC); +MODULE_LICENSE("GPL and additional rights"); diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 2880ec57c97d..1bbd09ad5287 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -22,18 +22,13 @@ * */ -#include <linux/console.h> #include <linux/vga_switcheroo.h> #include <drm/drm_drv.h> #include <drm/i915_pciids.h> -#include "display/intel_fbdev.h" - #include "i915_drv.h" -#include "i915_perf.h" -#include "i915_globals.h" -#include "i915_selftest.h" +#include "i915_pci.h" #define PLATFORM(x) .platform = (x) #define GEN(x) \ @@ -787,27 +782,13 @@ static const struct intel_device_info cml_gt2_info = { .gt = 2, }; -#define GEN10_FEATURES \ - GEN9_FEATURES, \ - GEN(10), \ - .dbuf.size = 1024 - 4, /* 4 blocks for bypass path allocation */ \ - .display.has_dsc = 1, \ - .has_coherent_ggtt = false, \ - GLK_COLORS - -static const struct intel_device_info cnl_info = { - GEN10_FEATURES, - PLATFORM(INTEL_CANNONLAKE), - .gt = 2, -}; - #define GEN11_DEFAULT_PAGE_SIZES \ .page_sizes = I915_GTT_PAGE_SIZE_4K | \ I915_GTT_PAGE_SIZE_64K | \ I915_GTT_PAGE_SIZE_2M #define GEN11_FEATURES \ - GEN10_FEATURES, \ + GEN9_FEATURES, \ GEN11_DEFAULT_PAGE_SIZES, \ .abox_mask = BIT(0), \ .cpu_transcoder_mask = BIT(TRANSCODER_A) | BIT(TRANSCODER_B) | \ @@ -830,10 +811,12 @@ static const struct intel_device_info cnl_info = { [TRANSCODER_DSI_1] = TRANSCODER_DSI1_OFFSET, \ }, \ GEN(11), \ + .color = { .degamma_lut_size = 33, .gamma_lut_size = 262145 }, \ .dbuf.size = 2048, \ .dbuf.slice_mask = BIT(DBUF_S1) | 
BIT(DBUF_S2), \ - .has_logical_ring_elsq = 1, \ - .color = { .degamma_lut_size = 33, .gamma_lut_size = 262145 } + .display.has_dsc = 1, \ + .has_coherent_ggtt = false, \ + .has_logical_ring_elsq = 1 static const struct intel_device_info icl_info = { GEN11_FEATURES, @@ -845,7 +828,6 @@ static const struct intel_device_info icl_info = { static const struct intel_device_info ehl_info = { GEN11_FEATURES, PLATFORM(INTEL_ELKHARTLAKE), - .require_force_probe = 1, .platform_engine_mask = BIT(RCS0) | BIT(BCS0) | BIT(VCS0) | BIT(VECS0), .ppgtt_size = 36, }; @@ -853,7 +835,6 @@ static const struct intel_device_info ehl_info = { static const struct intel_device_info jsl_info = { GEN11_FEATURES, PLATFORM(INTEL_JASPERLAKE), - .require_force_probe = 1, .platform_engine_mask = BIT(RCS0) | BIT(BCS0) | BIT(VCS0) | BIT(VECS0), .ppgtt_size = 36, }; @@ -909,7 +890,6 @@ static const struct intel_device_info rkl_info = { #define DGFX_FEATURES \ .memory_regions = REGION_SMEM | REGION_LMEM | REGION_STOLEN_LMEM, \ - .has_master_unit_irq = 1, \ .has_llc = 0, \ .has_snoop = 1, \ .is_dgfx = 1 @@ -917,6 +897,7 @@ static const struct intel_device_info rkl_info = { static const struct intel_device_info dg1_info __maybe_unused = { GEN12_FEATURES, DGFX_FEATURES, + .graphics_rel = 10, PLATFORM(INTEL_DG1), .pipe_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C) | BIT(PIPE_D), .require_force_probe = 1, @@ -936,26 +917,60 @@ static const struct intel_device_info adl_s_info = { .display.has_psr_hw_tracking = 0, .platform_engine_mask = BIT(RCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS0) | BIT(VCS2), - .dma_mask_size = 46, + .dma_mask_size = 39, }; +#define XE_LPD_CURSOR_OFFSETS \ + .cursor_offsets = { \ + [PIPE_A] = CURSOR_A_OFFSET, \ + [PIPE_B] = IVB_CURSOR_B_OFFSET, \ + [PIPE_C] = IVB_CURSOR_C_OFFSET, \ + [PIPE_D] = TGL_CURSOR_D_OFFSET, \ + } + #define XE_LPD_FEATURES \ - .display.ver = 13, \ - .display.has_psr_hw_tracking = 0, \ - .abox_mask = GENMASK(1, 0), \ - .pipe_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C) | BIT(PIPE_D), \ - .cpu_transcoder_mask = BIT(TRANSCODER_A) | BIT(TRANSCODER_B) | \ - BIT(TRANSCODER_C) | BIT(TRANSCODER_D), \ - .dbuf.size = 4096, \ - .dbuf.slice_mask = BIT(DBUF_S1) | BIT(DBUF_S2) | BIT(DBUF_S3) | BIT(DBUF_S4) + .abox_mask = GENMASK(1, 0), \ + .color = { .degamma_lut_size = 0, .gamma_lut_size = 0 }, \ + .cpu_transcoder_mask = BIT(TRANSCODER_A) | BIT(TRANSCODER_B) | \ + BIT(TRANSCODER_C) | BIT(TRANSCODER_D), \ + .dbuf.size = 4096, \ + .dbuf.slice_mask = BIT(DBUF_S1) | BIT(DBUF_S2) | BIT(DBUF_S3) | \ + BIT(DBUF_S4), \ + .display.has_ddi = 1, \ + .display.has_dmc = 1, \ + .display.has_dp_mst = 1, \ + .display.has_dsb = 1, \ + .display.has_dsc = 1, \ + .display.has_fbc = 1, \ + .display.has_fpga_dbg = 1, \ + .display.has_hdcp = 1, \ + .display.has_hotplug = 1, \ + .display.has_ipc = 1, \ + .display.has_psr = 1, \ + .display.ver = 13, \ + .pipe_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C) | BIT(PIPE_D), \ + .pipe_offsets = { \ + [TRANSCODER_A] = PIPE_A_OFFSET, \ + [TRANSCODER_B] = PIPE_B_OFFSET, \ + [TRANSCODER_C] = PIPE_C_OFFSET, \ + [TRANSCODER_D] = PIPE_D_OFFSET, \ + }, \ + .trans_offsets = { \ + [TRANSCODER_A] = TRANSCODER_A_OFFSET, \ + [TRANSCODER_B] = TRANSCODER_B_OFFSET, \ + [TRANSCODER_C] = TRANSCODER_C_OFFSET, \ + [TRANSCODER_D] = TRANSCODER_D_OFFSET, \ + }, \ + XE_LPD_CURSOR_OFFSETS static const struct intel_device_info adl_p_info = { GEN12_FEATURES, XE_LPD_FEATURES, PLATFORM(INTEL_ALDERLAKE_P), - .has_cdclk_crawl = 1, .require_force_probe = 1, + .display.has_cdclk_crawl = 1, 
.display.has_modular_fia = 1, + .display.has_psr_hw_tracking = 0, .platform_engine_mask = BIT(RCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS0) | BIT(VCS2), .ppgtt_size = 48, @@ -963,6 +978,67 @@ static const struct intel_device_info adl_p_info = { }; #undef GEN + +#define XE_HP_PAGE_SIZES \ + .page_sizes = I915_GTT_PAGE_SIZE_4K | \ + I915_GTT_PAGE_SIZE_64K | \ + I915_GTT_PAGE_SIZE_2M + +#define XE_HP_FEATURES \ + .graphics_ver = 12, \ + .graphics_rel = 50, \ + XE_HP_PAGE_SIZES, \ + .dma_mask_size = 46, \ + .has_64bit_reloc = 1, \ + .has_global_mocs = 1, \ + .has_gt_uc = 1, \ + .has_llc = 1, \ + .has_logical_ring_contexts = 1, \ + .has_logical_ring_elsq = 1, \ + .has_mslices = 1, \ + .has_rc6 = 1, \ + .has_reset_engine = 1, \ + .has_rps = 1, \ + .has_runtime_pm = 1, \ + .ppgtt_size = 48, \ + .ppgtt_type = INTEL_PPGTT_FULL + +#define XE_HPM_FEATURES \ + .media_ver = 12, \ + .media_rel = 50 + +__maybe_unused +static const struct intel_device_info xehpsdv_info = { + XE_HP_FEATURES, + XE_HPM_FEATURES, + DGFX_FEATURES, + PLATFORM(INTEL_XEHPSDV), + .display = { }, + .pipe_mask = 0, + .platform_engine_mask = + BIT(RCS0) | BIT(BCS0) | + BIT(VECS0) | BIT(VECS1) | BIT(VECS2) | BIT(VECS3) | + BIT(VCS0) | BIT(VCS1) | BIT(VCS2) | BIT(VCS3) | + BIT(VCS4) | BIT(VCS5) | BIT(VCS6) | BIT(VCS7), + .require_force_probe = 1, +}; + +__maybe_unused +static const struct intel_device_info dg2_info = { + XE_HP_FEATURES, + XE_HPM_FEATURES, + XE_LPD_FEATURES, + DGFX_FEATURES, + .graphics_rel = 55, + .media_rel = 55, + PLATFORM(INTEL_DG2), + .platform_engine_mask = + BIT(RCS0) | BIT(BCS0) | + BIT(VECS0) | BIT(VECS1) | + BIT(VCS0) | BIT(VCS2), + .require_force_probe = 1, +}; + #undef PLATFORM /* @@ -1032,7 +1108,6 @@ static const struct pci_device_id pciidlist[] = { INTEL_CML_GT2_IDS(&cml_gt2_info), INTEL_CML_U_GT1_IDS(&cml_gt1_info), INTEL_CML_U_GT2_IDS(&cml_gt2_info), - INTEL_CNL_IDS(&cnl_info), INTEL_ICL_11_IDS(&icl_info), INTEL_EHL_IDS(&ehl_info), INTEL_JSL_IDS(&jsl_info), @@ -1159,66 +1234,12 @@ static struct pci_driver i915_pci_driver = { .driver.pm = &i915_pm_ops, }; -static int __init i915_init(void) +int i915_register_pci_driver(void) { - bool use_kms = true; - int err; - - err = i915_globals_init(); - if (err) - return err; - - err = i915_mock_selftests(); - if (err) - return err > 0 ? 0 : err; - - /* - * Enable KMS by default, unless explicitly overriden by - * either the i915.modeset prarameter or by the - * vga_text_mode_force boot option. - */ - - if (i915_modparams.modeset == 0) - use_kms = false; - - if (vgacon_text_force() && i915_modparams.modeset == -1) - use_kms = false; - - if (!use_kms) { - /* Silently fail loading to not upset userspace. 
*/ - DRM_DEBUG_DRIVER("KMS disabled.\n"); - return 0; - } - - i915_pmu_init(); - - err = pci_register_driver(&i915_pci_driver); - if (err) { - i915_pmu_exit(); - i915_globals_exit(); - return err; - } - - i915_perf_sysctl_register(); - return 0; + return pci_register_driver(&i915_pci_driver); } -static void __exit i915_exit(void) +void i915_unregister_pci_driver(void) { - if (!i915_pci_driver.driver.owner) - return; - - i915_perf_sysctl_unregister(); pci_unregister_driver(&i915_pci_driver); - i915_globals_exit(); - i915_pmu_exit(); } - -module_init(i915_init); -module_exit(i915_exit); - -MODULE_AUTHOR("Tungsten Graphics, Inc."); -MODULE_AUTHOR("Intel Corporation"); - -MODULE_DESCRIPTION(DRIVER_DESC); -MODULE_LICENSE("GPL and additional rights"); diff --git a/drivers/gpu/drm/i915/i915_pci.h b/drivers/gpu/drm/i915/i915_pci.h new file mode 100644 index 000000000000..b386f319f52e --- /dev/null +++ b/drivers/gpu/drm/i915/i915_pci.h @@ -0,0 +1,8 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2021 Intel Corporation + */ + +int i915_register_pci_driver(void); +void i915_unregister_pci_driver(void); diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 9f94914958c3..2f01b8c0284c 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1256,7 +1256,6 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream) case 8: case 9: - case 10: if (intel_engine_uses_guc(ce->engine)) { /* * When using GuC, the context descriptor we write in @@ -1284,17 +1283,26 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream) break; case 11: - case 12: { - stream->specific_ctx_id_mask = - ((1U << GEN11_SW_CTX_ID_WIDTH) - 1) << (GEN11_SW_CTX_ID_SHIFT - 32); - /* - * Pick an unused context id - * 0 - BITS_PER_LONG are used by other contexts - * GEN12_MAX_CONTEXT_HW_ID (0x7ff) is used by idle context - */ - stream->specific_ctx_id = (GEN12_MAX_CONTEXT_HW_ID - 1) << (GEN11_SW_CTX_ID_SHIFT - 32); + case 12: + if (GRAPHICS_VER_FULL(ce->engine->i915) >= IP_VER(12, 50)) { + stream->specific_ctx_id_mask = + ((1U << XEHP_SW_CTX_ID_WIDTH) - 1) << + (XEHP_SW_CTX_ID_SHIFT - 32); + stream->specific_ctx_id = + (XEHP_MAX_CONTEXT_HW_ID - 1) << + (XEHP_SW_CTX_ID_SHIFT - 32); + } else { + stream->specific_ctx_id_mask = + ((1U << GEN11_SW_CTX_ID_WIDTH) - 1) << (GEN11_SW_CTX_ID_SHIFT - 32); + /* + * Pick an unused context id + * 0 - BITS_PER_LONG are used by other contexts + * GEN12_MAX_CONTEXT_HW_ID (0x7ff) is used by idle context + */ + stream->specific_ctx_id = + (GEN12_MAX_CONTEXT_HW_ID - 1) << (GEN11_SW_CTX_ID_SHIFT - 32); + } break; - } default: MISSING_CASE(GRAPHICS_VER(ce->engine->i915)); @@ -2580,7 +2588,7 @@ static void gen8_disable_metric_set(struct i915_perf_stream *stream) intel_uncore_rmw(uncore, GDT_CHICKEN_BITS, GT_NOA_ENABLE, 0); } -static void gen10_disable_metric_set(struct i915_perf_stream *stream) +static void gen11_disable_metric_set(struct i915_perf_stream *stream) { struct intel_uncore *uncore = stream->uncore; @@ -3414,10 +3422,10 @@ i915_perf_open_ioctl_locked(struct i915_perf *perf, struct drm_i915_file_private *file_priv = file->driver_priv; specific_ctx = i915_gem_context_lookup(file_priv, ctx_handle); - if (!specific_ctx) { + if (IS_ERR(specific_ctx)) { DRM_DEBUG("Failed to look up context with ID %u for opening perf stream\n", ctx_handle); - ret = -ENOENT; + ret = PTR_ERR(specific_ctx); goto err; } } @@ -3887,7 +3895,7 @@ static bool gen8_is_valid_mux_addr(struct i915_perf *perf, u32 addr) REG_IN_RANGE(addr, 
RPM_CONFIG0, NOA_CONFIG(8)); } -static bool gen10_is_valid_mux_addr(struct i915_perf *perf, u32 addr) +static bool gen11_is_valid_mux_addr(struct i915_perf *perf, u32 addr) { return gen8_is_valid_mux_addr(perf, addr) || REG_EQUAL(addr, GEN10_NOA_WRITE_HIGH) || @@ -4310,7 +4318,6 @@ static void oa_init_supported_formats(struct i915_perf *perf) case INTEL_GEMINILAKE: case INTEL_COFFEELAKE: case INTEL_COMETLAKE: - case INTEL_CANNONLAKE: case INTEL_ICELAKE: case INTEL_ELKHARTLAKE: case INTEL_JASPERLAKE: @@ -4395,27 +4402,23 @@ void i915_perf_init(struct drm_i915_private *i915) perf->gen8_valid_ctx_bit = BIT(16); } - } else if (IS_GRAPHICS_VER(i915, 10, 11)) { + } else if (GRAPHICS_VER(i915) == 11) { perf->ops.is_valid_b_counter_reg = gen7_is_valid_b_counter_addr; perf->ops.is_valid_mux_reg = - gen10_is_valid_mux_addr; + gen11_is_valid_mux_addr; perf->ops.is_valid_flex_reg = gen8_is_valid_flex_addr; perf->ops.oa_enable = gen8_oa_enable; perf->ops.oa_disable = gen8_oa_disable; perf->ops.enable_metric_set = gen8_enable_metric_set; - perf->ops.disable_metric_set = gen10_disable_metric_set; + perf->ops.disable_metric_set = gen11_disable_metric_set; perf->ops.oa_hw_tail_read = gen8_oa_hw_tail_read; - if (GRAPHICS_VER(i915) == 10) { - perf->ctx_oactxctrl_offset = 0x128; - perf->ctx_flexeu0_offset = 0x3de; - } else { - perf->ctx_oactxctrl_offset = 0x124; - perf->ctx_flexeu0_offset = 0x78e; - } + perf->ctx_oactxctrl_offset = 0x124; + perf->ctx_flexeu0_offset = 0x78e; + perf->gen8_valid_ctx_bit = BIT(16); } else if (GRAPHICS_VER(i915) == 12) { perf->ops.is_valid_b_counter_reg = @@ -4483,9 +4486,10 @@ static int destroy_config(int id, void *p, void *data) return 0; } -void i915_perf_sysctl_register(void) +int i915_perf_sysctl_register(void) { sysctl_header = register_sysctl_table(dev_root); + return 0; } void i915_perf_sysctl_unregister(void) diff --git a/drivers/gpu/drm/i915/i915_perf.h b/drivers/gpu/drm/i915/i915_perf.h index 882fdd0a7680..1d1329e5af3a 100644 --- a/drivers/gpu/drm/i915/i915_perf.h +++ b/drivers/gpu/drm/i915/i915_perf.h @@ -23,7 +23,7 @@ void i915_perf_fini(struct drm_i915_private *i915); void i915_perf_register(struct drm_i915_private *i915); void i915_perf_unregister(struct drm_i915_private *i915); int i915_perf_ioctl_version(void); -void i915_perf_sysctl_register(void); +int i915_perf_sysctl_register(void); void i915_perf_sysctl_unregister(void); int i915_perf_open_ioctl(struct drm_device *dev, void *data, diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 34d37d46a126..0b488d49694c 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -407,7 +407,7 @@ frequency_sample(struct intel_gt *gt, unsigned int period_ns) if (pmu->enable & config_mask(I915_PMU_REQUESTED_FREQUENCY)) { add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_REQ], - intel_gpu_freq(rps, rps->cur_freq), + intel_rps_get_requested_frequency(rps), period_ns / 1000); } @@ -1088,7 +1088,7 @@ static int i915_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node) static enum cpuhp_state cpuhp_slot = CPUHP_INVALID; -void i915_pmu_init(void) +int i915_pmu_init(void) { int ret; @@ -1101,6 +1101,8 @@ void i915_pmu_init(void) ret); else cpuhp_slot = ret; + + return 0; } void i915_pmu_exit(void) diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h index 60f9595f902c..449057648f39 100644 --- a/drivers/gpu/drm/i915/i915_pmu.h +++ b/drivers/gpu/drm/i915/i915_pmu.h @@ -147,14 +147,14 @@ struct i915_pmu { }; #ifdef CONFIG_PERF_EVENTS -void 
i915_pmu_init(void); +int i915_pmu_init(void); void i915_pmu_exit(void); void i915_pmu_register(struct drm_i915_private *i915); void i915_pmu_unregister(struct drm_i915_private *i915); void i915_pmu_gt_parked(struct drm_i915_private *i915); void i915_pmu_gt_unparked(struct drm_i915_private *i915); #else -static inline void i915_pmu_init(void) {} +static inline int i915_pmu_init(void) { return 0; } static inline void i915_pmu_exit(void) {} static inline void i915_pmu_register(struct drm_i915_private *i915) {} static inline void i915_pmu_unregister(struct drm_i915_private *i915) {} diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 476bb3b9ad11..664970f2bc62 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -395,10 +395,18 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GEN11_GRDOM_MEDIA2 (1 << 6) #define GEN11_GRDOM_MEDIA3 (1 << 7) #define GEN11_GRDOM_MEDIA4 (1 << 8) +#define GEN11_GRDOM_MEDIA5 (1 << 9) +#define GEN11_GRDOM_MEDIA6 (1 << 10) +#define GEN11_GRDOM_MEDIA7 (1 << 11) +#define GEN11_GRDOM_MEDIA8 (1 << 12) #define GEN11_GRDOM_VECS (1 << 13) #define GEN11_GRDOM_VECS2 (1 << 14) +#define GEN11_GRDOM_VECS3 (1 << 15) +#define GEN11_GRDOM_VECS4 (1 << 16) #define GEN11_GRDOM_SFC0 (1 << 17) #define GEN11_GRDOM_SFC1 (1 << 18) +#define GEN11_GRDOM_SFC2 (1 << 19) +#define GEN11_GRDOM_SFC3 (1 << 20) #define GEN11_VCS_SFC_RESET_BIT(instance) (GEN11_GRDOM_SFC0 << ((instance) >> 1)) #define GEN11_VECS_SFC_RESET_BIT(instance) (GEN11_GRDOM_SFC0 << (instance)) @@ -1877,7 +1885,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define BXT_PORT_CL1CM_DW30(phy) _BXT_PHY((phy), _PORT_CL1CM_DW30_BC) /* - * CNL/ICL Port/COMBO-PHY Registers + * ICL Port/COMBO-PHY Registers */ #define _ICL_COMBOPHY_A 0x162000 #define _ICL_COMBOPHY_B 0x6C000 @@ -1891,11 +1899,10 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) _RKL_COMBOPHY_D, \ _ADL_COMBOPHY_E) -/* CNL/ICL Port CL_DW registers */ +/* ICL Port CL_DW registers */ #define _ICL_PORT_CL_DW(dw, phy) (_ICL_COMBOPHY(phy) + \ 4 * (dw)) -#define CNL_PORT_CL1CM_DW5 _MMIO(0x162014) #define ICL_PORT_CL_DW5(phy) _MMIO(_ICL_PORT_CL_DW(5, phy)) #define CL_POWER_DOWN_ENABLE (1 << 4) #define SUS_CLOCK_CONFIG (3 << 0) @@ -1920,19 +1927,16 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define ICL_PORT_CL_DW12(phy) _MMIO(_ICL_PORT_CL_DW(12, phy)) #define ICL_LANE_ENABLE_AUX (1 << 0) -/* CNL/ICL Port COMP_DW registers */ +/* ICL Port COMP_DW registers */ #define _ICL_PORT_COMP 0x100 #define _ICL_PORT_COMP_DW(dw, phy) (_ICL_COMBOPHY(phy) + \ _ICL_PORT_COMP + 4 * (dw)) -#define CNL_PORT_COMP_DW0 _MMIO(0x162100) #define ICL_PORT_COMP_DW0(phy) _MMIO(_ICL_PORT_COMP_DW(0, phy)) #define COMP_INIT (1 << 31) -#define CNL_PORT_COMP_DW1 _MMIO(0x162104) #define ICL_PORT_COMP_DW1(phy) _MMIO(_ICL_PORT_COMP_DW(1, phy)) -#define CNL_PORT_COMP_DW3 _MMIO(0x16210c) #define ICL_PORT_COMP_DW3(phy) _MMIO(_ICL_PORT_COMP_DW(3, phy)) #define PROCESS_INFO_DOT_0 (0 << 26) #define PROCESS_INFO_DOT_1 (1 << 26) @@ -1948,38 +1952,11 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define ICL_PORT_COMP_DW8(phy) _MMIO(_ICL_PORT_COMP_DW(8, phy)) #define IREFGEN (1 << 24) -#define CNL_PORT_COMP_DW9 _MMIO(0x162124) #define ICL_PORT_COMP_DW9(phy) _MMIO(_ICL_PORT_COMP_DW(9, phy)) -#define CNL_PORT_COMP_DW10 _MMIO(0x162128) #define ICL_PORT_COMP_DW10(phy) _MMIO(_ICL_PORT_COMP_DW(10, phy)) -/* CNL/ICL Port PCS registers */ -#define _CNL_PORT_PCS_DW1_GRP_AE 0x162304 -#define 
_CNL_PORT_PCS_DW1_GRP_B 0x162384 -#define _CNL_PORT_PCS_DW1_GRP_C 0x162B04 -#define _CNL_PORT_PCS_DW1_GRP_D 0x162B84 -#define _CNL_PORT_PCS_DW1_GRP_F 0x162A04 -#define _CNL_PORT_PCS_DW1_LN0_AE 0x162404 -#define _CNL_PORT_PCS_DW1_LN0_B 0x162604 -#define _CNL_PORT_PCS_DW1_LN0_C 0x162C04 -#define _CNL_PORT_PCS_DW1_LN0_D 0x162E04 -#define _CNL_PORT_PCS_DW1_LN0_F 0x162804 -#define CNL_PORT_PCS_DW1_GRP(phy) _MMIO(_PICK(phy, \ - _CNL_PORT_PCS_DW1_GRP_AE, \ - _CNL_PORT_PCS_DW1_GRP_B, \ - _CNL_PORT_PCS_DW1_GRP_C, \ - _CNL_PORT_PCS_DW1_GRP_D, \ - _CNL_PORT_PCS_DW1_GRP_AE, \ - _CNL_PORT_PCS_DW1_GRP_F)) -#define CNL_PORT_PCS_DW1_LN0(phy) _MMIO(_PICK(phy, \ - _CNL_PORT_PCS_DW1_LN0_AE, \ - _CNL_PORT_PCS_DW1_LN0_B, \ - _CNL_PORT_PCS_DW1_LN0_C, \ - _CNL_PORT_PCS_DW1_LN0_D, \ - _CNL_PORT_PCS_DW1_LN0_AE, \ - _CNL_PORT_PCS_DW1_LN0_F)) - +/* ICL Port PCS registers */ #define _ICL_PORT_PCS_AUX 0x300 #define _ICL_PORT_PCS_GRP 0x600 #define _ICL_PORT_PCS_LN(ln) (0x800 + (ln) * 0x100) @@ -1998,34 +1975,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define LATENCY_OPTIM_MASK (0x3 << 2) #define LATENCY_OPTIM_VAL(x) ((x) << 2) -/* CNL/ICL Port TX registers */ -#define _CNL_PORT_TX_AE_GRP_OFFSET 0x162340 -#define _CNL_PORT_TX_B_GRP_OFFSET 0x1623C0 -#define _CNL_PORT_TX_C_GRP_OFFSET 0x162B40 -#define _CNL_PORT_TX_D_GRP_OFFSET 0x162BC0 -#define _CNL_PORT_TX_F_GRP_OFFSET 0x162A40 -#define _CNL_PORT_TX_AE_LN0_OFFSET 0x162440 -#define _CNL_PORT_TX_B_LN0_OFFSET 0x162640 -#define _CNL_PORT_TX_C_LN0_OFFSET 0x162C40 -#define _CNL_PORT_TX_D_LN0_OFFSET 0x162E40 -#define _CNL_PORT_TX_F_LN0_OFFSET 0x162840 -#define _CNL_PORT_TX_DW_GRP(dw, port) (_PICK((port), \ - _CNL_PORT_TX_AE_GRP_OFFSET, \ - _CNL_PORT_TX_B_GRP_OFFSET, \ - _CNL_PORT_TX_B_GRP_OFFSET, \ - _CNL_PORT_TX_D_GRP_OFFSET, \ - _CNL_PORT_TX_AE_GRP_OFFSET, \ - _CNL_PORT_TX_F_GRP_OFFSET) + \ - 4 * (dw)) -#define _CNL_PORT_TX_DW_LN0(dw, port) (_PICK((port), \ - _CNL_PORT_TX_AE_LN0_OFFSET, \ - _CNL_PORT_TX_B_LN0_OFFSET, \ - _CNL_PORT_TX_B_LN0_OFFSET, \ - _CNL_PORT_TX_D_LN0_OFFSET, \ - _CNL_PORT_TX_AE_LN0_OFFSET, \ - _CNL_PORT_TX_F_LN0_OFFSET) + \ - 4 * (dw)) - +/* ICL Port TX registers */ #define _ICL_PORT_TX_AUX 0x380 #define _ICL_PORT_TX_GRP 0x680 #define _ICL_PORT_TX_LN(ln) (0x880 + (ln) * 0x100) @@ -2037,8 +1987,6 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define _ICL_PORT_TX_DW_LN(dw, ln, phy) (_ICL_COMBOPHY(phy) + \ _ICL_PORT_TX_LN(ln) + 4 * (dw)) -#define CNL_PORT_TX_DW2_GRP(port) _MMIO(_CNL_PORT_TX_DW_GRP(2, port)) -#define CNL_PORT_TX_DW2_LN0(port) _MMIO(_CNL_PORT_TX_DW_LN0(2, port)) #define ICL_PORT_TX_DW2_AUX(phy) _MMIO(_ICL_PORT_TX_DW_AUX(2, phy)) #define ICL_PORT_TX_DW2_GRP(phy) _MMIO(_ICL_PORT_TX_DW_GRP(2, phy)) #define ICL_PORT_TX_DW2_LN0(phy) _MMIO(_ICL_PORT_TX_DW_LN(2, 0, phy)) @@ -2051,13 +1999,6 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define RCOMP_SCALAR(x) ((x) << 0) #define RCOMP_SCALAR_MASK (0xFF << 0) -#define _CNL_PORT_TX_DW4_LN0_AE 0x162450 -#define _CNL_PORT_TX_DW4_LN1_AE 0x1624D0 -#define CNL_PORT_TX_DW4_GRP(port) _MMIO(_CNL_PORT_TX_DW_GRP(4, (port))) -#define CNL_PORT_TX_DW4_LN0(port) _MMIO(_CNL_PORT_TX_DW_LN0(4, (port))) -#define CNL_PORT_TX_DW4_LN(ln, port) _MMIO(_CNL_PORT_TX_DW_LN0(4, (port)) + \ - ((ln) * (_CNL_PORT_TX_DW4_LN1_AE - \ - _CNL_PORT_TX_DW4_LN0_AE))) #define ICL_PORT_TX_DW4_AUX(phy) _MMIO(_ICL_PORT_TX_DW_AUX(4, phy)) #define ICL_PORT_TX_DW4_GRP(phy) _MMIO(_ICL_PORT_TX_DW_GRP(4, phy)) #define ICL_PORT_TX_DW4_LN0(phy) _MMIO(_ICL_PORT_TX_DW_LN(4, 0, phy)) @@ -2070,8 +2011,6 @@ static 
inline bool i915_mmio_reg_valid(i915_reg_t reg) #define CURSOR_COEFF(x) ((x) << 0) #define CURSOR_COEFF_MASK (0x3F << 0) -#define CNL_PORT_TX_DW5_GRP(port) _MMIO(_CNL_PORT_TX_DW_GRP(5, port)) -#define CNL_PORT_TX_DW5_LN0(port) _MMIO(_CNL_PORT_TX_DW_LN0(5, port)) #define ICL_PORT_TX_DW5_AUX(phy) _MMIO(_ICL_PORT_TX_DW_AUX(5, phy)) #define ICL_PORT_TX_DW5_GRP(phy) _MMIO(_ICL_PORT_TX_DW_GRP(5, phy)) #define ICL_PORT_TX_DW5_LN0(phy) _MMIO(_ICL_PORT_TX_DW_LN(5, 0, phy)) @@ -2083,8 +2022,6 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define RTERM_SELECT(x) ((x) << 3) #define RTERM_SELECT_MASK (0x7 << 3) -#define CNL_PORT_TX_DW7_GRP(port) _MMIO(_CNL_PORT_TX_DW_GRP(7, (port))) -#define CNL_PORT_TX_DW7_LN0(port) _MMIO(_CNL_PORT_TX_DW_LN0(7, (port))) #define ICL_PORT_TX_DW7_AUX(phy) _MMIO(_ICL_PORT_TX_DW_AUX(7, phy)) #define ICL_PORT_TX_DW7_GRP(phy) _MMIO(_ICL_PORT_TX_DW_GRP(7, phy)) #define ICL_PORT_TX_DW7_LN0(phy) _MMIO(_ICL_PORT_TX_DW_LN(7, 0, phy)) @@ -2278,6 +2215,68 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define MG_DP_MODE_CFG_DP_X2_MODE (1 << 7) #define MG_DP_MODE_CFG_DP_X1_MODE (1 << 6) +/* + * DG2 SNPS PHY registers (TC1 = PHY_E) + */ +#define _SNPS_PHY_A_BASE 0x168000 +#define _SNPS_PHY_B_BASE 0x169000 +#define _SNPS_PHY(phy) _PHY(phy, \ + _SNPS_PHY_A_BASE, \ + _SNPS_PHY_B_BASE) +#define _SNPS2(phy, reg) (_SNPS_PHY(phy) - \ + _SNPS_PHY_A_BASE + (reg)) +#define _MMIO_SNPS(phy, reg) _MMIO(_SNPS2(phy, reg)) +#define _MMIO_SNPS_LN(ln, phy, reg) _MMIO(_SNPS2(phy, \ + (reg) + (ln) * 0x10)) + +#define SNPS_PHY_MPLLB_CP(phy) _MMIO_SNPS(phy, 0x168000) +#define SNPS_PHY_MPLLB_CP_INT REG_GENMASK(31, 25) +#define SNPS_PHY_MPLLB_CP_INT_GS REG_GENMASK(23, 17) +#define SNPS_PHY_MPLLB_CP_PROP REG_GENMASK(15, 9) +#define SNPS_PHY_MPLLB_CP_PROP_GS REG_GENMASK(7, 1) + +#define SNPS_PHY_MPLLB_DIV(phy) _MMIO_SNPS(phy, 0x168004) +#define SNPS_PHY_MPLLB_FORCE_EN REG_BIT(31) +#define SNPS_PHY_MPLLB_DIV5_CLK_EN REG_BIT(29) +#define SNPS_PHY_MPLLB_V2I REG_GENMASK(27, 26) +#define SNPS_PHY_MPLLB_FREQ_VCO REG_GENMASK(25, 24) +#define SNPS_PHY_MPLLB_PMIX_EN REG_BIT(10) +#define SNPS_PHY_MPLLB_TX_CLK_DIV REG_GENMASK(7, 5) + +#define SNPS_PHY_MPLLB_FRACN1(phy) _MMIO_SNPS(phy, 0x168008) +#define SNPS_PHY_MPLLB_FRACN_EN REG_BIT(31) +#define SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN REG_BIT(30) +#define SNPS_PHY_MPLLB_FRACN_DEN REG_GENMASK(15, 0) + +#define SNPS_PHY_MPLLB_FRACN2(phy) _MMIO_SNPS(phy, 0x16800C) +#define SNPS_PHY_MPLLB_FRACN_REM REG_GENMASK(31, 16) +#define SNPS_PHY_MPLLB_FRACN_QUOT REG_GENMASK(15, 0) + +#define SNPS_PHY_MPLLB_SSCEN(phy) _MMIO_SNPS(phy, 0x168014) +#define SNPS_PHY_MPLLB_SSC_EN REG_BIT(31) +#define SNPS_PHY_MPLLB_SSC_UP_SPREAD REG_BIT(30) +#define SNPS_PHY_MPLLB_SSC_PEAK REG_GENMASK(29, 10) + +#define SNPS_PHY_MPLLB_SSCSTEP(phy) _MMIO_SNPS(phy, 0x168018) +#define SNPS_PHY_MPLLB_SSC_STEPSIZE REG_GENMASK(31, 11) + +#define SNPS_PHY_MPLLB_DIV2(phy) _MMIO_SNPS(phy, 0x16801C) +#define SNPS_PHY_MPLLB_HDMI_PIXEL_CLK_DIV REG_GENMASK(19, 18) +#define SNPS_PHY_MPLLB_HDMI_DIV REG_GENMASK(17, 15) +#define SNPS_PHY_MPLLB_REF_CLK_DIV REG_GENMASK(14, 12) +#define SNPS_PHY_MPLLB_MULTIPLIER REG_GENMASK(11, 0) + +#define SNPS_PHY_REF_CONTROL(phy) _MMIO_SNPS(phy, 0x168188) +#define SNPS_PHY_REF_CONTROL_REF_RANGE REG_GENMASK(31, 27) + +#define SNPS_PHY_TX_REQ(phy) _MMIO_SNPS(phy, 0x168200) +#define SNPS_PHY_TX_REQ_LN_DIS_PWR_STATE_PSR REG_GENMASK(31, 30) + +#define SNPS_PHY_TX_EQ(ln, phy) _MMIO_SNPS_LN(ln, phy, 0x168300) +#define SNPS_PHY_TX_EQ_MAIN REG_GENMASK(23, 18) +#define 
SNPS_PHY_TX_EQ_POST REG_GENMASK(15, 10) +#define SNPS_PHY_TX_EQ_PRE REG_GENMASK(7, 2) + /* The spec defines this only for BXT PHY0, but lets assume that this * would exist for PHY1 too if it had a second channel. */ @@ -2516,9 +2515,15 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GEN11_BSD2_RING_BASE 0x1c4000 #define GEN11_BSD3_RING_BASE 0x1d0000 #define GEN11_BSD4_RING_BASE 0x1d4000 +#define XEHP_BSD5_RING_BASE 0x1e0000 +#define XEHP_BSD6_RING_BASE 0x1e4000 +#define XEHP_BSD7_RING_BASE 0x1f0000 +#define XEHP_BSD8_RING_BASE 0x1f4000 #define VEBOX_RING_BASE 0x1a000 #define GEN11_VEBOX_RING_BASE 0x1c8000 #define GEN11_VEBOX2_RING_BASE 0x1d8000 +#define XEHP_VEBOX3_RING_BASE 0x1e8000 +#define XEHP_VEBOX4_RING_BASE 0x1f8000 #define BLT_RING_BASE 0x22000 #define RING_TAIL(base) _MMIO((base) + 0x30) #define RING_HEAD(base) _MMIO((base) + 0x34) @@ -2572,7 +2577,16 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define ARB_MODE_BWGTLB_DISABLE (1 << 9) #define ARB_MODE_SWIZZLE_BDW (1 << 1) #define RENDER_HWS_PGA_GEN7 _MMIO(0x04080) -#define RING_FAULT_REG(engine) _MMIO(0x4094 + 0x100 * (engine)->hw_id) + +#define _RING_FAULT_REG_RCS 0x4094 +#define _RING_FAULT_REG_VCS 0x4194 +#define _RING_FAULT_REG_BCS 0x4294 +#define _RING_FAULT_REG_VECS 0x4394 +#define RING_FAULT_REG(engine) _MMIO(_PICK((engine)->class, \ + _RING_FAULT_REG_RCS, \ + _RING_FAULT_REG_VCS, \ + _RING_FAULT_REG_VECS, \ + _RING_FAULT_REG_BCS)) #define GEN8_RING_FAULT_REG _MMIO(0x4094) #define GEN12_RING_FAULT_REG _MMIO(0xcec4) #define GEN8_RING_FAULT_ENGINE_ID(x) (((x) >> 12) & 0x7) @@ -2672,6 +2686,8 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GEN12_SC_INSTDONE_EXTRA2 _MMIO(0x7108) #define GEN7_SAMPLER_INSTDONE _MMIO(0xe160) #define GEN7_ROW_INSTDONE _MMIO(0xe164) +#define MCFG_MCR_SELECTOR _MMIO(0xfd0) +#define SF_MCR_SELECTOR _MMIO(0xfd8) #define GEN8_MCR_SELECTOR _MMIO(0xfdc) #define GEN8_MCR_SLICE(slice) (((slice) & 3) << 26) #define GEN8_MCR_SLICE_MASK GEN8_MCR_SLICE(3) @@ -3099,6 +3115,9 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GEN10_MIRROR_FUSE3 _MMIO(0x9118) #define GEN10_L3BANK_PAIR_COUNT 4 #define GEN10_L3BANK_MASK 0x0F +/* on Xe_HP the same fuses indicates mslices instead of L3 banks */ +#define GEN12_MAX_MSLICES 4 +#define GEN12_MEML3_EN_MASK 0x0F #define GEN8_EU_DISABLE0 _MMIO(0x9134) #define GEN8_EU_DIS0_S0_MASK 0xffffff @@ -3133,6 +3152,9 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GEN12_GT_DSS_ENABLE _MMIO(0x913C) +#define XEHP_EU_ENABLE _MMIO(0x9134) +#define XEHP_EU_ENA_MASK 0xFF + #define GEN6_BSD_SLEEP_PSMI_CONTROL _MMIO(0x12050) #define GEN6_BSD_SLEEP_MSG_DISABLE (1 << 0) #define GEN6_BSD_SLEEP_FLUSH_DISABLE (1 << 2) @@ -4086,6 +4108,9 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define BXT_GT_PERF_STATUS _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x7070) #define GEN6_RP_STATE_LIMITS _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5994) #define GEN6_RP_STATE_CAP _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5998) +#define RP0_CAP_MASK REG_GENMASK(7, 0) +#define RP1_CAP_MASK REG_GENMASK(15, 8) +#define RPN_CAP_MASK REG_GENMASK(23, 16) #define BXT_RP_STATE_CAP _MMIO(0x138170) #define GEN9_RP_STATE_LIMITS _MMIO(0x138148) @@ -4142,6 +4167,7 @@ enum { FAULT_AND_CONTINUE /* Unsupported */ }; +#define CTX_GTT_ADDRESS_MASK GENMASK(31, 12) #define GEN8_CTX_VALID (1 << 0) #define GEN8_CTX_FORCE_PD_RESTORE (1 << 1) #define GEN8_CTX_FORCE_RESTORE (1 << 2) @@ -4158,6 +4184,11 @@ enum { #define GEN11_ENGINE_INSTANCE_SHIFT 48 #define 
GEN11_ENGINE_INSTANCE_WIDTH 6 +#define XEHP_SW_CTX_ID_SHIFT 39 +#define XEHP_SW_CTX_ID_WIDTH 16 +#define XEHP_SW_COUNTER_SHIFT 58 +#define XEHP_SW_COUNTER_WIDTH 6 + #define CHV_CLK_CTL1 _MMIO(0x101100) #define VLV_CLK_CTL2 _MMIO(0x101104) #define CLK_CTL2_CZCOUNT_30NS_SHIFT 28 @@ -4586,23 +4617,26 @@ enum { #define _PSR2_CTL_EDP 0x6f900 #define EDP_PSR2_CTL(tran) _MMIO_TRANS2(tran, _PSR2_CTL_A) #define EDP_PSR2_ENABLE (1 << 31) -#define EDP_SU_TRACK_ENABLE (1 << 30) +#define EDP_SU_TRACK_ENABLE (1 << 30) /* up to adl-p */ #define TGL_EDP_PSR2_BLOCK_COUNT_NUM_2 (0 << 28) #define TGL_EDP_PSR2_BLOCK_COUNT_NUM_3 (1 << 28) #define EDP_Y_COORDINATE_ENABLE REG_BIT(25) /* display 10, 11 and 12 */ +#define EDP_PSR2_SU_SDP_SCANLINE REG_BIT(25) /* display 13+ */ #define EDP_MAX_SU_DISABLE_TIME(t) ((t) << 20) #define EDP_MAX_SU_DISABLE_TIME_MASK (0x1f << 20) #define EDP_PSR2_IO_BUFFER_WAKE_MAX_LINES 8 #define EDP_PSR2_IO_BUFFER_WAKE(lines) ((EDP_PSR2_IO_BUFFER_WAKE_MAX_LINES - (lines)) << 13) #define EDP_PSR2_IO_BUFFER_WAKE_MASK (3 << 13) #define TGL_EDP_PSR2_IO_BUFFER_WAKE_MIN_LINES 5 -#define TGL_EDP_PSR2_IO_BUFFER_WAKE(lines) (((lines) - TGL_EDP_PSR2_IO_BUFFER_WAKE_MIN_LINES) << 13) +#define TGL_EDP_PSR2_IO_BUFFER_WAKE_SHIFT 13 +#define TGL_EDP_PSR2_IO_BUFFER_WAKE(lines) (((lines) - TGL_EDP_PSR2_IO_BUFFER_WAKE_MIN_LINES) << TGL_EDP_PSR2_IO_BUFFER_WAKE_SHIFT) #define TGL_EDP_PSR2_IO_BUFFER_WAKE_MASK (7 << 13) #define EDP_PSR2_FAST_WAKE_MAX_LINES 8 #define EDP_PSR2_FAST_WAKE(lines) ((EDP_PSR2_FAST_WAKE_MAX_LINES - (lines)) << 11) #define EDP_PSR2_FAST_WAKE_MASK (3 << 11) #define TGL_EDP_PSR2_FAST_WAKE_MIN_LINES 5 -#define TGL_EDP_PSR2_FAST_WAKE(lines) (((lines) - TGL_EDP_PSR2_FAST_WAKE_MIN_LINES) << 10) +#define TGL_EDP_PSR2_FAST_WAKE_MIN_SHIFT 10 +#define TGL_EDP_PSR2_FAST_WAKE(lines) (((lines) - TGL_EDP_PSR2_FAST_WAKE_MIN_LINES) << TGL_EDP_PSR2_FAST_WAKE_MIN_SHIFT) #define TGL_EDP_PSR2_FAST_WAKE_MASK (7 << 10) #define EDP_PSR2_TP2_TIME_500us (0 << 8) #define EDP_PSR2_TP2_TIME_100us (1 << 8) @@ -4652,17 +4686,23 @@ enum { #define PSR2_SU_STATUS_MASK(frame) (0x3ff << PSR2_SU_STATUS_SHIFT(frame)) #define PSR2_SU_STATUS_FRAMES 8 -#define _PSR2_MAN_TRK_CTL_A 0x60910 -#define _PSR2_MAN_TRK_CTL_EDP 0x6f910 -#define PSR2_MAN_TRK_CTL(tran) _MMIO_TRANS2(tran, _PSR2_MAN_TRK_CTL_A) -#define PSR2_MAN_TRK_CTL_ENABLE REG_BIT(31) -#define PSR2_MAN_TRK_CTL_SU_REGION_START_ADDR_MASK REG_GENMASK(30, 21) -#define PSR2_MAN_TRK_CTL_SU_REGION_START_ADDR(val) REG_FIELD_PREP(PSR2_MAN_TRK_CTL_SU_REGION_START_ADDR_MASK, val) +#define _PSR2_MAN_TRK_CTL_A 0x60910 +#define _PSR2_MAN_TRK_CTL_EDP 0x6f910 +#define PSR2_MAN_TRK_CTL(tran) _MMIO_TRANS2(tran, _PSR2_MAN_TRK_CTL_A) +#define PSR2_MAN_TRK_CTL_ENABLE REG_BIT(31) +#define PSR2_MAN_TRK_CTL_SU_REGION_START_ADDR_MASK REG_GENMASK(30, 21) +#define PSR2_MAN_TRK_CTL_SU_REGION_START_ADDR(val) REG_FIELD_PREP(PSR2_MAN_TRK_CTL_SU_REGION_START_ADDR_MASK, val) #define PSR2_MAN_TRK_CTL_SU_REGION_END_ADDR_MASK REG_GENMASK(20, 11) #define PSR2_MAN_TRK_CTL_SU_REGION_END_ADDR(val) REG_FIELD_PREP(PSR2_MAN_TRK_CTL_SU_REGION_END_ADDR_MASK, val) -#define PSR2_MAN_TRK_CTL_SF_SINGLE_FULL_FRAME REG_BIT(3) -#define PSR2_MAN_TRK_CTL_SF_CONTINUOS_FULL_FRAME REG_BIT(2) -#define PSR2_MAN_TRK_CTL_SF_PARTIAL_FRAME_UPDATE REG_BIT(1) +#define PSR2_MAN_TRK_CTL_SF_SINGLE_FULL_FRAME REG_BIT(3) +#define PSR2_MAN_TRK_CTL_SF_CONTINUOS_FULL_FRAME REG_BIT(2) +#define PSR2_MAN_TRK_CTL_SF_PARTIAL_FRAME_UPDATE REG_BIT(1) +#define ADLP_PSR2_MAN_TRK_CTL_SU_REGION_START_ADDR_MASK REG_GENMASK(28, 16) +#define 
ADLP_PSR2_MAN_TRK_CTL_SU_REGION_START_ADDR(val) REG_FIELD_PREP(ADLP_PSR2_MAN_TRK_CTL_SU_REGION_START_ADDR_MASK, val) +#define ADLP_PSR2_MAN_TRK_CTL_SU_REGION_END_ADDR_MASK REG_GENMASK(12, 0) +#define ADLP_PSR2_MAN_TRK_CTL_SU_REGION_END_ADDR(val) REG_FIELD_PREP(ADLP_PSR2_MAN_TRK_CTL_SU_REGION_END_ADDR_MASK, val) +#define ADLP_PSR2_MAN_TRK_CTL_SF_SINGLE_FULL_FRAME REG_BIT(14) +#define ADLP_PSR2_MAN_TRK_CTL_SF_CONTINUOS_FULL_FRAME REG_BIT(13) /* Icelake DSC Rate Control Range Parameter Registers */ #define DSCA_RC_RANGE_PARAMETERS_0 _MMIO(0x6B240) @@ -7717,11 +7757,11 @@ enum { #define SKL_PS_ECC_STAT(pipe, id) _MMIO_PIPE(pipe, \ _ID(id, _PS_ECC_STAT_1A, _PS_ECC_STAT_2A), \ _ID(id, _PS_ECC_STAT_1B, _PS_ECC_STAT_2B)) -#define CNL_PS_COEF_INDEX_SET(pipe, id, set) _MMIO_PIPE(pipe, \ +#define GLK_PS_COEF_INDEX_SET(pipe, id, set) _MMIO_PIPE(pipe, \ _ID(id, _PS_COEF_SET0_INDEX_1A, _PS_COEF_SET0_INDEX_2A) + (set) * 8, \ _ID(id, _PS_COEF_SET0_INDEX_1B, _PS_COEF_SET0_INDEX_2B) + (set) * 8) -#define CNL_PS_COEF_DATA_SET(pipe, id, set) _MMIO_PIPE(pipe, \ +#define GLK_PS_COEF_DATA_SET(pipe, id, set) _MMIO_PIPE(pipe, \ _ID(id, _PS_COEF_SET0_DATA_1A, _PS_COEF_SET0_DATA_2A) + (set) * 8, \ _ID(id, _PS_COEF_SET0_DATA_1B, _PS_COEF_SET0_DATA_2B) + (set) * 8) /* legacy palette */ @@ -7757,7 +7797,7 @@ enum { #define GAMMA_MODE_MODE_12BIT_MULTI_SEGMENTED (3 << 0) /* icl + */ /* DMC */ -#define DMC_PROGRAM(i) _MMIO(0x80000 + (i) * 4) +#define DMC_PROGRAM(addr, i) _MMIO((addr) + (i) * 4) #define DMC_SSP_BASE_ADDR_GEN9 0x00002FC0 #define DMC_HTP_ADDR_SKL 0x00500034 #define DMC_SSP_BASE _MMIO(0x8F074) @@ -7936,7 +7976,7 @@ enum { #define DSI1_NON_TE (1 << 31) #define DSI0_NON_TE (1 << 30) #define ICL_AUX_CHANNEL_E (1 << 29) -#define CNL_AUX_CHANNEL_F (1 << 28) +#define ICL_AUX_CHANNEL_F (1 << 28) #define GEN9_AUX_CHANNEL_D (1 << 27) #define GEN9_AUX_CHANNEL_C (1 << 26) #define GEN9_AUX_CHANNEL_B (1 << 25) @@ -7988,9 +8028,9 @@ enum { #define GEN11_GT_DW1_IRQ (1 << 1) #define GEN11_GT_DW0_IRQ (1 << 0) -#define DG1_MSTR_UNIT_INTR _MMIO(0x190008) +#define DG1_MSTR_TILE_INTR _MMIO(0x190008) #define DG1_MSTR_IRQ REG_BIT(31) -#define DG1_MSTR_UNIT(u) REG_BIT(u) +#define DG1_MSTR_TILE(t) REG_BIT(t) #define GEN11_DISPLAY_INT_CTL _MMIO(0x44200) #define GEN11_DISPLAY_IRQ_ENABLE (1 << 31) @@ -8073,7 +8113,10 @@ enum { #define GEN11_BCS_RSVD_INTR_MASK _MMIO(0x1900a0) #define GEN11_VCS0_VCS1_INTR_MASK _MMIO(0x1900a8) #define GEN11_VCS2_VCS3_INTR_MASK _MMIO(0x1900ac) +#define GEN12_VCS4_VCS5_INTR_MASK _MMIO(0x1900b0) +#define GEN12_VCS6_VCS7_INTR_MASK _MMIO(0x1900b4) #define GEN11_VECS0_VECS1_INTR_MASK _MMIO(0x1900d0) +#define GEN12_VECS2_VECS3_INTR_MASK _MMIO(0x1900d4) #define GEN11_GUC_SG_INTR_MASK _MMIO(0x1900e8) #define GEN11_GPM_WGBOXPERF_INTR_MASK _MMIO(0x1900ec) #define GEN11_CRYPTO_RSVD_INTR_MASK _MMIO(0x1900f0) @@ -8113,6 +8156,7 @@ enum { # define CHICKEN3_DGMG_DONE_FIX_DISABLE (1 << 2) #define CHICKEN_PAR1_1 _MMIO(0x42080) +#define IGNORE_KVMR_PIPE_A REG_BIT(23) #define KBL_ARB_FILL_SPARE_22 REG_BIT(22) #define DIS_RAM_BYPASS_PSR2_MAN_TRACK (1 << 16) #define SKL_DE_COMPRESSED_HASH_MODE (1 << 15) @@ -8125,7 +8169,6 @@ enum { #define KVM_CONFIG_CHANGE_NOTIFICATION_SELECT (1 << 14) #define CHICKEN_MISC_2 _MMIO(0x42084) -#define CNL_COMP_PWR_DOWN (1 << 23) #define KBL_ARB_FILL_SPARE_14 REG_BIT(14) #define KBL_ARB_FILL_SPARE_13 REG_BIT(13) #define GLK_CL2_PWR_DOWN (1 << 12) @@ -8163,15 +8206,16 @@ enum { [TRANSCODER_B] = _CHICKEN_TRANS_B, \ [TRANSCODER_C] = _CHICKEN_TRANS_C, \ [TRANSCODER_D] = _CHICKEN_TRANS_D)) -#define 
HSW_FRAME_START_DELAY_MASK (3 << 27) -#define HSW_FRAME_START_DELAY(x) ((x) << 27) /* 0-3 */ -#define VSC_DATA_SEL_SOFTWARE_CONTROL (1 << 25) /* GLK and CNL+ */ -#define DDI_TRAINING_OVERRIDE_ENABLE (1 << 19) -#define DDI_TRAINING_OVERRIDE_VALUE (1 << 18) -#define DDIE_TRAINING_OVERRIDE_ENABLE (1 << 17) /* CHICKEN_TRANS_A only */ -#define DDIE_TRAINING_OVERRIDE_VALUE (1 << 16) /* CHICKEN_TRANS_A only */ -#define PSR2_ADD_VERTICAL_LINE_COUNT (1 << 15) -#define PSR2_VSC_ENABLE_PROG_HEADER (1 << 12) +#define HSW_FRAME_START_DELAY_MASK REG_GENMASK(28, 27) +#define HSW_FRAME_START_DELAY(x) REG_FIELD_PREP(HSW_FRAME_START_DELAY_MASK, x) +#define VSC_DATA_SEL_SOFTWARE_CONTROL REG_BIT(25) /* GLK */ +#define FECSTALL_DIS_DPTSTREAM_DPTTG REG_BIT(23) +#define DDI_TRAINING_OVERRIDE_ENABLE REG_BIT(19) +#define DDI_TRAINING_OVERRIDE_VALUE REG_BIT(18) +#define DDIE_TRAINING_OVERRIDE_ENABLE REG_BIT(17) /* CHICKEN_TRANS_A only */ +#define DDIE_TRAINING_OVERRIDE_VALUE REG_BIT(16) /* CHICKEN_TRANS_A only */ +#define PSR2_ADD_VERTICAL_LINE_COUNT REG_BIT(15) +#define PSR2_VSC_ENABLE_PROG_HEADER REG_BIT(12) #define DISP_ARB_CTL _MMIO(0x45000) #define DISP_FBC_MEMORY_WAKE (1 << 31) @@ -8229,9 +8273,8 @@ enum { #define GEN8_CHICKEN_DCPR_1 _MMIO(0x46430) #define SKL_SELECT_ALTERNATE_DC_EXIT (1 << 30) -#define CNL_DELAY_PMRSP (1 << 22) +#define ICL_DELAY_PMRSP (1 << 22) #define MASK_WAKEMEM (1 << 13) -#define CNL_DDI_CLOCK_REG_ACCESS_ON (1 << 7) #define GEN11_CHICKEN_DCPR_2 _MMIO(0x46434) #define DCPR_MASK_MAXLATENCY_MEMUP_CLR REG_BIT(27) @@ -8252,10 +8295,9 @@ enum { #define SKL_DFSM_PIPE_B_DISABLE (1 << 21) #define SKL_DFSM_PIPE_C_DISABLE (1 << 28) #define TGL_DFSM_PIPE_D_DISABLE (1 << 22) -#define CNL_DFSM_DISPLAY_DSC_DISABLE (1 << 7) +#define GLK_DFSM_DISPLAY_DSC_DISABLE (1 << 7) #define SKL_DSSM _MMIO(0x51004) -#define CNL_DSSM_CDCLK_PLL_REFCLK_24MHz (1 << 31) #define ICL_DSSM_CDCLK_PLL_REFCLK_MASK (7 << 29) #define ICL_DSSM_CDCLK_PLL_REFCLK_24MHz (0 << 29) #define ICL_DSSM_CDCLK_PLL_REFCLK_19_2MHz (1 << 29) @@ -8354,7 +8396,6 @@ enum { /* GEN8 chicken */ #define HDC_CHICKEN0 _MMIO(0x7300) -#define CNL_HDC_CHICKEN0 _MMIO(0xE5F0) #define ICL_HDC_MODE _MMIO(0xE5F4) #define HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE (1 << 15) #define HDC_FENCE_DEST_SLM_DISABLE (1 << 14) @@ -8388,7 +8429,8 @@ enum { #define _PIPEC_CHICKEN 0x72038 #define PIPE_CHICKEN(pipe) _MMIO_PIPE(pipe, _PIPEA_CHICKEN,\ _PIPEB_CHICKEN) -#define UNDERRUN_RECOVERY_DISABLE REG_BIT(30) +#define UNDERRUN_RECOVERY_DISABLE_ADLP REG_BIT(30) +#define UNDERRUN_RECOVERY_ENABLE_DG2 REG_BIT(30) #define PIXEL_ROUNDING_TRUNC_FB_PASSTHRU (1 << 15) #define PER_PIXEL_ALPHA_BYPASS_EN (1 << 7) @@ -9200,6 +9242,8 @@ enum { #define GEN9_FREQUENCY(x) ((x) << 23) #define GEN6_OFFSET(x) ((x) << 19) #define GEN6_AGGRESSIVE_TURBO (0 << 15) +#define GEN9_SW_REQ_UNSLICE_RATIO_SHIFT 23 + #define GEN6_RC_VIDEO_FREQ _MMIO(0xA00C) #define GEN6_RC_CONTROL _MMIO(0xA090) #define GEN6_RC_CTL_RC6pp_ENABLE (1 << 16) @@ -9368,9 +9412,13 @@ enum { #define ICL_PCODE_MEM_SUBSYSYSTEM_INFO 0xd #define ICL_PCODE_MEM_SS_READ_GLOBAL_INFO (0x0 << 8) #define ICL_PCODE_MEM_SS_READ_QGV_POINT_INFO(point) (((point) << 16) | (0x1 << 8)) +#define ADL_PCODE_MEM_SS_READ_PSF_GV_INFO ((0) | (0x2 << 8)) #define ICL_PCODE_SAGV_DE_MEM_SS_CONFIG 0xe #define ICL_PCODE_POINTS_RESTRICTED 0x0 -#define ICL_PCODE_POINTS_RESTRICTED_MASK 0x1 +#define ICL_PCODE_POINTS_RESTRICTED_MASK 0xf +#define ADLS_PSF_PT_SHIFT 8 +#define ADLS_QGV_PT_MASK REG_GENMASK(7, 0) +#define ADLS_PSF_PT_MASK REG_GENMASK(10, 8) #define 
GEN6_PCODE_READ_D_COMP 0x10 #define GEN6_PCODE_WRITE_D_COMP 0x11 #define ICL_PCODE_EXIT_TCCOLD 0x12 @@ -9530,7 +9578,6 @@ enum { #define HSW_SAMPLE_C_PERFORMANCE (1 << 9) #define GEN8_CENTROID_PIXEL_OPT_DIS (1 << 8) #define GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC (1 << 5) -#define CNL_FAST_ANISO_L1_BANKING_FIX (1 << 4) #define GEN8_SAMPLER_POWER_BYPASS_DIS (1 << 1) #define GEN9_HALF_SLICE_CHICKEN7 _MMIO(0xe194) @@ -9711,15 +9758,12 @@ enum { /* HSW/BDW power well */ #define HSW_PW_CTL_IDX_GLOBAL 15 -/* SKL/BXT/GLK/CNL power wells */ +/* SKL/BXT/GLK power wells */ #define SKL_PW_CTL_IDX_PW_2 15 #define SKL_PW_CTL_IDX_PW_1 14 -#define CNL_PW_CTL_IDX_AUX_F 12 -#define CNL_PW_CTL_IDX_AUX_D 11 #define GLK_PW_CTL_IDX_AUX_C 10 #define GLK_PW_CTL_IDX_AUX_B 9 #define GLK_PW_CTL_IDX_AUX_A 8 -#define CNL_PW_CTL_IDX_DDI_F 6 #define SKL_PW_CTL_IDX_DDI_D 4 #define SKL_PW_CTL_IDX_DDI_C 3 #define SKL_PW_CTL_IDX_DDI_B 2 @@ -9818,19 +9862,6 @@ enum skl_power_gate { ((pw_idx) - ICL_PW_CTL_IDX_PW_1 + SKL_PG1) #define SKL_FUSE_PG_DIST_STATUS(pg) (1 << (27 - (pg))) -#define _CNL_AUX_REG_IDX(pw_idx) ((pw_idx) - GLK_PW_CTL_IDX_AUX_B) -#define _CNL_AUX_ANAOVRD1_B 0x162250 -#define _CNL_AUX_ANAOVRD1_C 0x162210 -#define _CNL_AUX_ANAOVRD1_D 0x1622D0 -#define _CNL_AUX_ANAOVRD1_F 0x162A90 -#define CNL_AUX_ANAOVRD1(pw_idx) _MMIO(_PICK(_CNL_AUX_REG_IDX(pw_idx), \ - _CNL_AUX_ANAOVRD1_B, \ - _CNL_AUX_ANAOVRD1_C, \ - _CNL_AUX_ANAOVRD1_D, \ - _CNL_AUX_ANAOVRD1_F)) -#define CNL_AUX_ANAOVRD1_ENABLE (1 << 16) -#define CNL_AUX_ANAOVRD1_LDO_BYPASS (1 << 23) - #define _ICL_AUX_REG_IDX(pw_idx) ((pw_idx) - ICL_PW_CTL_IDX_AUX_A) #define _ICL_AUX_ANAOVRD1_A 0x162398 #define _ICL_AUX_ANAOVRD1_B 0x6C398 @@ -10130,11 +10161,11 @@ enum skl_power_gate { #define TRANS_DDI_BPC_10 (1 << 20) #define TRANS_DDI_BPC_6 (2 << 20) #define TRANS_DDI_BPC_12 (3 << 20) -#define TRANS_DDI_PORT_SYNC_MASTER_SELECT_MASK REG_GENMASK(19, 18) /* bdw-cnl */ +#define TRANS_DDI_PORT_SYNC_MASTER_SELECT_MASK REG_GENMASK(19, 18) #define TRANS_DDI_PORT_SYNC_MASTER_SELECT(x) REG_FIELD_PREP(TRANS_DDI_PORT_SYNC_MASTER_SELECT_MASK, (x)) #define TRANS_DDI_PVSYNC (1 << 17) #define TRANS_DDI_PHSYNC (1 << 16) -#define TRANS_DDI_PORT_SYNC_ENABLE REG_BIT(15) /* bdw-cnl */ +#define TRANS_DDI_PORT_SYNC_ENABLE REG_BIT(15) #define TRANS_DDI_EDP_INPUT_MASK (7 << 12) #define TRANS_DDI_EDP_INPUT_A_ON (0 << 12) #define TRANS_DDI_EDP_INPUT_A_ONOFF (4 << 12) @@ -10167,6 +10198,9 @@ enum skl_power_gate { #define PORT_SYNC_MODE_MASTER_SELECT_MASK REG_GENMASK(2, 0) #define PORT_SYNC_MODE_MASTER_SELECT(x) REG_FIELD_PREP(PORT_SYNC_MODE_MASTER_SELECT_MASK, (x)) +#define TRANS_CMTG_CHICKEN _MMIO(0x6fa90) +#define DISABLE_DPT_CLK_GATING REG_BIT(1) + /* DisplayPort Transport Control */ #define _DP_TP_CTL_A 0x64040 #define _DP_TP_CTL_B 0x64140 @@ -10371,6 +10405,14 @@ enum skl_power_gate { #define TRANS_MSA_MISC(tran) _MMIO_TRANS2(tran, _TRANSA_MSA_MISC) /* See DP_MSA_MISC_* for the bit definitions */ +#define _TRANS_A_SET_CONTEXT_LATENCY 0x6007C +#define _TRANS_B_SET_CONTEXT_LATENCY 0x6107C +#define _TRANS_C_SET_CONTEXT_LATENCY 0x6207C +#define _TRANS_D_SET_CONTEXT_LATENCY 0x6307C +#define TRANS_SET_CONTEXT_LATENCY(tran) _MMIO_TRANS2(tran, _TRANS_A_SET_CONTEXT_LATENCY) +#define TRANS_SET_CONTEXT_LATENCY_MASK REG_GENMASK(15, 0) +#define TRANS_SET_CONTEXT_LATENCY_VALUE(x) REG_FIELD_PREP(TRANS_SET_CONTEXT_LATENCY_MASK, (x)) + /* LCPLL Control */ #define LCPLL_CTL _MMIO(0x130040) #define LCPLL_PLL_DISABLE (1 << 31) @@ -10482,17 +10524,6 @@ enum skl_power_gate { #define DPLL_CFGCR1(id) _MMIO_PIPE((id) - 
SKL_DPLL1, _DPLL1_CFGCR1, _DPLL2_CFGCR1) #define DPLL_CFGCR2(id) _MMIO_PIPE((id) - SKL_DPLL1, _DPLL1_CFGCR2, _DPLL2_CFGCR2) -/* - * CNL Clocks - */ -#define DPCLKA_CFGCR0 _MMIO(0x6C200) -#define DPCLKA_CFGCR0_DDI_CLK_OFF(port) (1 << ((port) == PORT_F ? 23 : \ - (port) + 10)) -#define DPCLKA_CFGCR0_DDI_CLK_SEL_SHIFT(port) ((port) == PORT_F ? 21 : \ - (port) * 2) -#define DPCLKA_CFGCR0_DDI_CLK_SEL_MASK(port) (3 << DPCLKA_CFGCR0_DDI_CLK_SEL_SHIFT(port)) -#define DPCLKA_CFGCR0_DDI_CLK_SEL(pll, port) ((pll) << DPCLKA_CFGCR0_DDI_CLK_SEL_SHIFT(port)) - /* ICL Clocks */ #define ICL_DPCLKA_CFGCR0 _MMIO(0x164280) #define ICL_DPCLKA_CFGCR0_DDI_CLK_OFF(phy) (1 << _PICK(phy, 10, 11, 24, 4, 5)) @@ -10548,7 +10579,7 @@ enum skl_power_gate { ADLS_DPCLKA_DDIJ_SEL_MASK, \ ADLS_DPCLKA_DDIK_SEL_MASK) -/* CNL PLL */ +/* ICL PLL */ #define DPLL0_ENABLE 0x46010 #define DPLL1_ENABLE 0x46014 #define _ADLS_DPLL2_ENABLE 0x46018 @@ -10557,9 +10588,14 @@ enum skl_power_gate { #define PLL_LOCK (1 << 30) #define PLL_POWER_ENABLE (1 << 27) #define PLL_POWER_STATE (1 << 26) -#define CNL_DPLL_ENABLE(pll) _MMIO_PLL3(pll, DPLL0_ENABLE, DPLL1_ENABLE, \ +#define ICL_DPLL_ENABLE(pll) _MMIO_PLL3(pll, DPLL0_ENABLE, DPLL1_ENABLE, \ _ADLS_DPLL2_ENABLE, _ADLS_DPLL3_ENABLE) +#define _DG2_PLL3_ENABLE 0x4601C + +#define DG2_PLL_ENABLE(pll) _MMIO_PLL3(pll, DPLL0_ENABLE, DPLL1_ENABLE, \ + _ADLS_DPLL2_ENABLE, _DG2_PLL3_ENABLE) + #define TBT_PLL_ENABLE _MMIO(0x46020) #define _MG_PLL1_ENABLE 0x46030 @@ -10725,60 +10761,52 @@ enum skl_power_gate { _MG_PLL_TDC_COLDST_BIAS_PORT1, \ _MG_PLL_TDC_COLDST_BIAS_PORT2) -#define _CNL_DPLL0_CFGCR0 0x6C000 -#define _CNL_DPLL1_CFGCR0 0x6C080 -#define DPLL_CFGCR0_HDMI_MODE (1 << 30) -#define DPLL_CFGCR0_SSC_ENABLE (1 << 29) -#define DPLL_CFGCR0_SSC_ENABLE_ICL (1 << 25) -#define DPLL_CFGCR0_LINK_RATE_MASK (0xf << 25) -#define DPLL_CFGCR0_LINK_RATE_2700 (0 << 25) -#define DPLL_CFGCR0_LINK_RATE_1350 (1 << 25) -#define DPLL_CFGCR0_LINK_RATE_810 (2 << 25) -#define DPLL_CFGCR0_LINK_RATE_1620 (3 << 25) -#define DPLL_CFGCR0_LINK_RATE_1080 (4 << 25) -#define DPLL_CFGCR0_LINK_RATE_2160 (5 << 25) -#define DPLL_CFGCR0_LINK_RATE_3240 (6 << 25) -#define DPLL_CFGCR0_LINK_RATE_4050 (7 << 25) -#define DPLL_CFGCR0_DCO_FRACTION_MASK (0x7fff << 10) -#define DPLL_CFGCR0_DCO_FRACTION_SHIFT (10) -#define DPLL_CFGCR0_DCO_FRACTION(x) ((x) << 10) -#define DPLL_CFGCR0_DCO_INTEGER_MASK (0x3ff) -#define CNL_DPLL_CFGCR0(pll) _MMIO_PLL(pll, _CNL_DPLL0_CFGCR0, _CNL_DPLL1_CFGCR0) - -#define _CNL_DPLL0_CFGCR1 0x6C004 -#define _CNL_DPLL1_CFGCR1 0x6C084 -#define DPLL_CFGCR1_QDIV_RATIO_MASK (0xff << 10) -#define DPLL_CFGCR1_QDIV_RATIO_SHIFT (10) -#define DPLL_CFGCR1_QDIV_RATIO(x) ((x) << 10) -#define DPLL_CFGCR1_QDIV_MODE_SHIFT (9) -#define DPLL_CFGCR1_QDIV_MODE(x) ((x) << 9) -#define DPLL_CFGCR1_KDIV_MASK (7 << 6) -#define DPLL_CFGCR1_KDIV_SHIFT (6) -#define DPLL_CFGCR1_KDIV(x) ((x) << 6) -#define DPLL_CFGCR1_KDIV_1 (1 << 6) -#define DPLL_CFGCR1_KDIV_2 (2 << 6) -#define DPLL_CFGCR1_KDIV_3 (4 << 6) -#define DPLL_CFGCR1_PDIV_MASK (0xf << 2) -#define DPLL_CFGCR1_PDIV_SHIFT (2) -#define DPLL_CFGCR1_PDIV(x) ((x) << 2) -#define DPLL_CFGCR1_PDIV_2 (1 << 2) -#define DPLL_CFGCR1_PDIV_3 (2 << 2) -#define DPLL_CFGCR1_PDIV_5 (4 << 2) -#define DPLL_CFGCR1_PDIV_7 (8 << 2) -#define DPLL_CFGCR1_CENTRAL_FREQ (3 << 0) -#define DPLL_CFGCR1_CENTRAL_FREQ_8400 (3 << 0) -#define TGL_DPLL_CFGCR1_CFSELOVRD_NORMAL_XTAL (0 << 0) -#define CNL_DPLL_CFGCR1(pll) _MMIO_PLL(pll, _CNL_DPLL0_CFGCR1, _CNL_DPLL1_CFGCR1) - #define _ICL_DPLL0_CFGCR0 0x164000 #define 
_ICL_DPLL1_CFGCR0 0x164080 #define ICL_DPLL_CFGCR0(pll) _MMIO_PLL(pll, _ICL_DPLL0_CFGCR0, \ _ICL_DPLL1_CFGCR0) +#define DPLL_CFGCR0_HDMI_MODE (1 << 30) +#define DPLL_CFGCR0_SSC_ENABLE (1 << 29) +#define DPLL_CFGCR0_SSC_ENABLE_ICL (1 << 25) +#define DPLL_CFGCR0_LINK_RATE_MASK (0xf << 25) +#define DPLL_CFGCR0_LINK_RATE_2700 (0 << 25) +#define DPLL_CFGCR0_LINK_RATE_1350 (1 << 25) +#define DPLL_CFGCR0_LINK_RATE_810 (2 << 25) +#define DPLL_CFGCR0_LINK_RATE_1620 (3 << 25) +#define DPLL_CFGCR0_LINK_RATE_1080 (4 << 25) +#define DPLL_CFGCR0_LINK_RATE_2160 (5 << 25) +#define DPLL_CFGCR0_LINK_RATE_3240 (6 << 25) +#define DPLL_CFGCR0_LINK_RATE_4050 (7 << 25) +#define DPLL_CFGCR0_DCO_FRACTION_MASK (0x7fff << 10) +#define DPLL_CFGCR0_DCO_FRACTION_SHIFT (10) +#define DPLL_CFGCR0_DCO_FRACTION(x) ((x) << 10) +#define DPLL_CFGCR0_DCO_INTEGER_MASK (0x3ff) #define _ICL_DPLL0_CFGCR1 0x164004 #define _ICL_DPLL1_CFGCR1 0x164084 #define ICL_DPLL_CFGCR1(pll) _MMIO_PLL(pll, _ICL_DPLL0_CFGCR1, \ _ICL_DPLL1_CFGCR1) +#define DPLL_CFGCR1_QDIV_RATIO_MASK (0xff << 10) +#define DPLL_CFGCR1_QDIV_RATIO_SHIFT (10) +#define DPLL_CFGCR1_QDIV_RATIO(x) ((x) << 10) +#define DPLL_CFGCR1_QDIV_MODE_SHIFT (9) +#define DPLL_CFGCR1_QDIV_MODE(x) ((x) << 9) +#define DPLL_CFGCR1_KDIV_MASK (7 << 6) +#define DPLL_CFGCR1_KDIV_SHIFT (6) +#define DPLL_CFGCR1_KDIV(x) ((x) << 6) +#define DPLL_CFGCR1_KDIV_1 (1 << 6) +#define DPLL_CFGCR1_KDIV_2 (2 << 6) +#define DPLL_CFGCR1_KDIV_3 (4 << 6) +#define DPLL_CFGCR1_PDIV_MASK (0xf << 2) +#define DPLL_CFGCR1_PDIV_SHIFT (2) +#define DPLL_CFGCR1_PDIV(x) ((x) << 2) +#define DPLL_CFGCR1_PDIV_2 (1 << 2) +#define DPLL_CFGCR1_PDIV_3 (2 << 2) +#define DPLL_CFGCR1_PDIV_5 (4 << 2) +#define DPLL_CFGCR1_PDIV_7 (8 << 2) +#define DPLL_CFGCR1_CENTRAL_FREQ (3 << 0) +#define DPLL_CFGCR1_CENTRAL_FREQ_8400 (3 << 0) +#define TGL_DPLL_CFGCR1_CFSELOVRD_NORMAL_XTAL (0 << 0) #define _TGL_DPLL0_CFGCR0 0x164284 #define _TGL_DPLL1_CFGCR0 0x16428C @@ -10998,8 +11026,8 @@ enum skl_power_gate { #define BXT_DE_PLL_LOCK (1 << 30) #define BXT_DE_PLL_FREQ_REQ (1 << 23) #define BXT_DE_PLL_FREQ_REQ_ACK (1 << 22) -#define CNL_CDCLK_PLL_RATIO(x) (x) -#define CNL_CDCLK_PLL_RATIO_MASK 0xff +#define ICL_CDCLK_PLL_RATIO(x) (x) +#define ICL_CDCLK_PLL_RATIO_MASK 0xff /* GEN9 DC */ #define DC_STATE_EN _MMIO(0x45504) @@ -11054,6 +11082,7 @@ enum skl_power_gate { #define SKL_MEMORY_FREQ_MULTIPLIER_HZ 266666666 #define SKL_MC_BIOS_DATA_0_0_0_MCHBAR_PCU _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5E04) #define SKL_REQ_DATA_MASK (0xF << 0) +#define DG1_GEAR_TYPE REG_BIT(16) #define SKL_MAD_INTER_CHANNEL_0_0_0_MCHBAR_MCMAIN _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5000) #define SKL_DRAM_DDR_TYPE_MASK (0x3 << 0) @@ -11076,18 +11105,29 @@ enum skl_power_gate { #define SKL_DRAM_RANK_1 (0x0 << 10) #define SKL_DRAM_RANK_2 (0x1 << 10) #define SKL_DRAM_RANK_MASK (0x1 << 10) -#define CNL_DRAM_SIZE_MASK 0x7F -#define CNL_DRAM_WIDTH_MASK (0x3 << 7) -#define CNL_DRAM_WIDTH_SHIFT 7 -#define CNL_DRAM_WIDTH_X8 (0x0 << 7) -#define CNL_DRAM_WIDTH_X16 (0x1 << 7) -#define CNL_DRAM_WIDTH_X32 (0x2 << 7) -#define CNL_DRAM_RANK_MASK (0x3 << 9) -#define CNL_DRAM_RANK_SHIFT 9 -#define CNL_DRAM_RANK_1 (0x0 << 9) -#define CNL_DRAM_RANK_2 (0x1 << 9) -#define CNL_DRAM_RANK_3 (0x2 << 9) -#define CNL_DRAM_RANK_4 (0x3 << 9) +#define ICL_DRAM_SIZE_MASK 0x7F +#define ICL_DRAM_WIDTH_MASK (0x3 << 7) +#define ICL_DRAM_WIDTH_SHIFT 7 +#define ICL_DRAM_WIDTH_X8 (0x0 << 7) +#define ICL_DRAM_WIDTH_X16 (0x1 << 7) +#define ICL_DRAM_WIDTH_X32 (0x2 << 7) +#define ICL_DRAM_RANK_MASK (0x3 << 9) +#define 
ICL_DRAM_RANK_SHIFT 9 +#define ICL_DRAM_RANK_1 (0x0 << 9) +#define ICL_DRAM_RANK_2 (0x1 << 9) +#define ICL_DRAM_RANK_3 (0x2 << 9) +#define ICL_DRAM_RANK_4 (0x3 << 9) + +#define SA_PERF_STATUS_0_0_0_MCHBAR_PC _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5918) +#define DG1_QCLK_RATIO_MASK REG_GENMASK(9, 2) +#define DG1_QCLK_REFERENCE REG_BIT(10) + +#define MCHBAR_CH0_CR_TC_PRE_0_0_0_MCHBAR _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x4000) +#define DG1_DRAM_T_RDPRE_MASK REG_GENMASK(16, 11) +#define DG1_DRAM_T_RP_MASK REG_GENMASK(6, 0) +#define MCHBAR_CH0_CR_TC_PRE_0_0_0_MCHBAR_HIGH _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x4004) +#define DG1_DRAM_T_RCD_MASK REG_GENMASK(15, 9) +#define DG1_DRAM_T_RAS_MASK REG_GENMASK(8, 1) /* * Please see hsw_read_dcomp() and hsw_write_dcomp() before using this register, @@ -12290,6 +12330,7 @@ enum skl_power_gate { /* MOCS (Memory Object Control State) registers */ #define GEN9_LNCFCMOCS(i) _MMIO(0xb020 + (i) * 4) /* L3 Cache Control */ +#define GEN9_LNCFCMOCS_REG_COUNT 32 #define __GEN9_RCS0_MOCS0 0xc800 #define GEN9_GFX_MOCS(i) _MMIO(__GEN9_RCS0_MOCS0 + (i) * 4) @@ -12337,6 +12378,7 @@ enum skl_power_gate { _ICL_PHY_MISC_B) #define ICL_PHY_MISC_MUX_DDID (1 << 28) #define ICL_PHY_MISC_DE_IO_COMP_PWR_DOWN (1 << 23) +#define DG2_PHY_DP_TX_ACK_MASK REG_GENMASK(23, 20) /* Icelake Display Stream Compression Registers */ #define DSCA_PICTURE_PARAMETER_SET_0 _MMIO(0x6B200) diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 37aef1308573..ce446716d092 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -42,22 +42,17 @@ #include "i915_active.h" #include "i915_drv.h" -#include "i915_globals.h" #include "i915_trace.h" #include "intel_pm.h" struct execute_cb { struct irq_work work; struct i915_sw_fence *fence; - void (*hook)(struct i915_request *rq, struct dma_fence *signal); struct i915_request *signal; }; -static struct i915_global_request { - struct i915_global base; - struct kmem_cache *slab_requests; - struct kmem_cache *slab_execute_cbs; -} global; +static struct kmem_cache *slab_requests; +static struct kmem_cache *slab_execute_cbs; static const char *i915_fence_get_driver_name(struct dma_fence *fence) { @@ -108,13 +103,16 @@ static signed long i915_fence_wait(struct dma_fence *fence, struct kmem_cache *i915_request_slab_cache(void) { - return global.slab_requests; + return slab_requests; } static void i915_fence_release(struct dma_fence *fence) { struct i915_request *rq = to_request(fence); + GEM_BUG_ON(rq->guc_prio != GUC_PRIO_INIT && + rq->guc_prio != GUC_PRIO_FINI); + /* * The request is put onto a RCU freelist (i.e. the address * is immediately reused), mark the fences as being freed now. @@ -126,41 +124,19 @@ static void i915_fence_release(struct dma_fence *fence) i915_sw_fence_fini(&rq->semaphore); /* - * Keep one request on each engine for reserved use under mempressure - * - * We do not hold a reference to the engine here and so have to be - * very careful in what rq->engine we poke. The virtual engine is - * referenced via the rq->context and we released that ref during - * i915_request_retire(), ergo we must not dereference a virtual - * engine here. Not that we would want to, as the only consumer of - * the reserved engine->request_pool is the power management parking, - * which must-not-fail, and that is only run on the physical engines. 
- * - * Since the request must have been executed to be have completed, - * we know that it will have been processed by the HW and will - * not be unsubmitted again, so rq->engine and rq->execution_mask - * at this point is stable. rq->execution_mask will be a single - * bit if the last and _only_ engine it could execution on was a - * physical engine, if it's multiple bits then it started on and - * could still be on a virtual engine. Thus if the mask is not a - * power-of-two we assume that rq->engine may still be a virtual - * engine and so a dangling invalid pointer that we cannot dereference - * - * For example, consider the flow of a bonded request through a virtual - * engine. The request is created with a wide engine mask (all engines - * that we might execute on). On processing the bond, the request mask - * is reduced to one or more engines. If the request is subsequently - * bound to a single engine, it will then be constrained to only - * execute on that engine and never returned to the virtual engine - * after timeslicing away, see __unwind_incomplete_requests(). Thus we - * know that if the rq->execution_mask is a single bit, rq->engine - * can be a physical engine with the exact corresponding mask. + * Keep one request on each engine for reserved use under mempressure, + * do not use with virtual engines as this really is only needed for + * kernel contexts. */ - if (is_power_of_2(rq->execution_mask) && - !cmpxchg(&rq->engine->request_pool, NULL, rq)) + if (!intel_engine_is_virtual(rq->engine) && + !cmpxchg(&rq->engine->request_pool, NULL, rq)) { + intel_context_put(rq->context); return; + } + + intel_context_put(rq->context); - kmem_cache_free(global.slab_requests, rq); + kmem_cache_free(slab_requests, rq); } const struct dma_fence_ops i915_fence_ops = { @@ -177,18 +153,7 @@ static void irq_execute_cb(struct irq_work *wrk) struct execute_cb *cb = container_of(wrk, typeof(*cb), work); i915_sw_fence_complete(cb->fence); - kmem_cache_free(global.slab_execute_cbs, cb); -} - -static void irq_execute_cb_hook(struct irq_work *wrk) -{ - struct execute_cb *cb = container_of(wrk, typeof(*cb), work); - - cb->hook(container_of(cb->fence, struct i915_request, submit), - &cb->signal->fence); - i915_request_put(cb->signal); - - irq_execute_cb(wrk); + kmem_cache_free(slab_execute_cbs, cb); } static __always_inline void @@ -216,7 +181,7 @@ static bool irq_work_imm(struct irq_work *wrk) return false; } -static void __notify_execute_cb_imm(struct i915_request *rq) +void i915_request_notify_execute_cb_imm(struct i915_request *rq) { __notify_execute_cb(rq, irq_work_imm); } @@ -272,11 +237,11 @@ i915_request_active_engine(struct i915_request *rq, * check that we have acquired the lock on the final engine. 
*/ locked = READ_ONCE(rq->engine); - spin_lock_irq(&locked->active.lock); + spin_lock_irq(&locked->sched_engine->lock); while (unlikely(locked != (engine = READ_ONCE(rq->engine)))) { - spin_unlock(&locked->active.lock); + spin_unlock(&locked->sched_engine->lock); locked = engine; - spin_lock(&locked->active.lock); + spin_lock(&locked->sched_engine->lock); } if (i915_request_is_active(rq)) { @@ -285,42 +250,11 @@ i915_request_active_engine(struct i915_request *rq, ret = true; } - spin_unlock_irq(&locked->active.lock); + spin_unlock_irq(&locked->sched_engine->lock); return ret; } - -static void remove_from_engine(struct i915_request *rq) -{ - struct intel_engine_cs *engine, *locked; - - /* - * Virtual engines complicate acquiring the engine timeline lock, - * as their rq->engine pointer is not stable until under that - * engine lock. The simple ploy we use is to take the lock then - * check that the rq still belongs to the newly locked engine. - */ - locked = READ_ONCE(rq->engine); - spin_lock_irq(&locked->active.lock); - while (unlikely(locked != (engine = READ_ONCE(rq->engine)))) { - spin_unlock(&locked->active.lock); - spin_lock(&engine->active.lock); - locked = engine; - } - list_del_init(&rq->sched.link); - - clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); - clear_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags); - - /* Prevent further __await_execution() registering a cb, then flush */ - set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags); - - spin_unlock_irq(&locked->active.lock); - - __notify_execute_cb_imm(rq); -} - static void __rq_init_watchdog(struct i915_request *rq) { rq->watchdog.timer.function = NULL; @@ -417,8 +351,7 @@ bool i915_request_retire(struct i915_request *rq) * after removing the breadcrumb and signaling it, so that we do not * inadvertently attach the breadcrumb to a completed request. */ - if (!list_empty(&rq->sched.link)) - remove_from_engine(rq); + rq->engine->remove_active_request(rq); GEM_BUG_ON(!llist_empty(&rq->execute_cb)); __list_del_entry(&rq->link); /* poison neither prev/next (RCU walks) */ @@ -443,6 +376,7 @@ void i915_request_retire_upto(struct i915_request *rq) do { tmp = list_first_entry(&tl->requests, typeof(*tmp), link); + GEM_BUG_ON(!i915_request_completed(tmp)); } while (i915_request_retire(tmp) && tmp != rq); } @@ -517,19 +451,14 @@ static bool __request_in_flight(const struct i915_request *signal) static int __await_execution(struct i915_request *rq, struct i915_request *signal, - void (*hook)(struct i915_request *rq, - struct dma_fence *signal), gfp_t gfp) { struct execute_cb *cb; - if (i915_request_is_active(signal)) { - if (hook) - hook(rq, &signal->fence); + if (i915_request_is_active(signal)) return 0; - } - cb = kmem_cache_alloc(global.slab_execute_cbs, gfp); + cb = kmem_cache_alloc(slab_execute_cbs, gfp); if (!cb) return -ENOMEM; @@ -537,12 +466,6 @@ __await_execution(struct i915_request *rq, i915_sw_fence_await(cb->fence); init_irq_work(&cb->work, irq_execute_cb); - if (hook) { - cb->hook = hook; - cb->signal = i915_request_get(signal); - cb->work.func = irq_execute_cb_hook; - } - /* * Register the callback first, then see if the signaler is already * active. 
This ensures that if we race with the @@ -559,7 +482,7 @@ __await_execution(struct i915_request *rq, if (llist_add(&cb->work.node.llist, &signal->execute_cb)) { if (i915_request_is_active(signal) || __request_in_flight(signal)) - __notify_execute_cb_imm(signal); + i915_request_notify_execute_cb_imm(signal); } return 0; @@ -637,7 +560,7 @@ bool __i915_request_submit(struct i915_request *request) RQ_TRACE(request, "\n"); GEM_BUG_ON(!irqs_disabled()); - lockdep_assert_held(&engine->active.lock); + lockdep_assert_held(&engine->sched_engine->lock); /* * With the advent of preempt-to-busy, we frequently encounter @@ -649,7 +572,7 @@ bool __i915_request_submit(struct i915_request *request) * * We must remove the request from the caller's priority queue, * and the caller must only call us when the request is in their - * priority queue, under the active.lock. This ensures that the + * priority queue, under the sched_engine->lock. This ensures that the * request has *not* yet been retired and we can safely move * the request into the engine->active.list where it will be * dropped upon retiring. (Otherwise if resubmit a *retired* @@ -690,11 +613,15 @@ bool __i915_request_submit(struct i915_request *request) request->ring->vaddr + request->postfix); trace_i915_request_execute(request); - engine->serial++; + if (engine->bump_serial) + engine->bump_serial(engine); + else + engine->serial++; + result = true; GEM_BUG_ON(test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags)); - list_move_tail(&request->sched.link, &engine->active.requests); + engine->add_active_request(request); active: clear_bit(I915_FENCE_FLAG_PQUEUE, &request->fence.flags); set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags); @@ -724,11 +651,11 @@ void i915_request_submit(struct i915_request *request) unsigned long flags; /* Will be called from irq-context when using foreign fences. */ - spin_lock_irqsave(&engine->active.lock, flags); + spin_lock_irqsave(&engine->sched_engine->lock, flags); __i915_request_submit(request); - spin_unlock_irqrestore(&engine->active.lock, flags); + spin_unlock_irqrestore(&engine->sched_engine->lock, flags); } void __i915_request_unsubmit(struct i915_request *request) @@ -742,7 +669,7 @@ void __i915_request_unsubmit(struct i915_request *request) RQ_TRACE(request, "\n"); GEM_BUG_ON(!irqs_disabled()); - lockdep_assert_held(&engine->active.lock); + lockdep_assert_held(&engine->sched_engine->lock); /* * Before we remove this breadcrumb from the signal list, we have @@ -775,23 +702,11 @@ void i915_request_unsubmit(struct i915_request *request) unsigned long flags; /* Will be called from irq-context when using foreign fences. 
*/ - spin_lock_irqsave(&engine->active.lock, flags); + spin_lock_irqsave(&engine->sched_engine->lock, flags); __i915_request_unsubmit(request); - spin_unlock_irqrestore(&engine->active.lock, flags); -} - -static void __cancel_request(struct i915_request *rq) -{ - struct intel_engine_cs *engine = NULL; - - i915_request_active_engine(rq, &engine); - - if (engine && intel_engine_pulse(engine)) - intel_gt_handle_error(engine->gt, engine->mask, 0, - "request cancellation by %s", - current->comm); + spin_unlock_irqrestore(&engine->sched_engine->lock, flags); } void i915_request_cancel(struct i915_request *rq, int error) @@ -801,7 +716,7 @@ void i915_request_cancel(struct i915_request *rq, int error) set_bit(I915_FENCE_FLAG_SENTINEL, &rq->fence.flags); - __cancel_request(rq); + intel_context_cancel_request(rq->context, rq); } static int __i915_sw_fence_call @@ -889,7 +804,7 @@ request_alloc_slow(struct intel_timeline *tl, rq = list_first_entry(&tl->requests, typeof(*rq), link); i915_request_retire(rq); - rq = kmem_cache_alloc(global.slab_requests, + rq = kmem_cache_alloc(slab_requests, gfp | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); if (rq) return rq; @@ -902,7 +817,7 @@ request_alloc_slow(struct intel_timeline *tl, retire_requests(tl); out: - return kmem_cache_alloc(global.slab_requests, gfp); + return kmem_cache_alloc(slab_requests, gfp); } static void __i915_request_ctor(void *arg) @@ -963,7 +878,7 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp) * * Do not use kmem_cache_zalloc() here! */ - rq = kmem_cache_alloc(global.slab_requests, + rq = kmem_cache_alloc(slab_requests, gfp | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); if (unlikely(!rq)) { rq = request_alloc_slow(tl, &ce->engine->request_pool, gfp); @@ -973,7 +888,19 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp) } } - rq->context = ce; + /* + * Hold a reference to the intel_context over the life of an i915_request. + * Without this an i915_request can exist after the context has been + * destroyed (e.g. request retired, context closed, but user space holds + * a reference to the request from an out fence). In the case of GuC + * submission + virtual engine, the engine that the request references + * is also destroyed, which can trigger a bad pointer deref in fence ops + * (e.g. i915_fence_get_driver_name). We could likely change these + * functions to avoid touching the engine, but let's just be safe and + * hold the intel_context reference. In execlist mode the request always + * eventually points to a physical engine, so this isn't an issue. + */ + rq->context = intel_context_get(ce); rq->engine = ce->engine; rq->ring = ce->ring; rq->execution_mask = ce->engine->mask; @@ -996,6 +923,8 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp) rq->rcustate = get_state_synchronize_rcu(); /* acts as smp_mb() */ + rq->guc_prio = GUC_PRIO_INIT; + /* We bump the ref for the fence chain */ i915_sw_fence_reinit(&i915_request_get(rq)->submit); i915_sw_fence_reinit(&i915_request_get(rq)->semaphore); @@ -1050,7 +979,8 @@ err_unwind: GEM_BUG_ON(!list_empty(&rq->sched.waiters_list)); err_free: - kmem_cache_free(global.slab_requests, rq); + intel_context_put(ce); + kmem_cache_free(slab_requests, rq); err_unreserve: intel_context_unpin(ce); return ERR_PTR(ret); @@ -1253,7 +1183,7 @@ emit_semaphore_wait(struct i915_request *to, goto await_fence; /* Only submit our spinner after the signaler is running!
*/ - if (__await_execution(to, from, NULL, gfp)) + if (__await_execution(to, from, gfp)) goto await_fence; if (__emit_semaphore_wait(to, from, from->fence.seqno)) @@ -1284,16 +1214,14 @@ static int intel_timeline_sync_set_start(struct intel_timeline *tl, static int __i915_request_await_execution(struct i915_request *to, - struct i915_request *from, - void (*hook)(struct i915_request *rq, - struct dma_fence *signal)) + struct i915_request *from) { int err; GEM_BUG_ON(intel_context_is_barrier(from->context)); /* Submit both requests at the same time */ - err = __await_execution(to, from, hook, I915_FENCE_GFP); + err = __await_execution(to, from, I915_FENCE_GFP); if (err) return err; @@ -1343,7 +1271,7 @@ __i915_request_await_execution(struct i915_request *to, } /* Couple the dependency tree for PI on this exposed to->fence */ - if (to->engine->schedule) { + if (to->engine->sched_engine->schedule) { err = i915_sched_node_add_dependency(&to->sched, &from->sched, I915_DEPENDENCY_WEAK); @@ -1406,9 +1334,7 @@ i915_request_await_external(struct i915_request *rq, struct dma_fence *fence) int i915_request_await_execution(struct i915_request *rq, - struct dma_fence *fence, - void (*hook)(struct i915_request *rq, - struct dma_fence *signal)) + struct dma_fence *fence) { struct dma_fence **child = &fence; unsigned int nchild = 1; @@ -1439,8 +1365,7 @@ i915_request_await_execution(struct i915_request *rq, if (dma_fence_is_i915(fence)) ret = __i915_request_await_execution(rq, - to_request(fence), - hook); + to_request(fence)); else ret = i915_request_await_external(rq, fence); if (ret < 0) @@ -1466,7 +1391,7 @@ await_request_submit(struct i915_request *to, struct i915_request *from) &from->submit, I915_FENCE_GFP); else - return __i915_request_await_execution(to, from, NULL); + return __i915_request_await_execution(to, from); } static int @@ -1482,7 +1407,7 @@ i915_request_await_request(struct i915_request *to, struct i915_request *from) return 0; } - if (to->engine->schedule) { + if (to->engine->sched_engine->schedule) { ret = i915_sched_node_add_dependency(&to->sched, &from->sched, I915_DEPENDENCY_EXTERNAL); @@ -1490,7 +1415,8 @@ i915_request_await_request(struct i915_request *to, struct i915_request *from) return ret; } - if (is_power_of_2(to->execution_mask | READ_ONCE(from->execution_mask))) + if (!intel_engine_uses_guc(to->engine) && + is_power_of_2(to->execution_mask | READ_ONCE(from->execution_mask))) ret = await_request_submit(to, from); else ret = emit_semaphore_wait(to, from, I915_FENCE_GFP); @@ -1649,6 +1575,8 @@ __i915_request_add_to_timeline(struct i915_request *rq) prev = to_request(__i915_active_fence_set(&timeline->last_request, &rq->fence)); if (prev && !__i915_request_is_complete(prev)) { + bool uses_guc = intel_engine_uses_guc(rq->engine); + /* * The requests are supposed to be kept in order. 
However, * we need to be wary in case the timeline->last_request @@ -1659,7 +1587,9 @@ __i915_request_add_to_timeline(struct i915_request *rq) i915_seqno_passed(prev->fence.seqno, rq->fence.seqno)); - if (is_power_of_2(READ_ONCE(prev->engine)->mask | rq->engine->mask)) + if ((!uses_guc && + is_power_of_2(READ_ONCE(prev->engine)->mask | rq->engine->mask)) || + (uses_guc && prev->context == rq->context)) i915_sw_fence_await_sw_fence(&rq->submit, &prev->submit, &rq->submitq); @@ -1667,7 +1597,7 @@ __i915_request_add_to_timeline(struct i915_request *rq) __i915_sw_fence_await_dma_fence(&rq->submit, &prev->fence, &rq->dmaq); - if (rq->engine->schedule) + if (rq->engine->sched_engine->schedule) __i915_sched_node_add_dependency(&rq->sched, &prev->sched, &rq->dep, @@ -1739,8 +1669,8 @@ void __i915_request_queue(struct i915_request *rq, * decide whether to preempt the entire chain so that it is ready to * run at the earliest possible convenience. */ - if (attr && rq->engine->schedule) - rq->engine->schedule(rq, attr); + if (attr && rq->engine->sched_engine->schedule) + rq->engine->sched_engine->schedule(rq, attr); local_bh_disable(); __i915_request_queue_bh(rq); @@ -2100,31 +2030,61 @@ void i915_request_show(struct drm_printer *m, name); } -#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) -#include "selftests/mock_request.c" -#include "selftests/i915_request.c" -#endif +static bool engine_match_ring(struct intel_engine_cs *engine, struct i915_request *rq) +{ + u32 ring = ENGINE_READ(engine, RING_START); + + return ring == i915_ggtt_offset(rq->ring->vma); +} -static void i915_global_request_shrink(void) +static bool match_ring(struct i915_request *rq) { - kmem_cache_shrink(global.slab_execute_cbs); - kmem_cache_shrink(global.slab_requests); + struct intel_engine_cs *engine; + bool found; + int i; + + if (!intel_engine_is_virtual(rq->engine)) + return engine_match_ring(rq->engine, rq); + + found = false; + i = 0; + while ((engine = intel_engine_get_sibling(rq->engine, i++))) { + found = engine_match_ring(engine, rq); + if (found) + break; + } + + return found; } -static void i915_global_request_exit(void) +enum i915_request_state i915_test_request_state(struct i915_request *rq) { - kmem_cache_destroy(global.slab_execute_cbs); - kmem_cache_destroy(global.slab_requests); + if (i915_request_completed(rq)) + return I915_REQUEST_COMPLETE; + + if (!i915_request_started(rq)) + return I915_REQUEST_PENDING; + + if (match_ring(rq)) + return I915_REQUEST_ACTIVE; + + return I915_REQUEST_QUEUED; } -static struct i915_global_request global = { { - .shrink = i915_global_request_shrink, - .exit = i915_global_request_exit, -} }; +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/mock_request.c" +#include "selftests/i915_request.c" +#endif + +void i915_request_module_exit(void) +{ + kmem_cache_destroy(slab_execute_cbs); + kmem_cache_destroy(slab_requests); +} -int __init i915_global_request_init(void) +int __init i915_request_module_init(void) { - global.slab_requests = + slab_requests = kmem_cache_create("i915_request", sizeof(struct i915_request), __alignof__(struct i915_request), @@ -2132,20 +2092,19 @@ int __init i915_global_request_init(void) SLAB_RECLAIM_ACCOUNT | SLAB_TYPESAFE_BY_RCU, __i915_request_ctor); - if (!global.slab_requests) + if (!slab_requests) return -ENOMEM; - global.slab_execute_cbs = KMEM_CACHE(execute_cb, + slab_execute_cbs = KMEM_CACHE(execute_cb, SLAB_HWCACHE_ALIGN | SLAB_RECLAIM_ACCOUNT | SLAB_TYPESAFE_BY_RCU); - if (!global.slab_execute_cbs) + if (!slab_execute_cbs) goto err_requests; - 
i915_global_register(&global.base); return 0; err_requests: - kmem_cache_destroy(global.slab_requests); + kmem_cache_destroy(slab_requests); return -ENOMEM; } diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 270f6cd37650..1bc1349ba3c2 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -285,6 +285,23 @@ struct i915_request { struct hrtimer timer; } watchdog; + /* + * Requests may need to be stalled when using GuC submission waiting for + * certain GuC operations to complete. If that is the case, stalled + * requests are added to a per context list of stalled requests. The + * below list_head is the link in that list. + */ + struct list_head guc_fence_link; + + /** + * Priority level while the request is inflight. Differs from i915 + * scheduler priority. See comment above + * I915_SCHEDULER_CAP_STATIC_PRIORITY_MAP for details. + */ +#define GUC_PRIO_INIT 0xff +#define GUC_PRIO_FINI 0xfe + u8 guc_prio; + I915_SELFTEST_DECLARE(struct { struct list_head link; unsigned long delay; @@ -352,9 +369,7 @@ int i915_request_await_object(struct i915_request *to, int i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence); int i915_request_await_execution(struct i915_request *rq, - struct dma_fence *fence, - void (*hook)(struct i915_request *rq, - struct dma_fence *signal)); + struct dma_fence *fence); void i915_request_add(struct i915_request *rq); @@ -613,7 +628,7 @@ i915_request_active_timeline(const struct i915_request *rq) * this submission. */ return rcu_dereference_protected(rq->timeline, - lockdep_is_held(&rq->engine->active.lock)); + lockdep_is_held(&rq->engine->sched_engine->lock)); } static inline u32 @@ -641,4 +656,19 @@ bool i915_request_active_engine(struct i915_request *rq, struct intel_engine_cs **active); +void i915_request_notify_execute_cb_imm(struct i915_request *rq); + +enum i915_request_state { + I915_REQUEST_UNKNOWN = 0, + I915_REQUEST_COMPLETE, + I915_REQUEST_PENDING, + I915_REQUEST_QUEUED, + I915_REQUEST_ACTIVE, +}; + +enum i915_request_state i915_test_request_state(struct i915_request *rq); + +void i915_request_module_exit(void); +int i915_request_module_init(void); + #endif /* I915_REQUEST_H */ diff --git a/drivers/gpu/drm/i915/i915_scatterlist.c b/drivers/gpu/drm/i915/i915_scatterlist.c index 69e9e6c3135e..4a6712dca838 100644 --- a/drivers/gpu/drm/i915/i915_scatterlist.c +++ b/drivers/gpu/drm/i915/i915_scatterlist.c @@ -6,6 +6,9 @@ #include "i915_scatterlist.h" +#include "i915_buddy.h" +#include "i915_ttm_buddy_manager.h" + #include <drm/drm_mm.h> #include <linux/slab.h> @@ -104,6 +107,83 @@ struct sg_table *i915_sg_from_mm_node(const struct drm_mm_node *node, return st; } +/** + * i915_sg_from_buddy_resource - Create an sg_table from a struct + * i915_buddy_block list + * @res: The struct i915_ttm_buddy_resource. + * @region_start: An offset to add to the dma addresses of the sg list. + * + * Create a struct sg_table, initializing it from struct i915_buddy_block list, + * taking a maximum segment length into account, splitting into segments + * if necessary. + * + * Return: A pointer to a kmalloced struct sg_table on success, negative + * error code cast to an error pointer on failure. 
+ */ +struct sg_table *i915_sg_from_buddy_resource(struct ttm_resource *res, + u64 region_start) +{ + struct i915_ttm_buddy_resource *bman_res = to_ttm_buddy_resource(res); + const u64 size = res->num_pages << PAGE_SHIFT; + const u64 max_segment = rounddown(UINT_MAX, PAGE_SIZE); + struct i915_buddy_mm *mm = bman_res->mm; + struct list_head *blocks = &bman_res->blocks; + struct i915_buddy_block *block; + struct scatterlist *sg; + struct sg_table *st; + resource_size_t prev_end; + + GEM_BUG_ON(list_empty(blocks)); + + st = kmalloc(sizeof(*st), GFP_KERNEL); + if (!st) + return ERR_PTR(-ENOMEM); + + if (sg_alloc_table(st, res->num_pages, GFP_KERNEL)) { + kfree(st); + return ERR_PTR(-ENOMEM); + } + + sg = st->sgl; + st->nents = 0; + prev_end = (resource_size_t)-1; + + list_for_each_entry(block, blocks, link) { + u64 block_size, offset; + + block_size = min_t(u64, size, i915_buddy_block_size(mm, block)); + offset = i915_buddy_block_offset(block); + + while (block_size) { + u64 len; + + if (offset != prev_end || sg->length >= max_segment) { + if (st->nents) + sg = __sg_next(sg); + + sg_dma_address(sg) = region_start + offset; + sg_dma_len(sg) = 0; + sg->length = 0; + st->nents++; + } + + len = min(block_size, max_segment - sg->length); + sg->length += len; + sg_dma_len(sg) += len; + + offset += len; + block_size -= len; + + prev_end = offset; + } + } + + sg_mark_end(sg); + i915_sg_trim(st); + + return st; +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/scatterlist.c" #endif diff --git a/drivers/gpu/drm/i915/i915_scatterlist.h b/drivers/gpu/drm/i915/i915_scatterlist.h index 5acca45ea981..b8bd5925b03f 100644 --- a/drivers/gpu/drm/i915/i915_scatterlist.h +++ b/drivers/gpu/drm/i915/i915_scatterlist.h @@ -14,6 +14,7 @@ #include "i915_gem.h" struct drm_mm_node; +struct ttm_resource; /* * Optimised SGL iterator for GEM objects @@ -145,4 +146,8 @@ bool i915_sg_trim(struct sg_table *orig_st); struct sg_table *i915_sg_from_mm_node(const struct drm_mm_node *node, u64 region_start); + +struct sg_table *i915_sg_from_buddy_resource(struct ttm_resource *res, + u64 region_start); + #endif diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index efa638c3acc7..762127dd56c5 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -7,15 +7,11 @@ #include <linux/mutex.h> #include "i915_drv.h" -#include "i915_globals.h" #include "i915_request.h" #include "i915_scheduler.h" -static struct i915_global_scheduler { - struct i915_global base; - struct kmem_cache *slab_dependencies; - struct kmem_cache *slab_priorities; -} global; +static struct kmem_cache *slab_dependencies; +static struct kmem_cache *slab_priorities; static DEFINE_SPINLOCK(schedule_lock); @@ -40,7 +36,7 @@ static inline struct i915_priolist *to_priolist(struct rb_node *rb) return rb_entry(rb, struct i915_priolist, node); } -static void assert_priolists(struct intel_engine_execlists * const execlists) +static void assert_priolists(struct i915_sched_engine * const sched_engine) { struct rb_node *rb; long last_prio; @@ -48,11 +44,11 @@ static void assert_priolists(struct intel_engine_execlists * const execlists) if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) return; - GEM_BUG_ON(rb_first_cached(&execlists->queue) != - rb_first(&execlists->queue.rb_root)); + GEM_BUG_ON(rb_first_cached(&sched_engine->queue) != + rb_first(&sched_engine->queue.rb_root)); last_prio = INT_MAX; - for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) { + for (rb = 
rb_first_cached(&sched_engine->queue); rb; rb = rb_next(rb)) { const struct i915_priolist *p = to_priolist(rb); GEM_BUG_ON(p->priority > last_prio); @@ -61,23 +57,22 @@ static void assert_priolists(struct intel_engine_execlists * const execlists) } struct list_head * -i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio) +i915_sched_lookup_priolist(struct i915_sched_engine *sched_engine, int prio) { - struct intel_engine_execlists * const execlists = &engine->execlists; struct i915_priolist *p; struct rb_node **parent, *rb; bool first = true; - lockdep_assert_held(&engine->active.lock); - assert_priolists(execlists); + lockdep_assert_held(&sched_engine->lock); + assert_priolists(sched_engine); - if (unlikely(execlists->no_priolist)) + if (unlikely(sched_engine->no_priolist)) prio = I915_PRIORITY_NORMAL; find_priolist: /* most positive priority is scheduled first, equal priorities fifo */ rb = NULL; - parent = &execlists->queue.rb_root.rb_node; + parent = &sched_engine->queue.rb_root.rb_node; while (*parent) { rb = *parent; p = to_priolist(rb); @@ -92,9 +87,9 @@ find_priolist: } if (prio == I915_PRIORITY_NORMAL) { - p = &execlists->default_priolist; + p = &sched_engine->default_priolist; } else { - p = kmem_cache_alloc(global.slab_priorities, GFP_ATOMIC); + p = kmem_cache_alloc(slab_priorities, GFP_ATOMIC); /* Convert an allocation failure to a priority bump */ if (unlikely(!p)) { prio = I915_PRIORITY_NORMAL; /* recurses just once */ @@ -107,7 +102,7 @@ find_priolist: * requests, so if userspace lied about their * dependencies that reordering may be visible. */ - execlists->no_priolist = true; + sched_engine->no_priolist = true; goto find_priolist; } } @@ -116,27 +111,27 @@ find_priolist: INIT_LIST_HEAD(&p->requests); rb_link_node(&p->node, rb, parent); - rb_insert_color_cached(&p->node, &execlists->queue, first); + rb_insert_color_cached(&p->node, &sched_engine->queue, first); return &p->requests; } void __i915_priolist_free(struct i915_priolist *p) { - kmem_cache_free(global.slab_priorities, p); + kmem_cache_free(slab_priorities, p); } struct sched_cache { struct list_head *priolist; }; -static struct intel_engine_cs * -sched_lock_engine(const struct i915_sched_node *node, - struct intel_engine_cs *locked, +static struct i915_sched_engine * +lock_sched_engine(struct i915_sched_node *node, + struct i915_sched_engine *locked, struct sched_cache *cache) { const struct i915_request *rq = node_to_request(node); - struct intel_engine_cs *engine; + struct i915_sched_engine *sched_engine; GEM_BUG_ON(!locked); @@ -146,81 +141,22 @@ sched_lock_engine(const struct i915_sched_node *node, * engine lock. The simple ploy we use is to take the lock then * check that the rq still belongs to the newly locked engine. 
*/ - while (locked != (engine = READ_ONCE(rq->engine))) { - spin_unlock(&locked->active.lock); + while (locked != (sched_engine = READ_ONCE(rq->engine)->sched_engine)) { + spin_unlock(&locked->lock); memset(cache, 0, sizeof(*cache)); - spin_lock(&engine->active.lock); - locked = engine; + spin_lock(&sched_engine->lock); + locked = sched_engine; } - GEM_BUG_ON(locked != engine); + GEM_BUG_ON(locked != sched_engine); return locked; } -static inline int rq_prio(const struct i915_request *rq) -{ - return rq->sched.attr.priority; -} - -static inline bool need_preempt(int prio, int active) -{ - /* - * Allow preemption of low -> normal -> high, but we do - * not allow low priority tasks to preempt other low priority - * tasks under the impression that latency for low priority - * tasks does not matter (as much as background throughput), - * so kiss. - */ - return prio >= max(I915_PRIORITY_NORMAL, active); -} - -static void kick_submission(struct intel_engine_cs *engine, - const struct i915_request *rq, - int prio) -{ - const struct i915_request *inflight; - - /* - * We only need to kick the tasklet once for the high priority - * new context we add into the queue. - */ - if (prio <= engine->execlists.queue_priority_hint) - return; - - rcu_read_lock(); - - /* Nothing currently active? We're overdue for a submission! */ - inflight = execlists_active(&engine->execlists); - if (!inflight) - goto unlock; - - /* - * If we are already the currently executing context, don't - * bother evaluating if we should preempt ourselves. - */ - if (inflight->context == rq->context) - goto unlock; - - ENGINE_TRACE(engine, - "bumping queue-priority-hint:%d for rq:%llx:%lld, inflight:%llx:%lld prio %d\n", - prio, - rq->fence.context, rq->fence.seqno, - inflight->fence.context, inflight->fence.seqno, - inflight->sched.attr.priority); - - engine->execlists.queue_priority_hint = prio; - if (need_preempt(prio, rq_prio(inflight))) - tasklet_hi_schedule(&engine->execlists.tasklet); - -unlock: - rcu_read_unlock(); -} - static void __i915_schedule(struct i915_sched_node *node, const struct i915_sched_attr *attr) { const int prio = max(attr->priority, node->attr.priority); - struct intel_engine_cs *engine; + struct i915_sched_engine *sched_engine; struct i915_dependency *dep, *p; struct i915_dependency stack; struct sched_cache cache; @@ -295,23 +231,31 @@ static void __i915_schedule(struct i915_sched_node *node, } memset(&cache, 0, sizeof(cache)); - engine = node_to_request(node)->engine; - spin_lock(&engine->active.lock); + sched_engine = node_to_request(node)->engine->sched_engine; + spin_lock(&sched_engine->lock); /* Fifo and depth-first replacement ensure our deps execute before us */ - engine = sched_lock_engine(node, engine, &cache); + sched_engine = lock_sched_engine(node, sched_engine, &cache); list_for_each_entry_safe_reverse(dep, p, &dfs, dfs_link) { + struct i915_request *from = container_of(dep->signaler, + struct i915_request, + sched); INIT_LIST_HEAD(&dep->dfs_link); node = dep->signaler; - engine = sched_lock_engine(node, engine, &cache); - lockdep_assert_held(&engine->active.lock); + sched_engine = lock_sched_engine(node, sched_engine, &cache); + lockdep_assert_held(&sched_engine->lock); /* Recheck after acquiring the engine->timeline.lock */ if (prio <= node->attr.priority || node_signaled(node)) continue; - GEM_BUG_ON(node_to_request(node)->engine != engine); + GEM_BUG_ON(node_to_request(node)->engine->sched_engine != + sched_engine); + + /* Must be called before changing the nodes priority */ + if 
(sched_engine->bump_inflight_request_prio) + sched_engine->bump_inflight_request_prio(from, prio); WRITE_ONCE(node->attr.priority, prio); @@ -329,16 +273,17 @@ static void __i915_schedule(struct i915_sched_node *node, if (i915_request_in_priority_queue(node_to_request(node))) { if (!cache.priolist) cache.priolist = - i915_sched_lookup_priolist(engine, + i915_sched_lookup_priolist(sched_engine, prio); list_move_tail(&node->link, cache.priolist); } /* Defer (tasklet) submission until after all of our updates. */ - kick_submission(engine, node_to_request(node), prio); + if (sched_engine->kick_backend) + sched_engine->kick_backend(node_to_request(node), prio); } - spin_unlock(&engine->active.lock); + spin_unlock(&sched_engine->lock); } void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr) @@ -371,13 +316,13 @@ void i915_sched_node_reinit(struct i915_sched_node *node) static struct i915_dependency * i915_dependency_alloc(void) { - return kmem_cache_alloc(global.slab_dependencies, GFP_KERNEL); + return kmem_cache_alloc(slab_dependencies, GFP_KERNEL); } static void i915_dependency_free(struct i915_dependency *dep) { - kmem_cache_free(global.slab_dependencies, dep); + kmem_cache_free(slab_dependencies, dep); } bool __i915_sched_node_add_dependency(struct i915_sched_node *node, @@ -489,39 +434,78 @@ void i915_request_show_with_schedule(struct drm_printer *m, rcu_read_unlock(); } -static void i915_global_scheduler_shrink(void) +static void default_destroy(struct kref *kref) +{ + struct i915_sched_engine *sched_engine = + container_of(kref, typeof(*sched_engine), ref); + + tasklet_kill(&sched_engine->tasklet); /* flush the callback */ + kfree(sched_engine); +} + +static bool default_disabled(struct i915_sched_engine *sched_engine) { - kmem_cache_shrink(global.slab_dependencies); - kmem_cache_shrink(global.slab_priorities); + return false; } -static void i915_global_scheduler_exit(void) +struct i915_sched_engine * +i915_sched_engine_create(unsigned int subclass) { - kmem_cache_destroy(global.slab_dependencies); - kmem_cache_destroy(global.slab_priorities); + struct i915_sched_engine *sched_engine; + + sched_engine = kzalloc(sizeof(*sched_engine), GFP_KERNEL); + if (!sched_engine) + return NULL; + + kref_init(&sched_engine->ref); + + sched_engine->queue = RB_ROOT_CACHED; + sched_engine->queue_priority_hint = INT_MIN; + sched_engine->destroy = default_destroy; + sched_engine->disabled = default_disabled; + + INIT_LIST_HEAD(&sched_engine->requests); + INIT_LIST_HEAD(&sched_engine->hold); + + spin_lock_init(&sched_engine->lock); + lockdep_set_subclass(&sched_engine->lock, subclass); + + /* + * Due to an interesting quirk in lockdep's internal debug tracking, + * after setting a subclass we must ensure the lock is used. Otherwise, + * nr_unused_locks is incremented once too often. 
+ */ +#ifdef CONFIG_DEBUG_LOCK_ALLOC + local_irq_disable(); + lock_map_acquire(&sched_engine->lock.dep_map); + lock_map_release(&sched_engine->lock.dep_map); + local_irq_enable(); +#endif + + return sched_engine; } -static struct i915_global_scheduler global = { { - .shrink = i915_global_scheduler_shrink, - .exit = i915_global_scheduler_exit, -} }; +void i915_scheduler_module_exit(void) +{ + kmem_cache_destroy(slab_dependencies); + kmem_cache_destroy(slab_priorities); +} -int __init i915_global_scheduler_init(void) +int __init i915_scheduler_module_init(void) { - global.slab_dependencies = KMEM_CACHE(i915_dependency, + slab_dependencies = KMEM_CACHE(i915_dependency, SLAB_HWCACHE_ALIGN | SLAB_TYPESAFE_BY_RCU); - if (!global.slab_dependencies) + if (!slab_dependencies) return -ENOMEM; - global.slab_priorities = KMEM_CACHE(i915_priolist, 0); - if (!global.slab_priorities) + slab_priorities = KMEM_CACHE(i915_priolist, 0); + if (!slab_priorities) goto err_priorities; - i915_global_register(&global.base); return 0; err_priorities: - kmem_cache_destroy(global.slab_priorities); + kmem_cache_destroy(slab_priorities); return -ENOMEM; } diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h index 858a0938f47a..0b9b86af6c7f 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.h +++ b/drivers/gpu/drm/i915/i915_scheduler.h @@ -39,7 +39,7 @@ void i915_schedule(struct i915_request *request, const struct i915_sched_attr *attr); struct list_head * -i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio); +i915_sched_lookup_priolist(struct i915_sched_engine *sched_engine, int prio); void __i915_priolist_free(struct i915_priolist *p); static inline void i915_priolist_free(struct i915_priolist *p) @@ -48,9 +48,61 @@ static inline void i915_priolist_free(struct i915_priolist *p) __i915_priolist_free(p); } +struct i915_sched_engine * +i915_sched_engine_create(unsigned int subclass); + +static inline struct i915_sched_engine * +i915_sched_engine_get(struct i915_sched_engine *sched_engine) +{ + kref_get(&sched_engine->ref); + return sched_engine; +} + +static inline void +i915_sched_engine_put(struct i915_sched_engine *sched_engine) +{ + kref_put(&sched_engine->ref, sched_engine->destroy); +} + +static inline bool +i915_sched_engine_is_empty(struct i915_sched_engine *sched_engine) +{ + return RB_EMPTY_ROOT(&sched_engine->queue.rb_root); +} + +static inline void +i915_sched_engine_reset_on_empty(struct i915_sched_engine *sched_engine) +{ + if (i915_sched_engine_is_empty(sched_engine)) + sched_engine->no_priolist = false; +} + +static inline void +i915_sched_engine_active_lock_bh(struct i915_sched_engine *sched_engine) +{ + local_bh_disable(); /* prevent local softirq and lock recursion */ + tasklet_lock(&sched_engine->tasklet); +} + +static inline void +i915_sched_engine_active_unlock_bh(struct i915_sched_engine *sched_engine) +{ + tasklet_unlock(&sched_engine->tasklet); + local_bh_enable(); /* restore softirq, and kick ksoftirqd! 
*/ +} + void i915_request_show_with_schedule(struct drm_printer *m, const struct i915_request *rq, const char *prefix, int indent); +static inline bool +i915_sched_engine_disabled(struct i915_sched_engine *sched_engine) +{ + return sched_engine->disabled(sched_engine); +} + +void i915_scheduler_module_exit(void); +int i915_scheduler_module_init(void); + #endif /* _I915_SCHEDULER_H_ */ diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h index 343ed44d5ed4..b0a1b58c7893 100644 --- a/drivers/gpu/drm/i915/i915_scheduler_types.h +++ b/drivers/gpu/drm/i915/i915_scheduler_types.h @@ -91,4 +91,115 @@ struct i915_dependency { &(rq__)->sched.signalers_list, \ signal_link) +/** + * struct i915_sched_engine - scheduler engine + * + * A schedule engine represents a submission queue with different priority + * bands. It contains all the common state (relative to the backend) to queue, + * track, and submit a request. + * + * This object at the moment is quite i915 specific but will transition into a + * container for the drm_gpu_scheduler plus a few other variables once the i915 + * is integrated with the DRM scheduler. + */ +struct i915_sched_engine { + /** + * @ref: reference count of schedule engine object + */ + struct kref ref; + + /** + * @lock: protects requests in priority lists, requests, hold and + * tasklet while running + */ + spinlock_t lock; + + /** + * @requests: list of requests inflight on this schedule engine + */ + struct list_head requests; + + /** + * @hold: list of ready requests, but on hold + */ + struct list_head hold; + + /** + * @tasklet: softirq tasklet for submission + */ + struct tasklet_struct tasklet; + + /** + * @default_priolist: priority list for I915_PRIORITY_NORMAL + */ + struct i915_priolist default_priolist; + + /** + * @queue_priority_hint: Highest pending priority. + * + * When we add requests into the queue, or adjust the priority of + * executing requests, we compute the maximum priority of those + * pending requests. We can then use this value to determine if + * we need to preempt the executing requests to service the queue. + * However, since the we may have recorded the priority of an inflight + * request we wanted to preempt but since completed, at the time of + * dequeuing the priority hint may no longer may match the highest + * available request priority. + */ + int queue_priority_hint; + + /** + * @queue: queue of requests, in priority lists + */ + struct rb_root_cached queue; + + /** + * @no_priolist: priority lists disabled + */ + bool no_priolist; + + /** + * @private_data: private data of the submission backend + */ + void *private_data; + + /** + * @destroy: destroy schedule engine / cleanup in backend + */ + void (*destroy)(struct kref *kref); + + /** + * @disabled: check if backend has disabled submission + */ + bool (*disabled)(struct i915_sched_engine *sched_engine); + + /** + * @kick_backend: kick backend after a request's priority has changed + */ + void (*kick_backend)(const struct i915_request *rq, + int prio); + + /** + * @bump_inflight_request_prio: update priority of an inflight request + */ + void (*bump_inflight_request_prio)(struct i915_request *rq, + int prio); + + /** + * @retire_inflight_request_prio: indicate request is retired to + * priority tracking + */ + void (*retire_inflight_request_prio)(struct i915_request *rq); + + /** + * @schedule: adjust priority of request + * + * Call when the priority on a request has changed and it and its + * dependencies may need rescheduling. 
Note the request itself may + * not be ready to run! + */ + void (*schedule)(struct i915_request *request, + const struct i915_sched_attr *attr); +}; + #endif /* _I915_SCHEDULER_TYPES_H_ */ diff --git a/drivers/gpu/drm/i915/i915_sw_fence_work.c b/drivers/gpu/drm/i915/i915_sw_fence_work.c index a3a81bb8f2c3..5b33ef23d54c 100644 --- a/drivers/gpu/drm/i915/i915_sw_fence_work.c +++ b/drivers/gpu/drm/i915/i915_sw_fence_work.c @@ -16,11 +16,8 @@ static void fence_complete(struct dma_fence_work *f) static void fence_work(struct work_struct *work) { struct dma_fence_work *f = container_of(work, typeof(*f), work); - int err; - err = f->ops->work(f); - if (err) - dma_fence_set_error(&f->dma, err); + f->ops->work(f); fence_complete(f); dma_fence_put(&f->dma); diff --git a/drivers/gpu/drm/i915/i915_sw_fence_work.h b/drivers/gpu/drm/i915/i915_sw_fence_work.h index 2c409f11c5c5..d56806918d13 100644 --- a/drivers/gpu/drm/i915/i915_sw_fence_work.h +++ b/drivers/gpu/drm/i915/i915_sw_fence_work.h @@ -17,7 +17,7 @@ struct dma_fence_work; struct dma_fence_work_ops { const char *name; - int (*work)(struct dma_fence_work *f); + void (*work)(struct dma_fence_work *f); void (*release)(struct dma_fence_work *f); }; diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c index 873bf996ceb5..cdf0e9c6fd73 100644 --- a/drivers/gpu/drm/i915/i915_sysfs.c +++ b/drivers/gpu/drm/i915/i915_sysfs.c @@ -272,7 +272,7 @@ static ssize_t gt_cur_freq_mhz_show(struct device *kdev, struct drm_i915_private *i915 = kdev_minor_to_i915(kdev); struct intel_rps *rps = &i915->gt.rps; - return sysfs_emit(buf, "%d\n", intel_gpu_freq(rps, rps->cur_freq)); + return sysfs_emit(buf, "%d\n", intel_rps_get_requested_frequency(rps)); } static ssize_t gt_boost_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) @@ -326,9 +326,10 @@ static ssize_t vlv_rpe_freq_mhz_show(struct device *kdev, static ssize_t gt_max_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); - struct intel_rps *rps = &dev_priv->gt.rps; + struct intel_gt *gt = &dev_priv->gt; + struct intel_rps *rps = >->rps; - return sysfs_emit(buf, "%d\n", intel_gpu_freq(rps, rps->max_freq_softlimit)); + return sysfs_emit(buf, "%d\n", intel_rps_get_max_frequency(rps)); } static ssize_t gt_max_freq_mhz_store(struct device *kdev, @@ -336,7 +337,8 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev, const char *buf, size_t count) { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); - struct intel_rps *rps = &dev_priv->gt.rps; + struct intel_gt *gt = &dev_priv->gt; + struct intel_rps *rps = >->rps; ssize_t ret; u32 val; @@ -344,53 +346,26 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev, if (ret) return ret; - mutex_lock(&rps->lock); - - val = intel_freq_opcode(rps, val); - if (val < rps->min_freq || - val > rps->max_freq || - val < rps->min_freq_softlimit) { - ret = -EINVAL; - goto unlock; - } - - if (val > rps->rp0_freq) - DRM_DEBUG("User requested overclocking to %d\n", - intel_gpu_freq(rps, val)); - - rps->max_freq_softlimit = val; - - val = clamp_t(int, rps->cur_freq, - rps->min_freq_softlimit, - rps->max_freq_softlimit); - - /* - * We still need *_set_rps to process the new max_delay and - * update the interrupt limits and PMINTRMSK even though - * frequency request may be unchanged. 
- */ - intel_rps_set(rps, val); - -unlock: - mutex_unlock(&rps->lock); + ret = intel_rps_set_max_frequency(rps, val); return ret ?: count; } static ssize_t gt_min_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) { - struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); - struct intel_rps *rps = &dev_priv->gt.rps; + struct drm_i915_private *i915 = kdev_minor_to_i915(kdev); + struct intel_gt *gt = &i915->gt; + struct intel_rps *rps = >->rps; - return sysfs_emit(buf, "%d\n", intel_gpu_freq(rps, rps->min_freq_softlimit)); + return sysfs_emit(buf, "%d\n", intel_rps_get_min_frequency(rps)); } static ssize_t gt_min_freq_mhz_store(struct device *kdev, struct device_attribute *attr, const char *buf, size_t count) { - struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); - struct intel_rps *rps = &dev_priv->gt.rps; + struct drm_i915_private *i915 = kdev_minor_to_i915(kdev); + struct intel_rps *rps = &i915->gt.rps; ssize_t ret; u32 val; @@ -398,31 +373,7 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev, if (ret) return ret; - mutex_lock(&rps->lock); - - val = intel_freq_opcode(rps, val); - if (val < rps->min_freq || - val > rps->max_freq || - val > rps->max_freq_softlimit) { - ret = -EINVAL; - goto unlock; - } - - rps->min_freq_softlimit = val; - - val = clamp_t(int, rps->cur_freq, - rps->min_freq_softlimit, - rps->max_freq_softlimit); - - /* - * We still need *_set_rps to process the new min_delay and - * update the interrupt limits and PMINTRMSK even though - * frequency request may be unchanged. - */ - intel_rps_set(rps, val); - -unlock: - mutex_unlock(&rps->lock); + ret = intel_rps_set_min_frequency(rps, val); return ret ?: count; } @@ -448,11 +399,11 @@ static ssize_t gt_rp_mhz_show(struct device *kdev, struct device_attribute *attr u32 val; if (attr == &dev_attr_gt_RP0_freq_mhz) - val = intel_gpu_freq(rps, rps->rp0_freq); + val = intel_rps_get_rp0_frequency(rps); else if (attr == &dev_attr_gt_RP1_freq_mhz) - val = intel_gpu_freq(rps, rps->rp1_freq); + val = intel_rps_get_rp1_frequency(rps); else if (attr == &dev_attr_gt_RPn_freq_mhz) - val = intel_gpu_freq(rps, rps->min_freq); + val = intel_rps_get_rpn_frequency(rps); else BUG(); diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 6778ad2a14a4..806ad688274b 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -794,30 +794,40 @@ DECLARE_EVENT_CLASS(i915_request, TP_STRUCT__entry( __field(u32, dev) __field(u64, ctx) + __field(u32, guc_id) __field(u16, class) __field(u16, instance) __field(u32, seqno) + __field(u32, tail) ), TP_fast_assign( __entry->dev = rq->engine->i915->drm.primary->index; __entry->class = rq->engine->uabi_class; __entry->instance = rq->engine->uabi_instance; + __entry->guc_id = rq->context->guc_id; __entry->ctx = rq->fence.context; __entry->seqno = rq->fence.seqno; + __entry->tail = rq->tail; ), - TP_printk("dev=%u, engine=%u:%u, ctx=%llu, seqno=%u", + TP_printk("dev=%u, engine=%u:%u, guc_id=%u, ctx=%llu, seqno=%u, tail=%u", __entry->dev, __entry->class, __entry->instance, - __entry->ctx, __entry->seqno) + __entry->guc_id, __entry->ctx, __entry->seqno, + __entry->tail) ); DEFINE_EVENT(i915_request, i915_request_add, - TP_PROTO(struct i915_request *rq), - TP_ARGS(rq) + TP_PROTO(struct i915_request *rq), + TP_ARGS(rq) ); #if defined(CONFIG_DRM_I915_LOW_LEVEL_TRACEPOINTS) +DEFINE_EVENT(i915_request, i915_request_guc_submit, + TP_PROTO(struct i915_request *rq), + TP_ARGS(rq) +); + 
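/*
 * Editor's illustrative aside, not part of the patch: a minimal userspace
 * sketch (hypothetical names, plain C, no kernel TRACE_EVENT machinery) of
 * the stub pattern used in the i915_trace.h hunk above, where low-level
 * tracepoints such as i915_request_guc_submit compile to empty inline
 * functions when CONFIG_DRM_I915_LOW_LEVEL_TRACEPOINTS is not set, so call
 * sites in the submission path can stay unconditional.
 */
#include <stdio.h>

struct request { unsigned int guc_id; unsigned long long seqno; };

#ifdef LOW_LEVEL_TRACEPOINTS
static inline void trace_request_guc_submit(const struct request *rq)
{
	printf("guc_submit: guc_id=%u seqno=%llu\n", rq->guc_id, rq->seqno);
}
#else
static inline void trace_request_guc_submit(const struct request *rq)
{
	/* Empty stub: the call folds away when tracing is compiled out. */
	(void)rq;
}
#endif

int main(void)
{
	struct request rq = { .guc_id = 3, .seqno = 42 };

	/* No-op unless built with -DLOW_LEVEL_TRACEPOINTS. */
	trace_request_guc_submit(&rq);
	return 0;
}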
DEFINE_EVENT(i915_request, i915_request_submit, TP_PROTO(struct i915_request *rq), TP_ARGS(rq) @@ -885,9 +895,117 @@ TRACE_EVENT(i915_request_out, __entry->ctx, __entry->seqno, __entry->completed) ); +DECLARE_EVENT_CLASS(intel_context, + TP_PROTO(struct intel_context *ce), + TP_ARGS(ce), + + TP_STRUCT__entry( + __field(u32, guc_id) + __field(int, pin_count) + __field(u32, sched_state) + __field(u32, guc_sched_state_no_lock) + __field(u8, guc_prio) + ), + + TP_fast_assign( + __entry->guc_id = ce->guc_id; + __entry->pin_count = atomic_read(&ce->pin_count); + __entry->sched_state = ce->guc_state.sched_state; + __entry->guc_sched_state_no_lock = + atomic_read(&ce->guc_sched_state_no_lock); + __entry->guc_prio = ce->guc_prio; + ), + + TP_printk("guc_id=%d, pin_count=%d sched_state=0x%x,0x%x, guc_prio=%u", + __entry->guc_id, __entry->pin_count, + __entry->sched_state, + __entry->guc_sched_state_no_lock, + __entry->guc_prio) +); + +DEFINE_EVENT(intel_context, intel_context_set_prio, + TP_PROTO(struct intel_context *ce), + TP_ARGS(ce) +); + +DEFINE_EVENT(intel_context, intel_context_reset, + TP_PROTO(struct intel_context *ce), + TP_ARGS(ce) +); + +DEFINE_EVENT(intel_context, intel_context_ban, + TP_PROTO(struct intel_context *ce), + TP_ARGS(ce) +); + +DEFINE_EVENT(intel_context, intel_context_register, + TP_PROTO(struct intel_context *ce), + TP_ARGS(ce) +); + +DEFINE_EVENT(intel_context, intel_context_deregister, + TP_PROTO(struct intel_context *ce), + TP_ARGS(ce) +); + +DEFINE_EVENT(intel_context, intel_context_deregister_done, + TP_PROTO(struct intel_context *ce), + TP_ARGS(ce) +); + +DEFINE_EVENT(intel_context, intel_context_sched_enable, + TP_PROTO(struct intel_context *ce), + TP_ARGS(ce) +); + +DEFINE_EVENT(intel_context, intel_context_sched_disable, + TP_PROTO(struct intel_context *ce), + TP_ARGS(ce) +); + +DEFINE_EVENT(intel_context, intel_context_sched_done, + TP_PROTO(struct intel_context *ce), + TP_ARGS(ce) +); + +DEFINE_EVENT(intel_context, intel_context_create, + TP_PROTO(struct intel_context *ce), + TP_ARGS(ce) +); + +DEFINE_EVENT(intel_context, intel_context_fence_release, + TP_PROTO(struct intel_context *ce), + TP_ARGS(ce) +); + +DEFINE_EVENT(intel_context, intel_context_free, + TP_PROTO(struct intel_context *ce), + TP_ARGS(ce) +); + +DEFINE_EVENT(intel_context, intel_context_steal_guc_id, + TP_PROTO(struct intel_context *ce), + TP_ARGS(ce) +); + +DEFINE_EVENT(intel_context, intel_context_do_pin, + TP_PROTO(struct intel_context *ce), + TP_ARGS(ce) +); + +DEFINE_EVENT(intel_context, intel_context_do_unpin, + TP_PROTO(struct intel_context *ce), + TP_ARGS(ce) +); + #else #if !defined(TRACE_HEADER_MULTI_READ) static inline void +trace_i915_request_guc_submit(struct i915_request *rq) +{ +} + +static inline void trace_i915_request_submit(struct i915_request *rq) { } @@ -906,6 +1024,81 @@ static inline void trace_i915_request_out(struct i915_request *rq) { } + +static inline void +trace_intel_context_set_prio(struct intel_context *ce) +{ +} + +static inline void +trace_intel_context_reset(struct intel_context *ce) +{ +} + +static inline void +trace_intel_context_ban(struct intel_context *ce) +{ +} + +static inline void +trace_intel_context_register(struct intel_context *ce) +{ +} + +static inline void +trace_intel_context_deregister(struct intel_context *ce) +{ +} + +static inline void +trace_intel_context_deregister_done(struct intel_context *ce) +{ +} + +static inline void +trace_intel_context_sched_enable(struct intel_context *ce) +{ +} + +static inline void 
+trace_intel_context_sched_disable(struct intel_context *ce) +{ +} + +static inline void +trace_intel_context_sched_done(struct intel_context *ce) +{ +} + +static inline void +trace_intel_context_create(struct intel_context *ce) +{ +} + +static inline void +trace_intel_context_fence_release(struct intel_context *ce) +{ +} + +static inline void +trace_intel_context_free(struct intel_context *ce) +{ +} + +static inline void +trace_intel_context_steal_guc_id(struct intel_context *ce) +{ +} + +static inline void +trace_intel_context_do_pin(struct intel_context *ce) +{ +} + +static inline void +trace_intel_context_do_unpin(struct intel_context *ce) +{ +} #endif #endif diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c new file mode 100644 index 000000000000..6877362f6b85 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c @@ -0,0 +1,258 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +#include <linux/slab.h> + +#include <drm/ttm/ttm_bo_driver.h> +#include <drm/ttm/ttm_placement.h> + +#include "i915_ttm_buddy_manager.h" + +#include "i915_buddy.h" +#include "i915_gem.h" + +struct i915_ttm_buddy_manager { + struct ttm_resource_manager manager; + struct i915_buddy_mm mm; + struct list_head reserved; + struct mutex lock; + u64 default_page_size; +}; + +static struct i915_ttm_buddy_manager * +to_buddy_manager(struct ttm_resource_manager *man) +{ + return container_of(man, struct i915_ttm_buddy_manager, manager); +} + +static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man, + struct ttm_buffer_object *bo, + const struct ttm_place *place, + struct ttm_resource **res) +{ + struct i915_ttm_buddy_manager *bman = to_buddy_manager(man); + struct i915_ttm_buddy_resource *bman_res; + struct i915_buddy_mm *mm = &bman->mm; + unsigned long n_pages; + unsigned int min_order; + u64 min_page_size; + u64 size; + int err; + + GEM_BUG_ON(place->fpfn || place->lpfn); + + bman_res = kzalloc(sizeof(*bman_res), GFP_KERNEL); + if (!bman_res) + return -ENOMEM; + + ttm_resource_init(bo, place, &bman_res->base); + INIT_LIST_HEAD(&bman_res->blocks); + bman_res->mm = mm; + + GEM_BUG_ON(!bman_res->base.num_pages); + size = bman_res->base.num_pages << PAGE_SHIFT; + + min_page_size = bman->default_page_size; + if (bo->page_alignment) + min_page_size = bo->page_alignment << PAGE_SHIFT; + + GEM_BUG_ON(min_page_size < mm->chunk_size); + min_order = ilog2(min_page_size) - ilog2(mm->chunk_size); + if (place->flags & TTM_PL_FLAG_CONTIGUOUS) { + size = roundup_pow_of_two(size); + min_order = ilog2(size) - ilog2(mm->chunk_size); + } + + if (size > mm->size) { + err = -E2BIG; + goto err_free_res; + } + + n_pages = size >> ilog2(mm->chunk_size); + + do { + struct i915_buddy_block *block; + unsigned int order; + + order = fls(n_pages) - 1; + GEM_BUG_ON(order > mm->max_order); + GEM_BUG_ON(order < min_order); + + do { + mutex_lock(&bman->lock); + block = i915_buddy_alloc(mm, order); + mutex_unlock(&bman->lock); + if (!IS_ERR(block)) + break; + + if (order-- == min_order) { + err = -ENOSPC; + goto err_free_blocks; + } + } while (1); + + n_pages -= BIT(order); + + list_add_tail(&block->link, &bman_res->blocks); + + if (!n_pages) + break; + } while (1); + + *res = &bman_res->base; + return 0; + +err_free_blocks: + mutex_lock(&bman->lock); + i915_buddy_free_list(mm, &bman_res->blocks); + mutex_unlock(&bman->lock); +err_free_res: + kfree(bman_res); + return err; +} + +static void i915_ttm_buddy_man_free(struct 
ttm_resource_manager *man, + struct ttm_resource *res) +{ + struct i915_ttm_buddy_resource *bman_res = to_ttm_buddy_resource(res); + struct i915_ttm_buddy_manager *bman = to_buddy_manager(man); + + mutex_lock(&bman->lock); + i915_buddy_free_list(&bman->mm, &bman_res->blocks); + mutex_unlock(&bman->lock); + + kfree(bman_res); +} + +static const struct ttm_resource_manager_func i915_ttm_buddy_manager_func = { + .alloc = i915_ttm_buddy_man_alloc, + .free = i915_ttm_buddy_man_free, +}; + + +/** + * i915_ttm_buddy_man_init - Setup buddy allocator based ttm manager + * @bdev: The ttm device + * @type: Memory type we want to manage + * @use_tt: Set use_tt for the manager + * @size: The size in bytes to manage + * @default_page_size: The default minimum page size in bytes for allocations, + * this must be at least as large as @chunk_size, and can be overridden by + * setting the BO page_alignment, to be larger or smaller as needed. + * @chunk_size: The minimum page size in bytes for our allocations i.e + * order-zero + * + * Note that the starting address is assumed to be zero here, since this + * simplifies keeping the property where allocated blocks having natural + * power-of-two alignment. So long as the real starting address is some large + * power-of-two, or naturally start from zero, then this should be fine. Also + * the &i915_ttm_buddy_man_reserve interface can be used to preserve alignment + * if say there is some unusable range from the start of the region. We can + * revisit this in the future and make the interface accept an actual starting + * offset and let it take care of the rest. + * + * Note that if the @size is not aligned to the @chunk_size then we perform the + * required rounding to get the usable size. The final size in pages can be + * taken from &ttm_resource_manager.size. + * + * Return: 0 on success, negative error code on failure. + */ +int i915_ttm_buddy_man_init(struct ttm_device *bdev, + unsigned int type, bool use_tt, + u64 size, u64 default_page_size, + u64 chunk_size) +{ + struct ttm_resource_manager *man; + struct i915_ttm_buddy_manager *bman; + int err; + + bman = kzalloc(sizeof(*bman), GFP_KERNEL); + if (!bman) + return -ENOMEM; + + err = i915_buddy_init(&bman->mm, size, chunk_size); + if (err) + goto err_free_bman; + + mutex_init(&bman->lock); + INIT_LIST_HEAD(&bman->reserved); + GEM_BUG_ON(default_page_size < chunk_size); + bman->default_page_size = default_page_size; + + man = &bman->manager; + man->use_tt = use_tt; + man->func = &i915_ttm_buddy_manager_func; + ttm_resource_manager_init(man, bman->mm.size >> PAGE_SHIFT); + + ttm_resource_manager_set_used(man, true); + ttm_set_driver_manager(bdev, type, man); + + return 0; + +err_free_bman: + kfree(bman); + return err; +} + +/** + * i915_ttm_buddy_man_fini - Destroy the buddy allocator ttm manager + * @bdev: The ttm device + * @type: Memory type we want to manage + * + * Note that if we reserved anything with &i915_ttm_buddy_man_reserve, this will + * also be freed for us here. + * + * Return: 0 on success, negative error code on failure. 
+ */ +int i915_ttm_buddy_man_fini(struct ttm_device *bdev, unsigned int type) +{ + struct ttm_resource_manager *man = ttm_manager_type(bdev, type); + struct i915_ttm_buddy_manager *bman = to_buddy_manager(man); + struct i915_buddy_mm *mm = &bman->mm; + int ret; + + ttm_resource_manager_set_used(man, false); + + ret = ttm_resource_manager_evict_all(bdev, man); + if (ret) + return ret; + + ttm_set_driver_manager(bdev, type, NULL); + + mutex_lock(&bman->lock); + i915_buddy_free_list(mm, &bman->reserved); + i915_buddy_fini(mm); + mutex_unlock(&bman->lock); + + ttm_resource_manager_cleanup(man); + kfree(bman); + + return 0; +} + +/** + * i915_ttm_buddy_man_reserve - Reserve address range + * @man: The buddy allocator ttm manager + * @start: The offset in bytes, where the region start is assumed to be zero + * @size: The size in bytes + * + * Note that the starting address for the region is always assumed to be zero. + * + * Return: 0 on success, negative error code on failure. + */ +int i915_ttm_buddy_man_reserve(struct ttm_resource_manager *man, + u64 start, u64 size) +{ + struct i915_ttm_buddy_manager *bman = to_buddy_manager(man); + struct i915_buddy_mm *mm = &bman->mm; + int ret; + + mutex_lock(&bman->lock); + ret = i915_buddy_alloc_range(mm, &bman->reserved, start, size); + mutex_unlock(&bman->lock); + + return ret; +} + diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.h b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.h new file mode 100644 index 000000000000..0722d33f3e14 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.h @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef __I915_TTM_BUDDY_MANAGER_H__ +#define __I915_TTM_BUDDY_MANAGER_H__ + +#include <linux/list.h> +#include <linux/types.h> + +#include <drm/ttm/ttm_resource.h> + +struct ttm_device; +struct ttm_resource_manager; +struct i915_buddy_mm; + +/** + * struct i915_ttm_buddy_resource + * + * @base: struct ttm_resource base class we extend + * @blocks: the list of struct i915_buddy_block for this resource/allocation + * @mm: the struct i915_buddy_mm for this resource + * + * Extends the struct ttm_resource to manage an address space allocation with + * one or more struct i915_buddy_block. + */ +struct i915_ttm_buddy_resource { + struct ttm_resource base; + struct list_head blocks; + struct i915_buddy_mm *mm; +}; + +/** + * to_ttm_buddy_resource + * + * @res: the resource to upcast + * + * Upcast the struct ttm_resource object into a struct i915_ttm_buddy_resource. 
+ */ +static inline struct i915_ttm_buddy_resource * +to_ttm_buddy_resource(struct ttm_resource *res) +{ + return container_of(res, struct i915_ttm_buddy_resource, base); +} + +int i915_ttm_buddy_man_init(struct ttm_device *bdev, + unsigned type, bool use_tt, + u64 size, u64 default_page_size, u64 chunk_size); +int i915_ttm_buddy_man_fini(struct ttm_device *bdev, + unsigned int type); + +int i915_ttm_buddy_man_reserve(struct ttm_resource_manager *man, + u64 start, u64 size); + +#endif diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 0f227f28b280..4b7fc4647e46 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -34,24 +34,20 @@ #include "gt/intel_gt_requests.h" #include "i915_drv.h" -#include "i915_globals.h" #include "i915_sw_fence_work.h" #include "i915_trace.h" #include "i915_vma.h" -static struct i915_global_vma { - struct i915_global base; - struct kmem_cache *slab_vmas; -} global; +static struct kmem_cache *slab_vmas; struct i915_vma *i915_vma_alloc(void) { - return kmem_cache_zalloc(global.slab_vmas, GFP_KERNEL); + return kmem_cache_zalloc(slab_vmas, GFP_KERNEL); } void i915_vma_free(struct i915_vma *vma) { - return kmem_cache_free(global.slab_vmas, vma); + return kmem_cache_free(slab_vmas, vma); } #if IS_ENABLED(CONFIG_DRM_I915_ERRLOG_GEM) && IS_ENABLED(CONFIG_DRM_DEBUG_MM) @@ -300,14 +296,13 @@ struct i915_vma_work { unsigned int flags; }; -static int __vma_bind(struct dma_fence_work *work) +static void __vma_bind(struct dma_fence_work *work) { struct i915_vma_work *vw = container_of(work, typeof(*vw), base); struct i915_vma *vma = vw->vma; vma->ops->bind_vma(vw->vm, &vw->stash, vma, vw->cache_level, vw->flags); - return 0; } static void __vma_release(struct dma_fence_work *work) @@ -1415,27 +1410,16 @@ void i915_vma_make_purgeable(struct i915_vma *vma) #include "selftests/i915_vma.c" #endif -static void i915_global_vma_shrink(void) +void i915_vma_module_exit(void) { - kmem_cache_shrink(global.slab_vmas); + kmem_cache_destroy(slab_vmas); } -static void i915_global_vma_exit(void) -{ - kmem_cache_destroy(global.slab_vmas); -} - -static struct i915_global_vma global = { { - .shrink = i915_global_vma_shrink, - .exit = i915_global_vma_exit, -} }; - -int __init i915_global_vma_init(void) +int __init i915_vma_module_init(void) { - global.slab_vmas = KMEM_CACHE(i915_vma, SLAB_HWCACHE_ALIGN); - if (!global.slab_vmas) + slab_vmas = KMEM_CACHE(i915_vma, SLAB_HWCACHE_ALIGN); + if (!slab_vmas) return -ENOMEM; - i915_global_register(&global.base); return 0; } diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index eca452a9851f..ed69f66c7ab0 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -426,4 +426,7 @@ static inline int i915_vma_sync(struct i915_vma *vma) return i915_active_wait(&vma->active); } +void i915_vma_module_exit(void); +int i915_vma_module_init(void); + #endif diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index e0a10f36acc1..305facedd284 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -59,7 +59,6 @@ static const char * const platform_names[] = { PLATFORM_NAME(GEMINILAKE), PLATFORM_NAME(COFFEELAKE), PLATFORM_NAME(COMETLAKE), - PLATFORM_NAME(CANNONLAKE), PLATFORM_NAME(ICELAKE), PLATFORM_NAME(ELKHARTLAKE), PLATFORM_NAME(JASPERLAKE), @@ -68,6 +67,8 @@ static const char * const platform_names[] = { PLATFORM_NAME(DG1), PLATFORM_NAME(ALDERLAKE_S), 
PLATFORM_NAME(ALDERLAKE_P), + PLATFORM_NAME(XEHPSDV), + PLATFORM_NAME(DG2), }; #undef PLATFORM_NAME @@ -96,9 +97,17 @@ static const char *iommu_name(void) void intel_device_info_print_static(const struct intel_device_info *info, struct drm_printer *p) { - drm_printf(p, "graphics_ver: %u\n", info->graphics_ver); - drm_printf(p, "media_ver: %u\n", info->media_ver); - drm_printf(p, "display_ver: %u\n", info->display.ver); + if (info->graphics_rel) + drm_printf(p, "graphics version: %u.%02u\n", info->graphics_ver, info->graphics_rel); + else + drm_printf(p, "graphics version: %u\n", info->graphics_ver); + + if (info->media_rel) + drm_printf(p, "media version: %u.%02u\n", info->media_ver, info->media_rel); + else + drm_printf(p, "media version: %u\n", info->media_ver); + + drm_printf(p, "display version: %u\n", info->display.ver); drm_printf(p, "gt: %d\n", info->gt); drm_printf(p, "iommu: %s\n", iommu_name()); drm_printf(p, "memory-regions: %x\n", info->memory_regions); @@ -165,7 +174,6 @@ static const u16 subplatform_ulx_ids[] = { }; static const u16 subplatform_portf_ids[] = { - INTEL_CNL_PORT_F_IDS(0), INTEL_ICL_PORT_F_IDS(0), }; @@ -253,14 +261,14 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv) struct intel_runtime_info *runtime = RUNTIME_INFO(dev_priv); enum pipe pipe; - /* Wa_14011765242: adl-s A0 */ - if (IS_ADLS_DISPLAY_STEP(dev_priv, STEP_A0, STEP_A0)) + /* Wa_14011765242: adl-s A0,A1 */ + if (IS_ADLS_DISPLAY_STEP(dev_priv, STEP_A0, STEP_A2)) for_each_pipe(dev_priv, pipe) runtime->num_scalers[pipe] = 0; - else if (GRAPHICS_VER(dev_priv) >= 10) { + else if (DISPLAY_VER(dev_priv) >= 11) { for_each_pipe(dev_priv, pipe) runtime->num_scalers[pipe] = 2; - } else if (GRAPHICS_VER(dev_priv) == 9) { + } else if (DISPLAY_VER(dev_priv) >= 9) { runtime->num_scalers[PIPE_A] = 2; runtime->num_scalers[PIPE_B] = 2; runtime->num_scalers[PIPE_C] = 1; @@ -271,10 +279,10 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv) if (DISPLAY_VER(dev_priv) >= 13 || HAS_D12_PLANE_MINIMIZATION(dev_priv)) for_each_pipe(dev_priv, pipe) runtime->num_sprites[pipe] = 4; - else if (GRAPHICS_VER(dev_priv) >= 11) + else if (DISPLAY_VER(dev_priv) >= 11) for_each_pipe(dev_priv, pipe) runtime->num_sprites[pipe] = 6; - else if (GRAPHICS_VER(dev_priv) == 10 || IS_GEMINILAKE(dev_priv)) + else if (DISPLAY_VER(dev_priv) == 10) for_each_pipe(dev_priv, pipe) runtime->num_sprites[pipe] = 3; else if (IS_BROXTON(dev_priv)) { @@ -293,7 +301,7 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv) } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { for_each_pipe(dev_priv, pipe) runtime->num_sprites[pipe] = 2; - } else if (GRAPHICS_VER(dev_priv) >= 5 || IS_G4X(dev_priv)) { + } else if (DISPLAY_VER(dev_priv) >= 5 || IS_G4X(dev_priv)) { for_each_pipe(dev_priv, pipe) runtime->num_sprites[pipe] = 1; } @@ -357,7 +365,7 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv) info->display.has_dmc = 0; if (DISPLAY_VER(dev_priv) >= 10 && - (dfsm & CNL_DFSM_DISPLAY_DSC_DISABLE)) + (dfsm & GLK_DFSM_DISPLAY_DSC_DISABLE)) info->display.has_dsc = 0; } diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index b326aff65cd6..d328bb95c49b 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -76,8 +76,6 @@ enum intel_platform { INTEL_GEMINILAKE, INTEL_COFFEELAKE, INTEL_COMETLAKE, - /* gen10 */ - INTEL_CANNONLAKE, /* gen11 */ INTEL_ICELAKE, INTEL_ELKHARTLAKE, @@ -88,6 
+86,8 @@ enum intel_platform { INTEL_DG1, INTEL_ALDERLAKE_S, INTEL_ALDERLAKE_P, + INTEL_XEHPSDV, + INTEL_DG2, INTEL_MAX_PLATFORMS }; @@ -103,9 +103,13 @@ enum intel_platform { #define INTEL_SUBPLATFORM_ULT (0) #define INTEL_SUBPLATFORM_ULX (1) -/* CNL/ICL */ +/* ICL */ #define INTEL_SUBPLATFORM_PORTF (0) +/* DG2 */ +#define INTEL_SUBPLATFORM_G10 0 +#define INTEL_SUBPLATFORM_G11 1 + enum intel_ppgtt_type { INTEL_PPGTT_NONE = I915_GEM_PPGTT_NONE, INTEL_PPGTT_ALIASING = I915_GEM_PPGTT_ALIASING, @@ -127,7 +131,7 @@ enum intel_ppgtt_type { func(has_llc); \ func(has_logical_ring_contexts); \ func(has_logical_ring_elsq); \ - func(has_master_unit_irq); \ + func(has_mslices); \ func(has_pooled_eu); \ func(has_rc6); \ func(has_rc6p); \ @@ -141,6 +145,7 @@ enum intel_ppgtt_type { #define DEV_INFO_DISPLAY_FOR_EACH_FLAG(func) \ /* Keep in alphabetical order */ \ func(cursor_needs_physical); \ + func(has_cdclk_crawl); \ func(has_dmc); \ func(has_ddi); \ func(has_dp_mst); \ @@ -162,9 +167,10 @@ enum intel_ppgtt_type { struct intel_device_info { u8 graphics_ver; + u8 graphics_rel; u8 media_ver; + u8 media_rel; - u8 gt; /* GT number, 0 if undefined */ intel_engine_mask_t platform_engine_mask; /* Engines supported by the HW */ enum intel_platform platform; @@ -180,13 +186,13 @@ struct intel_device_info { u32 display_mmio_offset; + u8 gt; /* GT number, 0 if undefined */ + u8 pipe_mask; u8 cpu_transcoder_mask; u8 abox_mask; - u8 has_cdclk_crawl; /* does support CDCLK crawling */ - #define DEFINE_FLAG(name) u8 name:1 DEV_INFO_FOR_EACH_FLAG(DEFINE_FLAG); #undef DEFINE_FLAG diff --git a/drivers/gpu/drm/i915/intel_dram.c b/drivers/gpu/drm/i915/intel_dram.c index 50fdea84ba70..91866520c173 100644 --- a/drivers/gpu/drm/i915/intel_dram.c +++ b/drivers/gpu/drm/i915/intel_dram.c @@ -77,21 +77,21 @@ static int skl_get_dimm_ranks(u16 val) } /* Returns total Gb for the whole DIMM */ -static int cnl_get_dimm_size(u16 val) +static int icl_get_dimm_size(u16 val) { - return (val & CNL_DRAM_SIZE_MASK) * 8 / 2; + return (val & ICL_DRAM_SIZE_MASK) * 8 / 2; } -static int cnl_get_dimm_width(u16 val) +static int icl_get_dimm_width(u16 val) { - if (cnl_get_dimm_size(val) == 0) + if (icl_get_dimm_size(val) == 0) return 0; - switch (val & CNL_DRAM_WIDTH_MASK) { - case CNL_DRAM_WIDTH_X8: - case CNL_DRAM_WIDTH_X16: - case CNL_DRAM_WIDTH_X32: - val = (val & CNL_DRAM_WIDTH_MASK) >> CNL_DRAM_WIDTH_SHIFT; + switch (val & ICL_DRAM_WIDTH_MASK) { + case ICL_DRAM_WIDTH_X8: + case ICL_DRAM_WIDTH_X16: + case ICL_DRAM_WIDTH_X32: + val = (val & ICL_DRAM_WIDTH_MASK) >> ICL_DRAM_WIDTH_SHIFT; return 8 << val; default: MISSING_CASE(val); @@ -99,12 +99,12 @@ static int cnl_get_dimm_width(u16 val) } } -static int cnl_get_dimm_ranks(u16 val) +static int icl_get_dimm_ranks(u16 val) { - if (cnl_get_dimm_size(val) == 0) + if (icl_get_dimm_size(val) == 0) return 0; - val = (val & CNL_DRAM_RANK_MASK) >> CNL_DRAM_RANK_SHIFT; + val = (val & ICL_DRAM_RANK_MASK) >> ICL_DRAM_RANK_SHIFT; return val + 1; } @@ -121,10 +121,10 @@ skl_dram_get_dimm_info(struct drm_i915_private *i915, struct dram_dimm_info *dimm, int channel, char dimm_name, u16 val) { - if (GRAPHICS_VER(i915) >= 10) { - dimm->size = cnl_get_dimm_size(val); - dimm->width = cnl_get_dimm_width(val); - dimm->ranks = cnl_get_dimm_ranks(val); + if (GRAPHICS_VER(i915) >= 11) { + dimm->size = icl_get_dimm_size(val); + dimm->width = icl_get_dimm_width(val); + dimm->ranks = icl_get_dimm_ranks(val); } else { dimm->size = skl_get_dimm_size(val); dimm->width = skl_get_dimm_width(val); @@ -468,6 +468,7 @@ static int 
icl_pcode_read_mem_global_info(struct drm_i915_private *dev_priv) dram_info->num_channels = (val & 0xf0) >> 4; dram_info->num_qgv_points = (val & 0xf00) >> 8; + dram_info->num_psf_gv_points = (val & 0x3000) >> 12; return 0; } @@ -484,8 +485,7 @@ static int gen11_get_dram_info(struct drm_i915_private *i915) static int gen12_get_dram_info(struct drm_i915_private *i915) { - /* Always needed for GEN12+ */ - i915->dram_info.wm_lv_0_adjust_needed = true; + i915->dram_info.wm_lv_0_adjust_needed = false; return icl_pcode_read_mem_global_info(i915); } @@ -495,15 +495,15 @@ void intel_dram_detect(struct drm_i915_private *i915) struct dram_info *dram_info = &i915->dram_info; int ret; + if (GRAPHICS_VER(i915) < 9 || IS_DG2(i915) || !HAS_DISPLAY(i915)) + return; + /* * Assume level 0 watermark latency adjustment is needed until proven * otherwise, this w/a is not needed by bxt/glk. */ dram_info->wm_lv_0_adjust_needed = !IS_GEN9_LP(i915); - if (GRAPHICS_VER(i915) < 9 || !HAS_DISPLAY(i915)) - return; - if (GRAPHICS_VER(i915) >= 12) ret = gen12_get_dram_info(i915); else if (GRAPHICS_VER(i915) >= 11) diff --git a/drivers/gpu/drm/i915/intel_memory_region.c b/drivers/gpu/drm/i915/intel_memory_region.c index e6024eb7cca4..779eb2fa90b6 100644 --- a/drivers/gpu/drm/i915/intel_memory_region.c +++ b/drivers/gpu/drm/i915/intel_memory_region.c @@ -5,6 +5,7 @@ #include "intel_memory_region.h" #include "i915_drv.h" +#include "i915_ttm_buddy_manager.h" static const struct { u16 class; @@ -28,11 +29,6 @@ static const struct { }, }; -struct intel_region_reserve { - struct list_head link; - struct ttm_resource *res; -}; - struct intel_memory_region * intel_memory_region_lookup(struct drm_i915_private *i915, u16 class, u16 instance) @@ -64,27 +60,6 @@ intel_memory_region_by_type(struct drm_i915_private *i915, } /** - * intel_memory_region_unreserve - Unreserve all previously reserved - * ranges - * @mem: The region containing the reserved ranges. - */ -void intel_memory_region_unreserve(struct intel_memory_region *mem) -{ - struct intel_region_reserve *reserve, *next; - - if (!mem->priv_ops || !mem->priv_ops->free) - return; - - mutex_lock(&mem->mm_lock); - list_for_each_entry_safe(reserve, next, &mem->reserved, link) { - list_del(&reserve->link); - mem->priv_ops->free(mem, reserve->res); - kfree(reserve); - } - mutex_unlock(&mem->mm_lock); -} - -/** * intel_memory_region_reserve - Reserve a memory range * @mem: The region for which we want to reserve a range. * @offset: Start of the range to reserve. 
@@ -96,28 +71,11 @@ int intel_memory_region_reserve(struct intel_memory_region *mem, resource_size_t offset, resource_size_t size) { - int ret; - struct intel_region_reserve *reserve; - - if (!mem->priv_ops || !mem->priv_ops->reserve) - return -EINVAL; - - reserve = kzalloc(sizeof(*reserve), GFP_KERNEL); - if (!reserve) - return -ENOMEM; + struct ttm_resource_manager *man = mem->region_private; - reserve->res = mem->priv_ops->reserve(mem, offset, size); - if (IS_ERR(reserve->res)) { - ret = PTR_ERR(reserve->res); - kfree(reserve); - return ret; - } - - mutex_lock(&mem->mm_lock); - list_add_tail(&reserve->link, &mem->reserved); - mutex_unlock(&mem->mm_lock); + GEM_BUG_ON(mem->is_range_manager); - return 0; + return i915_ttm_buddy_man_reserve(man, offset, size); } struct intel_memory_region * @@ -149,10 +107,6 @@ intel_memory_region_create(struct drm_i915_private *i915, mutex_init(&mem->objects.lock); INIT_LIST_HEAD(&mem->objects.list); - INIT_LIST_HEAD(&mem->objects.purgeable); - INIT_LIST_HEAD(&mem->reserved); - - mutex_init(&mem->mm_lock); if (ops->init) { err = ops->init(mem); @@ -183,11 +137,9 @@ static void __intel_memory_region_destroy(struct kref *kref) struct intel_memory_region *mem = container_of(kref, typeof(*mem), kref); - intel_memory_region_unreserve(mem); if (mem->ops->release) mem->ops->release(mem); - mutex_destroy(&mem->mm_lock); mutex_destroy(&mem->objects.lock); kfree(mem); } @@ -221,7 +173,12 @@ int intel_memory_regions_hw_probe(struct drm_i915_private *i915) instance = intel_region_map[i].instance; switch (type) { case INTEL_MEMORY_SYSTEM: - mem = i915_gem_shmem_setup(i915, type, instance); + if (IS_DGFX(i915)) + mem = i915_gem_ttm_system_setup(i915, type, + instance); + else + mem = i915_gem_shmem_setup(i915, type, + instance); break; case INTEL_MEMORY_STOLEN_LOCAL: mem = i915_gem_stolen_lmem_setup(i915, type, instance); diff --git a/drivers/gpu/drm/i915/intel_memory_region.h b/drivers/gpu/drm/i915/intel_memory_region.h index 1f7dac63abb7..1f2b96efa69d 100644 --- a/drivers/gpu/drm/i915/intel_memory_region.h +++ b/drivers/gpu/drm/i915/intel_memory_region.h @@ -40,8 +40,7 @@ enum intel_region_id { #define REGION_STOLEN_SMEM BIT(INTEL_REGION_STOLEN_SMEM) #define REGION_STOLEN_LMEM BIT(INTEL_REGION_STOLEN_LMEM) -#define I915_ALLOC_MIN_PAGE_SIZE BIT(0) -#define I915_ALLOC_CONTIGUOUS BIT(1) +#define I915_ALLOC_CONTIGUOUS BIT(0) #define for_each_memory_region(mr, i915, id) \ for (id = 0; id < ARRAY_SIZE((i915)->mm.regions); id++) \ @@ -56,22 +55,14 @@ struct intel_memory_region_ops { int (*init_object)(struct intel_memory_region *mem, struct drm_i915_gem_object *obj, resource_size_t size, + resource_size_t page_size, unsigned int flags); }; -struct intel_memory_region_private_ops { - struct ttm_resource *(*reserve)(struct intel_memory_region *mem, - resource_size_t offset, - resource_size_t size); - void (*free)(struct intel_memory_region *mem, - struct ttm_resource *res); -}; - struct intel_memory_region { struct drm_i915_private *i915; const struct intel_memory_region_ops *ops; - const struct intel_memory_region_private_ops *priv_ops; struct io_mapping iomap; struct resource region; @@ -79,8 +70,6 @@ struct intel_memory_region { /* For fake LMEM */ struct drm_mm_node fake_mappable; - struct mutex mm_lock; - struct kref kref; resource_size_t io_start; @@ -94,18 +83,13 @@ struct intel_memory_region { char name[16]; bool private; /* not for userspace */ - struct list_head reserved; - dma_addr_t remap_addr; struct { struct mutex lock; /* Protects access to objects */ struct 
list_head list; - struct list_head purgeable; } objects; - size_t chunk_size; - unsigned int max_order; bool is_range_manager; void *region_private; @@ -139,9 +123,15 @@ __printf(2, 3) void intel_memory_region_set_name(struct intel_memory_region *mem, const char *fmt, ...); -void intel_memory_region_unreserve(struct intel_memory_region *mem); - int intel_memory_region_reserve(struct intel_memory_region *mem, resource_size_t offset, resource_size_t size); + +struct intel_memory_region * +i915_gem_ttm_system_setup(struct drm_i915_private *i915, + u16 type, u16 instance); +struct intel_memory_region * +i915_gem_shmem_setup(struct drm_i915_private *i915, + u16 type, u16 instance); + #endif diff --git a/drivers/gpu/drm/i915/intel_pch.c b/drivers/gpu/drm/i915/intel_pch.c index 4e92ae19189e..d1d4b97b86f5 100644 --- a/drivers/gpu/drm/i915/intel_pch.c +++ b/drivers/gpu/drm/i915/intel_pch.c @@ -81,7 +81,6 @@ intel_pch_type(const struct drm_i915_private *dev_priv, unsigned short id) case INTEL_PCH_CNP_DEVICE_ID_TYPE: drm_dbg_kms(&dev_priv->drm, "Found Cannon Lake PCH (CNP)\n"); drm_WARN_ON(&dev_priv->drm, - !IS_CANNONLAKE(dev_priv) && !IS_COFFEELAKE(dev_priv) && !IS_COMETLAKE(dev_priv)); return PCH_CNP; @@ -89,7 +88,6 @@ intel_pch_type(const struct drm_i915_private *dev_priv, unsigned short id) drm_dbg_kms(&dev_priv->drm, "Found Cannon Lake LP PCH (CNP-LP)\n"); drm_WARN_ON(&dev_priv->drm, - !IS_CANNONLAKE(dev_priv) && !IS_COFFEELAKE(dev_priv) && !IS_COMETLAKE(dev_priv)); return PCH_CNP; @@ -171,8 +169,7 @@ intel_virt_detect_pch(const struct drm_i915_private *dev_priv, id = INTEL_PCH_MCC_DEVICE_ID_TYPE; else if (IS_ICELAKE(dev_priv)) id = INTEL_PCH_ICP_DEVICE_ID_TYPE; - else if (IS_CANNONLAKE(dev_priv) || - IS_COFFEELAKE(dev_priv) || + else if (IS_COFFEELAKE(dev_priv) || IS_COMETLAKE(dev_priv)) id = INTEL_PCH_CNP_DEVICE_ID_TYPE; else if (IS_KABYLAKE(dev_priv) || IS_SKYLAKE(dev_priv)) @@ -211,6 +208,9 @@ void intel_detect_pch(struct drm_i915_private *dev_priv) if (IS_DG1(dev_priv)) { dev_priv->pch_type = PCH_DG1; return; + } else if (IS_DG2(dev_priv)) { + dev_priv->pch_type = PCH_DG2; + return; } /* diff --git a/drivers/gpu/drm/i915/intel_pch.h b/drivers/gpu/drm/i915/intel_pch.h index e2f3f30c6445..7c0d83d292dc 100644 --- a/drivers/gpu/drm/i915/intel_pch.h +++ b/drivers/gpu/drm/i915/intel_pch.h @@ -30,6 +30,7 @@ enum intel_pch { /* Fake PCHs, functionality handled on the same PCI dev */ PCH_DG1 = 1024, + PCH_DG2, }; #define INTEL_PCH_DEVICE_ID_MASK 0xff80 @@ -62,6 +63,7 @@ enum intel_pch { #define INTEL_PCH_TYPE(dev_priv) ((dev_priv)->pch_type) #define INTEL_PCH_ID(dev_priv) ((dev_priv)->pch_id) +#define HAS_PCH_DG2(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_DG2) #define HAS_PCH_ADP(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_ADP) #define HAS_PCH_DG1(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_DG1) #define HAS_PCH_JSP(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_JSP) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 45fefa0ed160..65bc3709f54c 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -1370,11 +1370,11 @@ static bool g4x_compute_fbc_en(const struct g4x_wm_state *wm_state, return true; } -static int g4x_compute_pipe_wm(struct intel_crtc_state *crtc_state) +static int g4x_compute_pipe_wm(struct intel_atomic_state *state, + struct intel_crtc *crtc) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); - struct intel_atomic_state *state = - to_intel_atomic_state(crtc_state->uapi.state); + struct intel_crtc_state 
*crtc_state = + intel_atomic_get_new_crtc_state(state, crtc); struct g4x_wm_state *wm_state = &crtc_state->wm.g4x.optimal; int num_active_planes = hweight8(crtc_state->active_planes & ~BIT(PLANE_CURSOR)); @@ -1451,20 +1451,21 @@ static int g4x_compute_pipe_wm(struct intel_crtc_state *crtc_state) return 0; } -static int g4x_compute_intermediate_wm(struct intel_crtc_state *new_crtc_state) +static int g4x_compute_intermediate_wm(struct intel_atomic_state *state, + struct intel_crtc *crtc) { - struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + struct intel_crtc_state *new_crtc_state = + intel_atomic_get_new_crtc_state(state, crtc); + const struct intel_crtc_state *old_crtc_state = + intel_atomic_get_old_crtc_state(state, crtc); struct g4x_wm_state *intermediate = &new_crtc_state->wm.g4x.intermediate; const struct g4x_wm_state *optimal = &new_crtc_state->wm.g4x.optimal; - struct intel_atomic_state *intel_state = - to_intel_atomic_state(new_crtc_state->uapi.state); - const struct intel_crtc_state *old_crtc_state = - intel_atomic_get_old_crtc_state(intel_state, crtc); const struct g4x_wm_state *active = &old_crtc_state->wm.g4x.optimal; enum plane_id plane_id; - if (!new_crtc_state->hw.active || drm_atomic_crtc_needs_modeset(&new_crtc_state->uapi)) { + if (!new_crtc_state->hw.active || + drm_atomic_crtc_needs_modeset(&new_crtc_state->uapi)) { *intermediate = *optimal; intermediate->cxsr = false; @@ -1890,12 +1891,12 @@ static bool vlv_raw_crtc_wm_is_valid(const struct intel_crtc_state *crtc_state, vlv_raw_plane_wm_is_valid(crtc_state, PLANE_CURSOR, level); } -static int vlv_compute_pipe_wm(struct intel_crtc_state *crtc_state) +static int vlv_compute_pipe_wm(struct intel_atomic_state *state, + struct intel_crtc *crtc) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - struct intel_atomic_state *state = - to_intel_atomic_state(crtc_state->uapi.state); + struct intel_crtc_state *crtc_state = + intel_atomic_get_new_crtc_state(state, crtc); struct vlv_wm_state *wm_state = &crtc_state->wm.vlv.optimal; const struct vlv_fifo_state *fifo_state = &crtc_state->wm.vlv.fifo_state; @@ -2095,19 +2096,20 @@ static void vlv_atomic_update_fifo(struct intel_atomic_state *state, #undef VLV_FIFO -static int vlv_compute_intermediate_wm(struct intel_crtc_state *new_crtc_state) +static int vlv_compute_intermediate_wm(struct intel_atomic_state *state, + struct intel_crtc *crtc) { - struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->uapi.crtc); + struct intel_crtc_state *new_crtc_state = + intel_atomic_get_new_crtc_state(state, crtc); + const struct intel_crtc_state *old_crtc_state = + intel_atomic_get_old_crtc_state(state, crtc); struct vlv_wm_state *intermediate = &new_crtc_state->wm.vlv.intermediate; const struct vlv_wm_state *optimal = &new_crtc_state->wm.vlv.optimal; - struct intel_atomic_state *intel_state = - to_intel_atomic_state(new_crtc_state->uapi.state); - const struct intel_crtc_state *old_crtc_state = - intel_atomic_get_old_crtc_state(intel_state, crtc); const struct vlv_wm_state *active = &old_crtc_state->wm.vlv.optimal; int level; - if (!new_crtc_state->hw.active || drm_atomic_crtc_needs_modeset(&new_crtc_state->uapi)) { + if (!new_crtc_state->hw.active || + drm_atomic_crtc_needs_modeset(&new_crtc_state->uapi)) { *intermediate = *optimal; intermediate->cxsr = false; @@ -2906,24 +2908,25 @@ static void intel_read_wm_latency(struct 
drm_i915_private *dev_priv, if (wm[level] == 0) { for (i = level + 1; i <= max_level; i++) wm[i] = 0; + + max_level = level - 1; + break; } } /* - * WaWmMemoryReadLatency:skl+,glk + * WaWmMemoryReadLatency * * punit doesn't take into account the read latency so we need - * to add 2us to the various latency levels we retrieve from the - * punit when level 0 response data us 0us. + * to add proper adjustement to each valid level we retrieve + * from the punit when level 0 response data is 0us. */ if (wm[0] == 0) { - wm[0] += 2; - for (level = 1; level <= max_level; level++) { - if (wm[level] == 0) - break; - wm[level] += 2; - } + u8 adjust = DISPLAY_VER(dev_priv) >= 12 ? 3 : 2; + + for (level = 0; level <= max_level; level++) + wm[level] += adjust; } /* @@ -2934,7 +2937,6 @@ static void intel_read_wm_latency(struct drm_i915_private *dev_priv, */ if (dev_priv->dram_info.wm_lv_0_adjust_needed) wm[0] += 1; - } else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) { u64 sskpd = intel_uncore_read64(uncore, MCH_SSKPD); @@ -3144,10 +3146,12 @@ static bool ilk_validate_pipe_wm(const struct drm_i915_private *dev_priv, } /* Compute new watermarks for the pipe */ -static int ilk_compute_pipe_wm(struct intel_crtc_state *crtc_state) +static int ilk_compute_pipe_wm(struct intel_atomic_state *state, + struct intel_crtc *crtc) { - struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev); - struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + struct drm_i915_private *dev_priv = to_i915(state->base.dev); + struct intel_crtc_state *crtc_state = + intel_atomic_get_new_crtc_state(state, crtc); struct intel_pipe_wm *pipe_wm; struct intel_plane *plane; const struct intel_plane_state *plane_state; @@ -3220,16 +3224,16 @@ static int ilk_compute_pipe_wm(struct intel_crtc_state *crtc_state) * state and the new state. These can be programmed to the hardware * immediately. */ -static int ilk_compute_intermediate_wm(struct intel_crtc_state *newstate) -{ - struct intel_crtc *intel_crtc = to_intel_crtc(newstate->uapi.crtc); - struct drm_i915_private *dev_priv = to_i915(intel_crtc->base.dev); - struct intel_pipe_wm *a = &newstate->wm.ilk.intermediate; - struct intel_atomic_state *intel_state = - to_intel_atomic_state(newstate->uapi.state); - const struct intel_crtc_state *oldstate = - intel_atomic_get_old_crtc_state(intel_state, intel_crtc); - const struct intel_pipe_wm *b = &oldstate->wm.ilk.optimal; +static int ilk_compute_intermediate_wm(struct intel_atomic_state *state, + struct intel_crtc *crtc) +{ + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + struct intel_crtc_state *new_crtc_state = + intel_atomic_get_new_crtc_state(state, crtc); + const struct intel_crtc_state *old_crtc_state = + intel_atomic_get_old_crtc_state(state, crtc); + struct intel_pipe_wm *a = &new_crtc_state->wm.ilk.intermediate; + const struct intel_pipe_wm *b = &old_crtc_state->wm.ilk.optimal; int level, max_level = ilk_wm_max_level(dev_priv); /* @@ -3237,9 +3241,10 @@ static int ilk_compute_intermediate_wm(struct intel_crtc_state *newstate) * currently active watermarks to get values that are safe both before * and after the vblank. 
*/ - *a = newstate->wm.ilk.optimal; - if (!newstate->hw.active || drm_atomic_crtc_needs_modeset(&newstate->uapi) || - intel_state->skip_intermediate_wm) + *a = new_crtc_state->wm.ilk.optimal; + if (!new_crtc_state->hw.active || + drm_atomic_crtc_needs_modeset(&new_crtc_state->uapi) || + state->skip_intermediate_wm) return 0; a->pipe_enabled |= b->pipe_enabled; @@ -3270,8 +3275,8 @@ static int ilk_compute_intermediate_wm(struct intel_crtc_state *newstate) * If our intermediate WM are identical to the final WM, then we can * omit the post-vblank programming; only update if it's different. */ - if (memcmp(a, &newstate->wm.ilk.optimal, sizeof(*a)) != 0) - newstate->wm.need_postvbl_update = true; + if (memcmp(a, &new_crtc_state->wm.ilk.optimal, sizeof(*a)) != 0) + new_crtc_state->wm.need_postvbl_update = true; return 0; } @@ -3283,12 +3288,12 @@ static void ilk_merge_wm_level(struct drm_i915_private *dev_priv, int level, struct intel_wm_level *ret_wm) { - const struct intel_crtc *intel_crtc; + const struct intel_crtc *crtc; ret_wm->enable = true; - for_each_intel_crtc(&dev_priv->drm, intel_crtc) { - const struct intel_pipe_wm *active = &intel_crtc->wm.active.ilk; + for_each_intel_crtc(&dev_priv->drm, crtc) { + const struct intel_pipe_wm *active = &crtc->wm.active.ilk; const struct intel_wm_level *wm = &active->wm[level]; if (!active->pipe_enabled) @@ -3388,7 +3393,7 @@ static void ilk_compute_wm_results(struct drm_i915_private *dev_priv, enum intel_ddb_partitioning partitioning, struct ilk_wm_values *results) { - struct intel_crtc *intel_crtc; + struct intel_crtc *crtc; int level, wm_lp; results->enable_fbc_wm = merged->fbc_wm_enabled; @@ -3433,9 +3438,9 @@ static void ilk_compute_wm_results(struct drm_i915_private *dev_priv, } /* LP0 register values */ - for_each_intel_crtc(&dev_priv->drm, intel_crtc) { - enum pipe pipe = intel_crtc->pipe; - const struct intel_pipe_wm *pipe_wm = &intel_crtc->wm.active.ilk; + for_each_intel_crtc(&dev_priv->drm, crtc) { + enum pipe pipe = crtc->pipe; + const struct intel_pipe_wm *pipe_wm = &crtc->wm.active.ilk; const struct intel_wm_level *r = &pipe_wm->wm[0]; if (drm_WARN_ON(&dev_priv->drm, !r->enable)) @@ -4579,6 +4584,117 @@ static const struct dbuf_slice_conf_entry tgl_allowed_dbufs[] = {} }; +static const struct dbuf_slice_conf_entry dg2_allowed_dbufs[] = { + { + .active_pipes = BIT(PIPE_A), + .dbuf_mask = { + [PIPE_A] = BIT(DBUF_S1) | BIT(DBUF_S2), + }, + }, + { + .active_pipes = BIT(PIPE_B), + .dbuf_mask = { + [PIPE_B] = BIT(DBUF_S1) | BIT(DBUF_S2), + }, + }, + { + .active_pipes = BIT(PIPE_A) | BIT(PIPE_B), + .dbuf_mask = { + [PIPE_A] = BIT(DBUF_S1), + [PIPE_B] = BIT(DBUF_S2), + }, + }, + { + .active_pipes = BIT(PIPE_C), + .dbuf_mask = { + [PIPE_C] = BIT(DBUF_S3) | BIT(DBUF_S4), + }, + }, + { + .active_pipes = BIT(PIPE_A) | BIT(PIPE_C), + .dbuf_mask = { + [PIPE_A] = BIT(DBUF_S1) | BIT(DBUF_S2), + [PIPE_C] = BIT(DBUF_S3) | BIT(DBUF_S4), + }, + }, + { + .active_pipes = BIT(PIPE_B) | BIT(PIPE_C), + .dbuf_mask = { + [PIPE_B] = BIT(DBUF_S1) | BIT(DBUF_S2), + [PIPE_C] = BIT(DBUF_S3) | BIT(DBUF_S4), + }, + }, + { + .active_pipes = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C), + .dbuf_mask = { + [PIPE_A] = BIT(DBUF_S1), + [PIPE_B] = BIT(DBUF_S2), + [PIPE_C] = BIT(DBUF_S3) | BIT(DBUF_S4), + }, + }, + { + .active_pipes = BIT(PIPE_D), + .dbuf_mask = { + [PIPE_D] = BIT(DBUF_S3) | BIT(DBUF_S4), + }, + }, + { + .active_pipes = BIT(PIPE_A) | BIT(PIPE_D), + .dbuf_mask = { + [PIPE_A] = BIT(DBUF_S1) | BIT(DBUF_S2), + [PIPE_D] = BIT(DBUF_S3) | BIT(DBUF_S4), + }, + }, + { + 
.active_pipes = BIT(PIPE_B) | BIT(PIPE_D), + .dbuf_mask = { + [PIPE_B] = BIT(DBUF_S1) | BIT(DBUF_S2), + [PIPE_D] = BIT(DBUF_S3) | BIT(DBUF_S4), + }, + }, + { + .active_pipes = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_D), + .dbuf_mask = { + [PIPE_A] = BIT(DBUF_S1), + [PIPE_B] = BIT(DBUF_S2), + [PIPE_D] = BIT(DBUF_S3) | BIT(DBUF_S4), + }, + }, + { + .active_pipes = BIT(PIPE_C) | BIT(PIPE_D), + .dbuf_mask = { + [PIPE_C] = BIT(DBUF_S3), + [PIPE_D] = BIT(DBUF_S4), + }, + }, + { + .active_pipes = BIT(PIPE_A) | BIT(PIPE_C) | BIT(PIPE_D), + .dbuf_mask = { + [PIPE_A] = BIT(DBUF_S1) | BIT(DBUF_S2), + [PIPE_C] = BIT(DBUF_S3), + [PIPE_D] = BIT(DBUF_S4), + }, + }, + { + .active_pipes = BIT(PIPE_B) | BIT(PIPE_C) | BIT(PIPE_D), + .dbuf_mask = { + [PIPE_B] = BIT(DBUF_S1) | BIT(DBUF_S2), + [PIPE_C] = BIT(DBUF_S3), + [PIPE_D] = BIT(DBUF_S4), + }, + }, + { + .active_pipes = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C) | BIT(PIPE_D), + .dbuf_mask = { + [PIPE_A] = BIT(DBUF_S1), + [PIPE_B] = BIT(DBUF_S2), + [PIPE_C] = BIT(DBUF_S3), + [PIPE_D] = BIT(DBUF_S4), + }, + }, + {} +}; + static const struct dbuf_slice_conf_entry adlp_allowed_dbufs[] = { { .active_pipes = BIT(PIPE_A), @@ -4754,12 +4870,19 @@ static u32 adlp_compute_dbuf_slices(enum pipe pipe, u32 active_pipes) return compute_dbuf_slices(pipe, active_pipes, adlp_allowed_dbufs); } +static u32 dg2_compute_dbuf_slices(enum pipe pipe, u32 active_pipes) +{ + return compute_dbuf_slices(pipe, active_pipes, dg2_allowed_dbufs); +} + static u8 skl_compute_dbuf_slices(struct intel_crtc *crtc, u8 active_pipes) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum pipe pipe = crtc->pipe; - if (IS_ALDERLAKE_P(dev_priv)) + if (IS_DG2(dev_priv)) + return dg2_compute_dbuf_slices(pipe, active_pipes); + else if (IS_ALDERLAKE_P(dev_priv)) return adlp_compute_dbuf_slices(pipe, active_pipes); else if (DISPLAY_VER(dev_priv) == 12) return tgl_compute_dbuf_slices(pipe, active_pipes); @@ -7340,30 +7463,24 @@ static void icl_init_clock_gating(struct drm_i915_private *dev_priv) intel_uncore_write(&dev_priv->uncore, ILK_DPFC_CHICKEN, ILK_DPFC_CHICKEN_COMP_DUMMY_PIXEL); - /* This is not an Wa. 
Enable to reduce Sampler power */ - intel_uncore_write(&dev_priv->uncore, GEN10_DFR_RATIO_EN_AND_CHICKEN, - intel_uncore_read(&dev_priv->uncore, GEN10_DFR_RATIO_EN_AND_CHICKEN) & ~DFR_DISABLE); - /*Wa_14010594013:icl, ehl */ intel_uncore_rmw(&dev_priv->uncore, GEN8_CHICKEN_DCPR_1, - 0, CNL_DELAY_PMRSP); + 0, ICL_DELAY_PMRSP); } static void gen12lp_init_clock_gating(struct drm_i915_private *dev_priv) { - /* Wa_1409120013:tgl,rkl,adl_s,dg1 */ - intel_uncore_write(&dev_priv->uncore, ILK_DPFC_CHICKEN, - ILK_DPFC_CHICKEN_COMP_DUMMY_PIXEL); + /* Wa_1409120013:tgl,rkl,adl-s,dg1 */ + if (IS_TIGERLAKE(dev_priv) || IS_ROCKETLAKE(dev_priv) || + IS_ALDERLAKE_S(dev_priv) || IS_DG1(dev_priv)) + intel_uncore_write(&dev_priv->uncore, ILK_DPFC_CHICKEN, + ILK_DPFC_CHICKEN_COMP_DUMMY_PIXEL); /* Wa_1409825376:tgl (pre-prod)*/ - if (IS_TGL_DISPLAY_STEP(dev_priv, STEP_A0, STEP_B1)) + if (IS_TGL_DISPLAY_STEP(dev_priv, STEP_A0, STEP_C0)) intel_uncore_write(&dev_priv->uncore, GEN9_CLKGATE_DIS_3, intel_uncore_read(&dev_priv->uncore, GEN9_CLKGATE_DIS_3) | TGL_VRH_GATING_DIS); - /* Wa_14011059788:tgl,rkl,adl_s,dg1 */ - intel_uncore_rmw(&dev_priv->uncore, GEN10_DFR_RATIO_EN_AND_CHICKEN, - 0, DFR_DISABLE); - /* Wa_14013723622:tgl,rkl,dg1,adl-s */ if (DISPLAY_VER(dev_priv) == 12) intel_uncore_rmw(&dev_priv->uncore, CLKREQ_POLICY, @@ -7383,7 +7500,7 @@ static void dg1_init_clock_gating(struct drm_i915_private *dev_priv) gen12lp_init_clock_gating(dev_priv); /* Wa_1409836686:dg1[a0] */ - if (IS_DG1_REVID(dev_priv, DG1_REVID_A0, DG1_REVID_A0)) + if (IS_DG1_GT_STEP(dev_priv, STEP_A0, STEP_B0)) intel_uncore_write(&dev_priv->uncore, GEN9_CLKGATE_DIS_3, intel_uncore_read(&dev_priv->uncore, GEN9_CLKGATE_DIS_3) | DPT_GATING_DIS); } @@ -7398,43 +7515,6 @@ static void cnp_init_clock_gating(struct drm_i915_private *dev_priv) CNP_PWM_CGE_GATING_DISABLE); } -static void cnl_init_clock_gating(struct drm_i915_private *dev_priv) -{ - u32 val; - cnp_init_clock_gating(dev_priv); - - /* This is not an Wa. 
Enable for better image quality */ - intel_uncore_write(&dev_priv->uncore, _3D_CHICKEN3, - _MASKED_BIT_ENABLE(_3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE)); - - /* WaEnableChickenDCPR:cnl */ - intel_uncore_write(&dev_priv->uncore, GEN8_CHICKEN_DCPR_1, - intel_uncore_read(&dev_priv->uncore, GEN8_CHICKEN_DCPR_1) | MASK_WAKEMEM); - - /* - * WaFbcWakeMemOn:cnl - * Display WA #0859: cnl - */ - intel_uncore_write(&dev_priv->uncore, DISP_ARB_CTL, intel_uncore_read(&dev_priv->uncore, DISP_ARB_CTL) | - DISP_FBC_MEMORY_WAKE); - - val = intel_uncore_read(&dev_priv->uncore, SLICE_UNIT_LEVEL_CLKGATE); - /* ReadHitWriteOnlyDisable:cnl */ - val |= RCCUNIT_CLKGATE_DIS; - intel_uncore_write(&dev_priv->uncore, SLICE_UNIT_LEVEL_CLKGATE, val); - - /* Wa_2201832410:cnl */ - val = intel_uncore_read(&dev_priv->uncore, SUBSLICE_UNIT_LEVEL_CLKGATE); - val |= GWUNIT_CLKGATE_DIS; - intel_uncore_write(&dev_priv->uncore, SUBSLICE_UNIT_LEVEL_CLKGATE, val); - - /* WaDisableVFclkgate:cnl */ - /* WaVFUnitClockGatingDisable:cnl */ - val = intel_uncore_read(&dev_priv->uncore, UNSLICE_UNIT_LEVEL_CLKGATE); - val |= VFUNIT_CLKGATE_DIS; - intel_uncore_write(&dev_priv->uncore, UNSLICE_UNIT_LEVEL_CLKGATE, val); -} - static void cfl_init_clock_gating(struct drm_i915_private *dev_priv) { cnp_init_clock_gating(dev_priv); @@ -7468,12 +7548,12 @@ static void kbl_init_clock_gating(struct drm_i915_private *dev_priv) FBC_LLC_FULLY_OPEN); /* WaDisableSDEUnitClockGating:kbl */ - if (IS_KBL_GT_STEP(dev_priv, 0, STEP_B0)) + if (IS_KBL_GT_STEP(dev_priv, 0, STEP_C0)) intel_uncore_write(&dev_priv->uncore, GEN8_UCGCTL6, intel_uncore_read(&dev_priv->uncore, GEN8_UCGCTL6) | GEN8_SDEUNIT_CLOCK_GATE_DISABLE); /* WaDisableGamClockGating:kbl */ - if (IS_KBL_GT_STEP(dev_priv, 0, STEP_B0)) + if (IS_KBL_GT_STEP(dev_priv, 0, STEP_C0)) intel_uncore_write(&dev_priv->uncore, GEN6_UCGCTL1, intel_uncore_read(&dev_priv->uncore, GEN6_UCGCTL1) | GEN6_GAMUNIT_CLOCK_GATE_DISABLE); @@ -7863,8 +7943,6 @@ void intel_init_clock_gating_hooks(struct drm_i915_private *dev_priv) dev_priv->display.init_clock_gating = gen12lp_init_clock_gating; else if (GRAPHICS_VER(dev_priv) == 11) dev_priv->display.init_clock_gating = icl_init_clock_gating; - else if (IS_CANNONLAKE(dev_priv)) - dev_priv->display.init_clock_gating = cnl_init_clock_gating; else if (IS_COFFEELAKE(dev_priv) || IS_COMETLAKE(dev_priv)) dev_priv->display.init_clock_gating = cfl_init_clock_gating; else if (IS_SKYLAKE(dev_priv)) diff --git a/drivers/gpu/drm/i915/intel_region_ttm.c b/drivers/gpu/drm/i915/intel_region_ttm.c index 82a6727ede46..98c7339bf8ba 100644 --- a/drivers/gpu/drm/i915/intel_region_ttm.c +++ b/drivers/gpu/drm/i915/intel_region_ttm.c @@ -8,9 +8,11 @@ #include "i915_drv.h" #include "i915_scatterlist.h" +#include "i915_ttm_buddy_manager.h" #include "intel_region_ttm.h" +#include "gem/i915_gem_ttm.h" /* For the funcs/ops export only */ /** * DOC: TTM support structure * @@ -20,9 +22,6 @@ * i915 GEM regions to TTM memory types and resource managers. */ -/* A Zero-initialized driver for now. We don't have a TTM backend yet. */ -static struct ttm_device_funcs i915_ttm_bo_driver; - /** * intel_region_ttm_device_init - Initialize a TTM device * @dev_priv: Pointer to an i915 device private structure. 
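For orientation, the renamed resource helpers in this file are intended to be paired roughly as follows. This is a hedged sketch only, using the function names added by this patch; "mem" and "size" stand in for an intel_memory_region and an allocation size, the real callers live in the GEM/TTM backend and the selftests, and intel_region_ttm_resource_alloc() is compiled only under CONFIG_DRM_I915_SELFTEST:

	/* Sketch: allocate from a region, translate for a GEM backend, then free. */
	struct ttm_resource *res;
	struct sg_table *st;

	res = intel_region_ttm_resource_alloc(mem, size, 0);
	if (IS_ERR(res))
		return PTR_ERR(res);

	st = intel_region_ttm_resource_to_st(mem, res);
	if (IS_ERR(st)) {
		intel_region_ttm_resource_free(mem, res);
		return PTR_ERR(st);
	}

	/* ... use the sg_table ... */

	sg_free_table(st);	/* the table is malloced, so release it explicitly */
	kfree(st);
	intel_region_ttm_resource_free(mem, res);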
@@ -33,7 +32,7 @@ int intel_region_ttm_device_init(struct drm_i915_private *dev_priv) { struct drm_device *drm = &dev_priv->drm; - return ttm_device_init(&dev_priv->bdev, &i915_ttm_bo_driver, + return ttm_device_init(&dev_priv->bdev, i915_ttm_driver(), drm->dev, drm->anon_inode->i_mapping, drm->vma_offset_manager, false, false); } @@ -52,12 +51,16 @@ void intel_region_ttm_device_fini(struct drm_i915_private *dev_priv) * driver-private types for now, reserving TTM_PL_VRAM for stolen * memory and TTM_PL_TT for GGTT use if decided to implement this. */ -static int intel_region_to_ttm_type(struct intel_memory_region *mem) +int intel_region_to_ttm_type(const struct intel_memory_region *mem) { int type; GEM_BUG_ON(mem->type != INTEL_MEMORY_LOCAL && - mem->type != INTEL_MEMORY_MOCK); + mem->type != INTEL_MEMORY_MOCK && + mem->type != INTEL_MEMORY_SYSTEM); + + if (mem->type == INTEL_MEMORY_SYSTEM) + return TTM_PL_SYSTEM; type = mem->instance + TTM_PL_PRIV; GEM_BUG_ON(type >= TTM_NUM_MEM_TYPES); @@ -65,72 +68,29 @@ static int intel_region_to_ttm_type(struct intel_memory_region *mem) return type; } -static struct ttm_resource * -intel_region_ttm_node_reserve(struct intel_memory_region *mem, - resource_size_t offset, - resource_size_t size) -{ - struct ttm_resource_manager *man = mem->region_private; - struct ttm_place place = {}; - struct ttm_buffer_object mock_bo = {}; - struct ttm_resource *res; - int ret; - - /* - * Having to use a mock_bo is unfortunate but stems from some - * drivers having private managers that insist to know what the - * allocate memory is intended for, using it to send private - * data to the manager. Also recently the bo has been used to send - * alignment info to the manager. Assume that apart from the latter, - * none of the managers we use will ever access the buffer object - * members, hoping we can pass the alignment info in the - * struct ttm_place in the future. - */ - - place.fpfn = offset >> PAGE_SHIFT; - place.lpfn = place.fpfn + (size >> PAGE_SHIFT); - mock_bo.base.size = size; - ret = man->func->alloc(man, &mock_bo, &place, &res); - if (ret == -ENOSPC) - ret = -ENXIO; - - return ret ? ERR_PTR(ret) : res; -} - /** - * intel_region_ttm_node_free - Free a node allocated from a resource manager - * @mem: The region the node was allocated from. - * @node: The opaque node representing an allocation. + * intel_region_ttm_init - Initialize a memory region for TTM. + * @mem: The region to initialize. + * + * This function initializes a suitable TTM resource manager for the + * region, and if it's a LMEM region type, attaches it to the TTM + * device. MOCK regions are NOT attached to the TTM device, since we don't + * have one for the mock selftests. + * + * Return: 0 on success, negative error code on failure. 
*/ -void intel_region_ttm_node_free(struct intel_memory_region *mem, - struct ttm_resource *res) -{ - struct ttm_resource_manager *man = mem->region_private; - - man->func->free(man, res); -} - -static const struct intel_memory_region_private_ops priv_ops = { - .reserve = intel_region_ttm_node_reserve, - .free = intel_region_ttm_node_free, -}; - int intel_region_ttm_init(struct intel_memory_region *mem) { struct ttm_device *bdev = &mem->i915->bdev; int mem_type = intel_region_to_ttm_type(mem); int ret; - ret = ttm_range_man_init(bdev, mem_type, false, - resource_size(&mem->region) >> PAGE_SHIFT); + ret = i915_ttm_buddy_man_init(bdev, mem_type, false, + resource_size(&mem->region), + mem->min_page_size, PAGE_SIZE); if (ret) return ret; - mem->chunk_size = PAGE_SIZE; - mem->max_order = - get_order(rounddown_pow_of_two(resource_size(&mem->region))); - mem->is_range_manager = true; - mem->priv_ops = &priv_ops; mem->region_private = ttm_manager_type(bdev, mem_type); return 0; @@ -148,17 +108,17 @@ void intel_region_ttm_fini(struct intel_memory_region *mem) { int ret; - ret = ttm_range_man_fini(&mem->i915->bdev, - intel_region_to_ttm_type(mem)); + ret = i915_ttm_buddy_man_fini(&mem->i915->bdev, + intel_region_to_ttm_type(mem)); GEM_WARN_ON(ret); mem->region_private = NULL; } /** - * intel_region_ttm_node_to_st - Convert an opaque TTM resource manager node + * intel_region_ttm_resource_to_st - Convert an opaque TTM resource manager resource * to an sg_table. * @mem: The memory region. - * @node: The resource manager node obtained from the TTM resource manager. + * @res: The resource manager resource obtained from the TTM resource manager. * * The gem backends typically use sg-tables for operations on the underlying * io_memory. So provide a way for the backends to translate the @@ -166,19 +126,23 @@ void intel_region_ttm_fini(struct intel_memory_region *mem) * * Return: A malloced sg_table on success, an error pointer on failure. */ -struct sg_table *intel_region_ttm_node_to_st(struct intel_memory_region *mem, - struct ttm_resource *res) +struct sg_table *intel_region_ttm_resource_to_st(struct intel_memory_region *mem, + struct ttm_resource *res) { - struct ttm_range_mgr_node *range_node = - container_of(res, typeof(*range_node), base); + if (mem->is_range_manager) { + struct ttm_range_mgr_node *range_node = + to_ttm_range_mgr_node(res); - GEM_WARN_ON(!mem->is_range_manager); - return i915_sg_from_mm_node(&range_node->mm_nodes[0], - mem->region.start); + return i915_sg_from_mm_node(&range_node->mm_nodes[0], + mem->region.start); + } else { + return i915_sg_from_buddy_resource(res, mem->region.start); + } } +#ifdef CONFIG_DRM_I915_SELFTEST /** - * intel_region_ttm_node_alloc - Allocate memory resources from a region + * intel_region_ttm_resource_alloc - Allocate memory resources from a region * @mem: The memory region, * @size: The requested size in bytes * @flags: Allocation flags @@ -187,15 +151,15 @@ struct sg_table *intel_region_ttm_node_to_st(struct intel_memory_region *mem, * memory from standalone TTM range managers, without the TTM eviction * functionality. Don't use if you are not completely sure that's the * case. The returned opaque node can be converted to an sg_table using - * intel_region_ttm_node_to_st(), and can be freed using - * intel_region_ttm_node_free(). + * intel_region_ttm_resource_to_st(), and can be freed using + * intel_region_ttm_resource_free(). * * Return: A valid pointer on success, an error pointer on failure. 
*/ struct ttm_resource * -intel_region_ttm_node_alloc(struct intel_memory_region *mem, - resource_size_t size, - unsigned int flags) +intel_region_ttm_resource_alloc(struct intel_memory_region *mem, + resource_size_t size, + unsigned int flags) { struct ttm_resource_manager *man = mem->region_private; struct ttm_place place = {}; @@ -203,24 +167,26 @@ intel_region_ttm_node_alloc(struct intel_memory_region *mem, struct ttm_resource *res; int ret; - /* - * We ignore the flags for now since we're using the range - * manager and contigous and min page size would be fulfilled - * by default if size is min page size aligned. - */ mock_bo.base.size = size; - - if (mem->is_range_manager) { - if (size >= SZ_1G) - mock_bo.page_alignment = SZ_1G >> PAGE_SHIFT; - else if (size >= SZ_2M) - mock_bo.page_alignment = SZ_2M >> PAGE_SHIFT; - else if (size >= SZ_64K) - mock_bo.page_alignment = SZ_64K >> PAGE_SHIFT; - } + place.flags = flags; ret = man->func->alloc(man, &mock_bo, &place, &res); if (ret == -ENOSPC) ret = -ENXIO; return ret ? ERR_PTR(ret) : res; } + +#endif + +/** + * intel_region_ttm_resource_free - Free a resource allocated from a resource manager + * @mem: The region the resource was allocated from. + * @res: The opaque resource representing an allocation. + */ +void intel_region_ttm_resource_free(struct intel_memory_region *mem, + struct ttm_resource *res) +{ + struct ttm_resource_manager *man = mem->region_private; + + man->func->free(man, res); +} diff --git a/drivers/gpu/drm/i915/intel_region_ttm.h b/drivers/gpu/drm/i915/intel_region_ttm.h index 11b0574ab791..6f44075920f2 100644 --- a/drivers/gpu/drm/i915/intel_region_ttm.h +++ b/drivers/gpu/drm/i915/intel_region_ttm.h @@ -12,6 +12,7 @@ struct drm_i915_private; struct intel_memory_region; struct ttm_resource; +struct ttm_device_funcs; int intel_region_ttm_device_init(struct drm_i915_private *dev_priv); @@ -21,14 +22,20 @@ int intel_region_ttm_init(struct intel_memory_region *mem); void intel_region_ttm_fini(struct intel_memory_region *mem); -struct sg_table *intel_region_ttm_node_to_st(struct intel_memory_region *mem, - struct ttm_resource *res); +struct sg_table *intel_region_ttm_resource_to_st(struct intel_memory_region *mem, + struct ttm_resource *res); -struct ttm_resource * -intel_region_ttm_node_alloc(struct intel_memory_region *mem, - resource_size_t size, - unsigned int flags); +void intel_region_ttm_resource_free(struct intel_memory_region *mem, + struct ttm_resource *res); + +int intel_region_to_ttm_type(const struct intel_memory_region *mem); -void intel_region_ttm_node_free(struct intel_memory_region *mem, - struct ttm_resource *node); +struct ttm_device_funcs *i915_ttm_driver(void); + +#ifdef CONFIG_DRM_I915_SELFTEST +struct ttm_resource * +intel_region_ttm_resource_alloc(struct intel_memory_region *mem, + resource_size_t size, + unsigned int flags); +#endif #endif /* _INTEL_REGION_TTM_H_ */ diff --git a/drivers/gpu/drm/i915/intel_sideband.c b/drivers/gpu/drm/i915/intel_sideband.c index f0a82b37bd1a..e304bf44e1ff 100644 --- a/drivers/gpu/drm/i915/intel_sideband.c +++ b/drivers/gpu/drm/i915/intel_sideband.c @@ -556,17 +556,22 @@ out: #undef COND } -void intel_pcode_init(struct drm_i915_private *i915) +int intel_pcode_init(struct drm_i915_private *i915) { - int ret; + int ret = 0; if (!IS_DGFX(i915)) - return; + return ret; ret = skl_pcode_request(i915, DG1_PCODE_STATUS, DG1_UNCORE_GET_INIT_STATUS, DG1_UNCORE_INIT_STATUS_COMPLETE, - DG1_UNCORE_INIT_STATUS_COMPLETE, 50); + DG1_UNCORE_INIT_STATUS_COMPLETE, 180000); + + 
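+	/*
+	 * Uncore initialization via pcode can take a long time on discrete
+	 * GPUs, hence the generous 180 second timeout used above.
+	 */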
drm_dbg(&i915->drm, "PCODE init status %d\n", ret); + if (ret) drm_err(&i915->drm, "Pcode did not report uncore initialization completion!\n"); + + return ret; } diff --git a/drivers/gpu/drm/i915/intel_sideband.h b/drivers/gpu/drm/i915/intel_sideband.h index 094c7b19c5d4..d1d14bcb8f56 100644 --- a/drivers/gpu/drm/i915/intel_sideband.h +++ b/drivers/gpu/drm/i915/intel_sideband.h @@ -138,6 +138,6 @@ int sandybridge_pcode_write_timeout(struct drm_i915_private *i915, u32 mbox, int skl_pcode_request(struct drm_i915_private *i915, u32 mbox, u32 request, u32 reply_mask, u32 reply, int timeout_base_ms); -void intel_pcode_init(struct drm_i915_private *i915); +int intel_pcode_init(struct drm_i915_private *i915); #endif /* _INTEL_SIDEBAND_H */ diff --git a/drivers/gpu/drm/i915/intel_step.c b/drivers/gpu/drm/i915/intel_step.c index ba9479a67521..6cf967631395 100644 --- a/drivers/gpu/drm/i915/intel_step.c +++ b/drivers/gpu/drm/i915/intel_step.c @@ -7,16 +7,32 @@ #include "intel_step.h" /* - * KBL revision ID ordering is bizarre; higher revision ID's map to lower - * steppings in some cases. So rather than test against the revision ID - * directly, let's map that into our own range of increasing ID's that we - * can test against in a regular manner. + * Some platforms have unusual ways of mapping PCI revision ID to GT/display + * steppings. E.g., in some cases a higher PCI revision may translate to a + * lower stepping of the GT and/or display IP. This file provides lookup + * tables to map the PCI revision into a standard set of stepping values that + * can be compared numerically. + * + * Also note that some revisions/steppings may have been set aside as + * placeholders but never materialized in real hardware; in those cases there + * may be jumps in the revision IDs or stepping values in the tables below. */ +/* + * Some platforms always have the same stepping value for GT and display; + * use a macro to define these to make it easier to identify the platforms + * where the two steppings can deviate. 
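+ * (For example, in tgl_uy_revids below PCI revision 1 maps to GT stepping
+ * B0 but display stepping C0, so those tables cannot use COMMON_STEP.)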
+ */ +#define COMMON_STEP(x) .gt_step = STEP_##x, .display_step = STEP_##x + +static const struct intel_step_info skl_revids[] = { + [0x6] = { COMMON_STEP(G0) }, + [0x7] = { COMMON_STEP(H0) }, + [0x9] = { COMMON_STEP(J0) }, + [0xA] = { COMMON_STEP(I1) }, +}; -/* FIXME: what about REVID_E0 */ static const struct intel_step_info kbl_revids[] = { - [0] = { .gt_step = STEP_A0, .display_step = STEP_A0 }, [1] = { .gt_step = STEP_B0, .display_step = STEP_B0 }, [2] = { .gt_step = STEP_C0, .display_step = STEP_B0 }, [3] = { .gt_step = STEP_D0, .display_step = STEP_B0 }, @@ -26,7 +42,27 @@ static const struct intel_step_info kbl_revids[] = { [7] = { .gt_step = STEP_G0, .display_step = STEP_C0 }, }; -static const struct intel_step_info tgl_uy_revid_step_tbl[] = { +static const struct intel_step_info bxt_revids[] = { + [0xA] = { COMMON_STEP(C0) }, + [0xB] = { COMMON_STEP(C0) }, + [0xC] = { COMMON_STEP(D0) }, + [0xD] = { COMMON_STEP(E0) }, +}; + +static const struct intel_step_info glk_revids[] = { + [3] = { COMMON_STEP(B0) }, +}; + +static const struct intel_step_info icl_revids[] = { + [7] = { COMMON_STEP(D0) }, +}; + +static const struct intel_step_info jsl_ehl_revids[] = { + [0] = { COMMON_STEP(A0) }, + [1] = { COMMON_STEP(B0) }, +}; + +static const struct intel_step_info tgl_uy_revids[] = { [0] = { .gt_step = STEP_A0, .display_step = STEP_A0 }, [1] = { .gt_step = STEP_B0, .display_step = STEP_C0 }, [2] = { .gt_step = STEP_B1, .display_step = STEP_C0 }, @@ -34,12 +70,23 @@ static const struct intel_step_info tgl_uy_revid_step_tbl[] = { }; /* Same GT stepping between tgl_uy_revids and tgl_revids don't mean the same HW */ -static const struct intel_step_info tgl_revid_step_tbl[] = { +static const struct intel_step_info tgl_revids[] = { [0] = { .gt_step = STEP_A0, .display_step = STEP_B0 }, [1] = { .gt_step = STEP_B0, .display_step = STEP_D0 }, }; -static const struct intel_step_info adls_revid_step_tbl[] = { +static const struct intel_step_info rkl_revids[] = { + [0] = { COMMON_STEP(A0) }, + [1] = { COMMON_STEP(B0) }, + [4] = { COMMON_STEP(C0) }, +}; + +static const struct intel_step_info dg1_revids[] = { + [0] = { COMMON_STEP(A0) }, + [1] = { COMMON_STEP(B0) }, +}; + +static const struct intel_step_info adls_revids[] = { [0x0] = { .gt_step = STEP_A0, .display_step = STEP_A0 }, [0x1] = { .gt_step = STEP_A0, .display_step = STEP_A2 }, [0x4] = { .gt_step = STEP_B0, .display_step = STEP_B0 }, @@ -47,13 +94,33 @@ static const struct intel_step_info adls_revid_step_tbl[] = { [0xC] = { .gt_step = STEP_D0, .display_step = STEP_C0 }, }; -static const struct intel_step_info adlp_revid_step_tbl[] = { +static const struct intel_step_info adlp_revids[] = { [0x0] = { .gt_step = STEP_A0, .display_step = STEP_A0 }, [0x4] = { .gt_step = STEP_B0, .display_step = STEP_B0 }, [0x8] = { .gt_step = STEP_C0, .display_step = STEP_C0 }, [0xC] = { .gt_step = STEP_C0, .display_step = STEP_D0 }, }; +static const struct intel_step_info xehpsdv_revids[] = { + [0x0] = { .gt_step = STEP_A0 }, + [0x1] = { .gt_step = STEP_A1 }, + [0x4] = { .gt_step = STEP_B0 }, + [0x8] = { .gt_step = STEP_C0 }, +}; + +static const struct intel_step_info dg2_g10_revid_step_tbl[] = { + [0x0] = { .gt_step = STEP_A0, .display_step = STEP_A0 }, + [0x1] = { .gt_step = STEP_A1, .display_step = STEP_A0 }, + [0x4] = { .gt_step = STEP_B0, .display_step = STEP_B0 }, + [0x8] = { .gt_step = STEP_C0, .display_step = STEP_C0 }, +}; + +static const struct intel_step_info dg2_g11_revid_step_tbl[] = { + [0x0] = { .gt_step = STEP_A0, .display_step = STEP_B0 }, + [0x4] 
= { .gt_step = STEP_B0, .display_step = STEP_C0 }, + [0x5] = { .gt_step = STEP_B1, .display_step = STEP_C0 }, +}; + void intel_step_init(struct drm_i915_private *i915) { const struct intel_step_info *revids = NULL; @@ -61,21 +128,51 @@ void intel_step_init(struct drm_i915_private *i915) int revid = INTEL_REVID(i915); struct intel_step_info step = {}; - if (IS_ALDERLAKE_P(i915)) { - revids = adlp_revid_step_tbl; - size = ARRAY_SIZE(adlp_revid_step_tbl); + if (IS_DG2_G10(i915)) { + revids = dg2_g10_revid_step_tbl; + size = ARRAY_SIZE(dg2_g10_revid_step_tbl); + } else if (IS_DG2_G11(i915)) { + revids = dg2_g11_revid_step_tbl; + size = ARRAY_SIZE(dg2_g11_revid_step_tbl); + } else if (IS_XEHPSDV(i915)) { + revids = xehpsdv_revids; + size = ARRAY_SIZE(xehpsdv_revids); + } else if (IS_ALDERLAKE_P(i915)) { + revids = adlp_revids; + size = ARRAY_SIZE(adlp_revids); } else if (IS_ALDERLAKE_S(i915)) { - revids = adls_revid_step_tbl; - size = ARRAY_SIZE(adls_revid_step_tbl); + revids = adls_revids; + size = ARRAY_SIZE(adls_revids); + } else if (IS_DG1(i915)) { + revids = dg1_revids; + size = ARRAY_SIZE(dg1_revids); + } else if (IS_ROCKETLAKE(i915)) { + revids = rkl_revids; + size = ARRAY_SIZE(rkl_revids); } else if (IS_TGL_U(i915) || IS_TGL_Y(i915)) { - revids = tgl_uy_revid_step_tbl; - size = ARRAY_SIZE(tgl_uy_revid_step_tbl); + revids = tgl_uy_revids; + size = ARRAY_SIZE(tgl_uy_revids); } else if (IS_TIGERLAKE(i915)) { - revids = tgl_revid_step_tbl; - size = ARRAY_SIZE(tgl_revid_step_tbl); + revids = tgl_revids; + size = ARRAY_SIZE(tgl_revids); + } else if (IS_JSL_EHL(i915)) { + revids = jsl_ehl_revids; + size = ARRAY_SIZE(jsl_ehl_revids); + } else if (IS_ICELAKE(i915)) { + revids = icl_revids; + size = ARRAY_SIZE(icl_revids); + } else if (IS_GEMINILAKE(i915)) { + revids = glk_revids; + size = ARRAY_SIZE(glk_revids); + } else if (IS_BROXTON(i915)) { + revids = bxt_revids; + size = ARRAY_SIZE(bxt_revids); } else if (IS_KABYLAKE(i915)) { revids = kbl_revids; size = ARRAY_SIZE(kbl_revids); + } else if (IS_SKYLAKE(i915)) { + revids = skl_revids; + size = ARRAY_SIZE(skl_revids); } /* Not using the stepping scheme for the platform yet. */ @@ -114,3 +211,17 @@ void intel_step_init(struct drm_i915_private *i915) RUNTIME_INFO(i915)->step = step; } + +#define STEP_NAME_CASE(name) \ + case STEP_##name: \ + return #name; + +const char *intel_step_name(enum intel_step step) +{ + switch (step) { + STEP_NAME_LIST(STEP_NAME_CASE); + + default: + return "**"; + } +} diff --git a/drivers/gpu/drm/i915/intel_step.h b/drivers/gpu/drm/i915/intel_step.h index 958a8bb5d677..f6641e2a3c77 100644 --- a/drivers/gpu/drm/i915/intel_step.h +++ b/drivers/gpu/drm/i915/intel_step.h @@ -15,26 +15,39 @@ struct intel_step_info { u8 display_step; }; +#define STEP_ENUM_VAL(name) STEP_##name, + +#define STEP_NAME_LIST(func) \ + func(A0) \ + func(A1) \ + func(A2) \ + func(B0) \ + func(B1) \ + func(B2) \ + func(C0) \ + func(C1) \ + func(D0) \ + func(D1) \ + func(E0) \ + func(F0) \ + func(G0) \ + func(H0) \ + func(I0) \ + func(I1) \ + func(J0) + /* * Symbolic steppings that do not match the hardware. These are valid both as gt * and display steppings as symbolic names. 
*/ enum intel_step { STEP_NONE = 0, - STEP_A0, - STEP_A2, - STEP_B0, - STEP_B1, - STEP_C0, - STEP_D0, - STEP_D1, - STEP_E0, - STEP_F0, - STEP_G0, + STEP_NAME_LIST(STEP_ENUM_VAL) STEP_FUTURE, STEP_FOREVER, }; void intel_step_init(struct drm_i915_private *i915); +const char *intel_step_name(enum intel_step step); #endif /* __INTEL_STEP_H__ */ diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 1bed8f666048..6b38bc2811c1 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -24,6 +24,8 @@ #include <linux/pm_runtime.h> #include <asm/iosf_mbi.h> +#include "gt/intel_lrc_reg.h" /* for shadow reg list */ + #include "i915_drv.h" #include "i915_trace.h" #include "i915_vgpu.h" @@ -68,8 +70,14 @@ static const char * const forcewake_domain_names[] = { "vdbox1", "vdbox2", "vdbox3", + "vdbox4", + "vdbox5", + "vdbox6", + "vdbox7", "vebox0", "vebox1", + "vebox2", + "vebox3", }; const char * @@ -952,30 +960,80 @@ static const i915_reg_t gen8_shadowed_regs[] = { }; static const i915_reg_t gen11_shadowed_regs[] = { - RING_TAIL(RENDER_RING_BASE), /* 0x2000 (base) */ - GEN6_RPNSWREQ, /* 0xA008 */ - GEN6_RC_VIDEO_FREQ, /* 0xA00C */ - RING_TAIL(BLT_RING_BASE), /* 0x22000 (base) */ - RING_TAIL(GEN11_BSD_RING_BASE), /* 0x1C0000 (base) */ - RING_TAIL(GEN11_BSD2_RING_BASE), /* 0x1C4000 (base) */ - RING_TAIL(GEN11_VEBOX_RING_BASE), /* 0x1C8000 (base) */ - RING_TAIL(GEN11_BSD3_RING_BASE), /* 0x1D0000 (base) */ - RING_TAIL(GEN11_BSD4_RING_BASE), /* 0x1D4000 (base) */ - RING_TAIL(GEN11_VEBOX2_RING_BASE), /* 0x1D8000 (base) */ + RING_TAIL(RENDER_RING_BASE), /* 0x2000 (base) */ + RING_EXECLIST_CONTROL(RENDER_RING_BASE), /* 0x2550 */ + GEN6_RPNSWREQ, /* 0xA008 */ + GEN6_RC_VIDEO_FREQ, /* 0xA00C */ + RING_TAIL(BLT_RING_BASE), /* 0x22000 (base) */ + RING_EXECLIST_CONTROL(BLT_RING_BASE), /* 0x22550 */ + RING_TAIL(GEN11_BSD_RING_BASE), /* 0x1C0000 (base) */ + RING_EXECLIST_CONTROL(GEN11_BSD_RING_BASE), /* 0x1C0550 */ + RING_TAIL(GEN11_BSD2_RING_BASE), /* 0x1C4000 (base) */ + RING_EXECLIST_CONTROL(GEN11_BSD2_RING_BASE), /* 0x1C4550 */ + RING_TAIL(GEN11_VEBOX_RING_BASE), /* 0x1C8000 (base) */ + RING_EXECLIST_CONTROL(GEN11_VEBOX_RING_BASE), /* 0x1C8550 */ + RING_TAIL(GEN11_BSD3_RING_BASE), /* 0x1D0000 (base) */ + RING_EXECLIST_CONTROL(GEN11_BSD3_RING_BASE), /* 0x1D0550 */ + RING_TAIL(GEN11_BSD4_RING_BASE), /* 0x1D4000 (base) */ + RING_EXECLIST_CONTROL(GEN11_BSD4_RING_BASE), /* 0x1D4550 */ + RING_TAIL(GEN11_VEBOX2_RING_BASE), /* 0x1D8000 (base) */ + RING_EXECLIST_CONTROL(GEN11_VEBOX2_RING_BASE), /* 0x1D8550 */ /* TODO: Other registers are not yet used */ }; static const i915_reg_t gen12_shadowed_regs[] = { - RING_TAIL(RENDER_RING_BASE), /* 0x2000 (base) */ - GEN6_RPNSWREQ, /* 0xA008 */ - GEN6_RC_VIDEO_FREQ, /* 0xA00C */ - RING_TAIL(BLT_RING_BASE), /* 0x22000 (base) */ - RING_TAIL(GEN11_BSD_RING_BASE), /* 0x1C0000 (base) */ - RING_TAIL(GEN11_BSD2_RING_BASE), /* 0x1C4000 (base) */ - RING_TAIL(GEN11_VEBOX_RING_BASE), /* 0x1C8000 (base) */ - RING_TAIL(GEN11_BSD3_RING_BASE), /* 0x1D0000 (base) */ - RING_TAIL(GEN11_BSD4_RING_BASE), /* 0x1D4000 (base) */ - RING_TAIL(GEN11_VEBOX2_RING_BASE), /* 0x1D8000 (base) */ + RING_TAIL(RENDER_RING_BASE), /* 0x2000 (base) */ + RING_EXECLIST_CONTROL(RENDER_RING_BASE), /* 0x2550 */ + GEN6_RPNSWREQ, /* 0xA008 */ + GEN6_RC_VIDEO_FREQ, /* 0xA00C */ + RING_TAIL(BLT_RING_BASE), /* 0x22000 (base) */ + RING_EXECLIST_CONTROL(BLT_RING_BASE), /* 0x22550 */ + RING_TAIL(GEN11_BSD_RING_BASE), /* 0x1C0000 (base) */ + 
RING_EXECLIST_CONTROL(GEN11_BSD_RING_BASE), /* 0x1C0550 */ + RING_TAIL(GEN11_BSD2_RING_BASE), /* 0x1C4000 (base) */ + RING_EXECLIST_CONTROL(GEN11_BSD2_RING_BASE), /* 0x1C4550 */ + RING_TAIL(GEN11_VEBOX_RING_BASE), /* 0x1C8000 (base) */ + RING_EXECLIST_CONTROL(GEN11_VEBOX_RING_BASE), /* 0x1C8550 */ + RING_TAIL(GEN11_BSD3_RING_BASE), /* 0x1D0000 (base) */ + RING_EXECLIST_CONTROL(GEN11_BSD3_RING_BASE), /* 0x1D0550 */ + RING_TAIL(GEN11_BSD4_RING_BASE), /* 0x1D4000 (base) */ + RING_EXECLIST_CONTROL(GEN11_BSD4_RING_BASE), /* 0x1D4550 */ + RING_TAIL(GEN11_VEBOX2_RING_BASE), /* 0x1D8000 (base) */ + RING_EXECLIST_CONTROL(GEN11_VEBOX2_RING_BASE), /* 0x1D8550 */ + /* TODO: Other registers are not yet used */ +}; + +static const i915_reg_t xehp_shadowed_regs[] = { + RING_TAIL(RENDER_RING_BASE), /* 0x2000 (base) */ + RING_EXECLIST_CONTROL(RENDER_RING_BASE), /* 0x2550 */ + GEN6_RPNSWREQ, /* 0xA008 */ + GEN6_RC_VIDEO_FREQ, /* 0xA00C */ + RING_TAIL(BLT_RING_BASE), /* 0x22000 (base) */ + RING_EXECLIST_CONTROL(BLT_RING_BASE), /* 0x22550 */ + RING_TAIL(GEN11_BSD_RING_BASE), /* 0x1C0000 (base) */ + RING_EXECLIST_CONTROL(GEN11_BSD_RING_BASE), /* 0x1C0550 */ + RING_TAIL(GEN11_BSD2_RING_BASE), /* 0x1C4000 (base) */ + RING_EXECLIST_CONTROL(GEN11_BSD2_RING_BASE), /* 0x1C4550 */ + RING_TAIL(GEN11_VEBOX_RING_BASE), /* 0x1C8000 (base) */ + RING_EXECLIST_CONTROL(GEN11_VEBOX_RING_BASE), /* 0x1C8550 */ + RING_TAIL(GEN11_BSD3_RING_BASE), /* 0x1D0000 (base) */ + RING_EXECLIST_CONTROL(GEN11_BSD3_RING_BASE), /* 0x1D0550 */ + RING_TAIL(GEN11_BSD4_RING_BASE), /* 0x1D4000 (base) */ + RING_EXECLIST_CONTROL(GEN11_BSD4_RING_BASE), /* 0x1D4550 */ + RING_TAIL(GEN11_VEBOX2_RING_BASE), /* 0x1D8000 (base) */ + RING_EXECLIST_CONTROL(GEN11_VEBOX2_RING_BASE), /* 0x1D8550 */ + RING_TAIL(XEHP_BSD5_RING_BASE), /* 0x1E0000 (base) */ + RING_EXECLIST_CONTROL(XEHP_BSD5_RING_BASE), /* 0x1E0550 */ + RING_TAIL(XEHP_BSD6_RING_BASE), /* 0x1E4000 (base) */ + RING_EXECLIST_CONTROL(XEHP_BSD6_RING_BASE), /* 0x1E4550 */ + RING_TAIL(XEHP_VEBOX3_RING_BASE), /* 0x1E8000 (base) */ + RING_EXECLIST_CONTROL(XEHP_VEBOX3_RING_BASE), /* 0x1E8550 */ + RING_TAIL(XEHP_BSD7_RING_BASE), /* 0x1F0000 (base) */ + RING_EXECLIST_CONTROL(XEHP_BSD7_RING_BASE), /* 0x1F0550 */ + RING_TAIL(XEHP_BSD8_RING_BASE), /* 0x1F4000 (base) */ + RING_EXECLIST_CONTROL(XEHP_BSD8_RING_BASE), /* 0x1F4550 */ + RING_TAIL(XEHP_VEBOX4_RING_BASE), /* 0x1F8000 (base) */ + RING_EXECLIST_CONTROL(XEHP_VEBOX4_RING_BASE), /* 0x1F8550 */ /* TODO: Other registers are not yet used */ }; @@ -991,17 +1049,18 @@ static int mmio_reg_cmp(u32 key, const i915_reg_t *reg) return 0; } -#define __is_genX_shadowed(x) \ -static bool is_gen##x##_shadowed(u32 offset) \ +#define __is_X_shadowed(x) \ +static bool is_##x##_shadowed(u32 offset) \ { \ - const i915_reg_t *regs = gen##x##_shadowed_regs; \ - return BSEARCH(offset, regs, ARRAY_SIZE(gen##x##_shadowed_regs), \ + const i915_reg_t *regs = x##_shadowed_regs; \ + return BSEARCH(offset, regs, ARRAY_SIZE(x##_shadowed_regs), \ mmio_reg_cmp); \ } -__is_genX_shadowed(8) -__is_genX_shadowed(11) -__is_genX_shadowed(12) +__is_X_shadowed(gen8) +__is_X_shadowed(gen11) +__is_X_shadowed(gen12) +__is_X_shadowed(xehp) static enum forcewake_domains gen6_reg_write_fw_domains(struct intel_uncore *uncore, i915_reg_t reg) @@ -1065,6 +1124,15 @@ static const struct intel_forcewake_range __chv_fw_ranges[] = { __fwd; \ }) +#define __xehp_fwtable_reg_write_fw_domains(uncore, offset) \ +({ \ + enum forcewake_domains __fwd = 0; \ + const u32 __offset = (offset); \ + if 
(!is_xehp_shadowed(__offset)) \ + __fwd = find_fw_domain(uncore, __offset); \ + __fwd; \ +}) + /* *Must* be sorted by offset ranges! See intel_fw_table_check(). */ static const struct intel_forcewake_range __gen9_fw_ranges[] = { GEN_FW_RANGE(0x0, 0xaff, FORCEWAKE_GT), @@ -1249,6 +1317,172 @@ static const struct intel_forcewake_range __gen12_fw_ranges[] = { 0x1d3f00 - 0x1d3fff: VD2 */ }; +/* + * Graphics IP version 12.55 brings a slight change to the 0xd800 range, + * switching it from the GT domain to the render domain. + * + * *Must* be sorted by offset ranges! See intel_fw_table_check(). + */ +#define XEHP_FWRANGES(FW_RANGE_D800) \ + GEN_FW_RANGE(0x0, 0x1fff, 0), /* \ + 0x0 - 0xaff: reserved \ + 0xb00 - 0x1fff: always on */ \ + GEN_FW_RANGE(0x2000, 0x26ff, FORCEWAKE_RENDER), \ + GEN_FW_RANGE(0x2700, 0x4aff, FORCEWAKE_GT), \ + GEN_FW_RANGE(0x4b00, 0x51ff, 0), /* \ + 0x4b00 - 0x4fff: reserved \ + 0x5000 - 0x51ff: always on */ \ + GEN_FW_RANGE(0x5200, 0x7fff, FORCEWAKE_RENDER), \ + GEN_FW_RANGE(0x8000, 0x813f, FORCEWAKE_GT), \ + GEN_FW_RANGE(0x8140, 0x815f, FORCEWAKE_RENDER), \ + GEN_FW_RANGE(0x8160, 0x81ff, 0), /* \ + 0x8160 - 0x817f: reserved \ + 0x8180 - 0x81ff: always on */ \ + GEN_FW_RANGE(0x8200, 0x82ff, FORCEWAKE_GT), \ + GEN_FW_RANGE(0x8300, 0x84ff, FORCEWAKE_RENDER), \ + GEN_FW_RANGE(0x8500, 0x8cff, FORCEWAKE_GT), /* \ + 0x8500 - 0x87ff: gt \ + 0x8800 - 0x8c7f: reserved \ + 0x8c80 - 0x8cff: gt (DG2 only) */ \ + GEN_FW_RANGE(0x8d00, 0x8fff, FORCEWAKE_RENDER), /* \ + 0x8d00 - 0x8dff: render (DG2 only) \ + 0x8e00 - 0x8fff: reserved */ \ + GEN_FW_RANGE(0x9000, 0x94cf, FORCEWAKE_GT), /* \ + 0x9000 - 0x947f: gt \ + 0x9480 - 0x94cf: reserved */ \ + GEN_FW_RANGE(0x94d0, 0x955f, FORCEWAKE_RENDER), \ + GEN_FW_RANGE(0x9560, 0x967f, 0), /* \ + 0x9560 - 0x95ff: always on \ + 0x9600 - 0x967f: reserved */ \ + GEN_FW_RANGE(0x9680, 0x97ff, FORCEWAKE_RENDER), /* \ + 0x9680 - 0x96ff: render (DG2 only) \ + 0x9700 - 0x97ff: reserved */ \ + GEN_FW_RANGE(0x9800, 0xcfff, FORCEWAKE_GT), /* \ + 0x9800 - 0xb4ff: gt \ + 0xb500 - 0xbfff: reserved \ + 0xc000 - 0xcfff: gt */ \ + GEN_FW_RANGE(0xd000, 0xd7ff, 0), \ + GEN_FW_RANGE(0xd800, 0xd87f, FW_RANGE_D800), \ + GEN_FW_RANGE(0xd880, 0xdbff, FORCEWAKE_GT), \ + GEN_FW_RANGE(0xdc00, 0xdcff, FORCEWAKE_RENDER), \ + GEN_FW_RANGE(0xdd00, 0xde7f, FORCEWAKE_GT), /* \ + 0xdd00 - 0xddff: gt \ + 0xde00 - 0xde7f: reserved */ \ + GEN_FW_RANGE(0xde80, 0xe8ff, FORCEWAKE_RENDER), /* \ + 0xde80 - 0xdfff: render \ + 0xe000 - 0xe0ff: reserved \ + 0xe100 - 0xe8ff: render */ \ + GEN_FW_RANGE(0xe900, 0xffff, FORCEWAKE_GT), /* \ + 0xe900 - 0xe9ff: gt \ + 0xea00 - 0xefff: reserved \ + 0xf000 - 0xffff: gt */ \ + GEN_FW_RANGE(0x10000, 0x12fff, 0), /* \ + 0x10000 - 0x11fff: reserved \ + 0x12000 - 0x127ff: always on \ + 0x12800 - 0x12fff: reserved */ \ + GEN_FW_RANGE(0x13000, 0x131ff, FORCEWAKE_MEDIA_VDBOX0), /* DG2 only */ \ + GEN_FW_RANGE(0x13200, 0x13fff, FORCEWAKE_MEDIA_VDBOX2), /* \ + 0x13200 - 0x133ff: VD2 (DG2 only) \ + 0x13400 - 0x13fff: reserved */ \ + GEN_FW_RANGE(0x14000, 0x141ff, FORCEWAKE_MEDIA_VDBOX0), /* XEHPSDV only */ \ + GEN_FW_RANGE(0x14200, 0x143ff, FORCEWAKE_MEDIA_VDBOX2), /* XEHPSDV only */ \ + GEN_FW_RANGE(0x14400, 0x145ff, FORCEWAKE_MEDIA_VDBOX4), /* XEHPSDV only */ \ + GEN_FW_RANGE(0x14600, 0x147ff, FORCEWAKE_MEDIA_VDBOX6), /* XEHPSDV only */ \ + GEN_FW_RANGE(0x14800, 0x14fff, FORCEWAKE_RENDER), \ + GEN_FW_RANGE(0x15000, 0x16dff, FORCEWAKE_GT), /* \ + 0x15000 - 0x15fff: gt (DG2 only) \ + 0x16000 - 0x16dff: reserved */ \ + GEN_FW_RANGE(0x16e00, 0x1ffff, 
FORCEWAKE_RENDER), \ + GEN_FW_RANGE(0x20000, 0x21fff, FORCEWAKE_MEDIA_VDBOX0), /* \ + 0x20000 - 0x20fff: VD0 (XEHPSDV only) \ + 0x21000 - 0x21fff: reserved */ \ + GEN_FW_RANGE(0x22000, 0x23fff, FORCEWAKE_GT), \ + GEN_FW_RANGE(0x24000, 0x2417f, 0), /* \ + 0x24000 - 0x2407f: always on \ + 0x24080 - 0x2417f: reserved */ \ + GEN_FW_RANGE(0x24180, 0x249ff, FORCEWAKE_GT), /* \ + 0x24180 - 0x241ff: gt \ + 0x24200 - 0x249ff: reserved */ \ + GEN_FW_RANGE(0x24a00, 0x251ff, FORCEWAKE_RENDER), /* \ + 0x24a00 - 0x24a7f: render \ + 0x24a80 - 0x251ff: reserved */ \ + GEN_FW_RANGE(0x25200, 0x25fff, FORCEWAKE_GT), /* \ + 0x25200 - 0x252ff: gt \ + 0x25300 - 0x25fff: reserved */ \ + GEN_FW_RANGE(0x26000, 0x2ffff, FORCEWAKE_RENDER), /* \ + 0x26000 - 0x27fff: render \ + 0x28000 - 0x29fff: reserved \ + 0x2a000 - 0x2ffff: undocumented */ \ + GEN_FW_RANGE(0x30000, 0x3ffff, FORCEWAKE_GT), \ + GEN_FW_RANGE(0x40000, 0x1bffff, 0), \ + GEN_FW_RANGE(0x1c0000, 0x1c3fff, FORCEWAKE_MEDIA_VDBOX0), /* \ + 0x1c0000 - 0x1c2bff: VD0 \ + 0x1c2c00 - 0x1c2cff: reserved \ + 0x1c2d00 - 0x1c2dff: VD0 \ + 0x1c2e00 - 0x1c3eff: VD0 (DG2 only) \ + 0x1c3f00 - 0x1c3fff: VD0 */ \ + GEN_FW_RANGE(0x1c4000, 0x1c7fff, FORCEWAKE_MEDIA_VDBOX1), /* \ + 0x1c4000 - 0x1c6bff: VD1 \ + 0x1c6c00 - 0x1c6cff: reserved \ + 0x1c6d00 - 0x1c6dff: VD1 \ + 0x1c6e00 - 0x1c7fff: reserved */ \ + GEN_FW_RANGE(0x1c8000, 0x1cbfff, FORCEWAKE_MEDIA_VEBOX0), /* \ + 0x1c8000 - 0x1ca0ff: VE0 \ + 0x1ca100 - 0x1cbfff: reserved */ \ + GEN_FW_RANGE(0x1cc000, 0x1ccfff, FORCEWAKE_MEDIA_VDBOX0), \ + GEN_FW_RANGE(0x1cd000, 0x1cdfff, FORCEWAKE_MEDIA_VDBOX2), \ + GEN_FW_RANGE(0x1ce000, 0x1cefff, FORCEWAKE_MEDIA_VDBOX4), \ + GEN_FW_RANGE(0x1cf000, 0x1cffff, FORCEWAKE_MEDIA_VDBOX6), \ + GEN_FW_RANGE(0x1d0000, 0x1d3fff, FORCEWAKE_MEDIA_VDBOX2), /* \ + 0x1d0000 - 0x1d2bff: VD2 \ + 0x1d2c00 - 0x1d2cff: reserved \ + 0x1d2d00 - 0x1d2dff: VD2 \ + 0x1d2e00 - 0x1d3dff: VD2 (DG2 only) \ + 0x1d3e00 - 0x1d3eff: reserved \ + 0x1d3f00 - 0x1d3fff: VD2 */ \ + GEN_FW_RANGE(0x1d4000, 0x1d7fff, FORCEWAKE_MEDIA_VDBOX3), /* \ + 0x1d4000 - 0x1d6bff: VD3 \ + 0x1d6c00 - 0x1d6cff: reserved \ + 0x1d6d00 - 0x1d6dff: VD3 \ + 0x1d6e00 - 0x1d7fff: reserved */ \ + GEN_FW_RANGE(0x1d8000, 0x1dffff, FORCEWAKE_MEDIA_VEBOX1), /* \ + 0x1d8000 - 0x1da0ff: VE1 \ + 0x1da100 - 0x1dffff: reserved */ \ + GEN_FW_RANGE(0x1e0000, 0x1e3fff, FORCEWAKE_MEDIA_VDBOX4), /* \ + 0x1e0000 - 0x1e2bff: VD4 \ + 0x1e2c00 - 0x1e2cff: reserved \ + 0x1e2d00 - 0x1e2dff: VD4 \ + 0x1e2e00 - 0x1e3eff: reserved \ + 0x1e3f00 - 0x1e3fff: VD4 */ \ + GEN_FW_RANGE(0x1e4000, 0x1e7fff, FORCEWAKE_MEDIA_VDBOX5), /* \ + 0x1e4000 - 0x1e6bff: VD5 \ + 0x1e6c00 - 0x1e6cff: reserved \ + 0x1e6d00 - 0x1e6dff: VD5 \ + 0x1e6e00 - 0x1e7fff: reserved */ \ + GEN_FW_RANGE(0x1e8000, 0x1effff, FORCEWAKE_MEDIA_VEBOX2), /* \ + 0x1e8000 - 0x1ea0ff: VE2 \ + 0x1ea100 - 0x1effff: reserved */ \ + GEN_FW_RANGE(0x1f0000, 0x1f3fff, FORCEWAKE_MEDIA_VDBOX6), /* \ + 0x1f0000 - 0x1f2bff: VD6 \ + 0x1f2c00 - 0x1f2cff: reserved \ + 0x1f2d00 - 0x1f2dff: VD6 \ + 0x1f2e00 - 0x1f3eff: reserved \ + 0x1f3f00 - 0x1f3fff: VD6 */ \ + GEN_FW_RANGE(0x1f4000, 0x1f7fff, FORCEWAKE_MEDIA_VDBOX7), /* \ + 0x1f4000 - 0x1f6bff: VD7 \ + 0x1f6c00 - 0x1f6cff: reserved \ + 0x1f6d00 - 0x1f6dff: VD7 \ + 0x1f6e00 - 0x1f7fff: reserved */ \ + GEN_FW_RANGE(0x1f8000, 0x1fa0ff, FORCEWAKE_MEDIA_VEBOX3), + +static const struct intel_forcewake_range __xehp_fw_ranges[] = { + XEHP_FWRANGES(FORCEWAKE_GT) +}; + +static const struct intel_forcewake_range __dg2_fw_ranges[] = { + XEHP_FWRANGES(FORCEWAKE_RENDER) +}; + static void 
ilk_dummy_write(struct intel_uncore *uncore) { @@ -1502,6 +1736,7 @@ __gen_write(func, 8) \ __gen_write(func, 16) \ __gen_write(func, 32) +__gen_reg_write_funcs(xehp_fwtable); __gen_reg_write_funcs(gen12_fwtable); __gen_reg_write_funcs(gen11_fwtable); __gen_reg_write_funcs(fwtable); @@ -1582,8 +1817,14 @@ static int __fw_domain_init(struct intel_uncore *uncore, BUILD_BUG_ON(FORCEWAKE_MEDIA_VDBOX1 != (1 << FW_DOMAIN_ID_MEDIA_VDBOX1)); BUILD_BUG_ON(FORCEWAKE_MEDIA_VDBOX2 != (1 << FW_DOMAIN_ID_MEDIA_VDBOX2)); BUILD_BUG_ON(FORCEWAKE_MEDIA_VDBOX3 != (1 << FW_DOMAIN_ID_MEDIA_VDBOX3)); + BUILD_BUG_ON(FORCEWAKE_MEDIA_VDBOX4 != (1 << FW_DOMAIN_ID_MEDIA_VDBOX4)); + BUILD_BUG_ON(FORCEWAKE_MEDIA_VDBOX5 != (1 << FW_DOMAIN_ID_MEDIA_VDBOX5)); + BUILD_BUG_ON(FORCEWAKE_MEDIA_VDBOX6 != (1 << FW_DOMAIN_ID_MEDIA_VDBOX6)); + BUILD_BUG_ON(FORCEWAKE_MEDIA_VDBOX7 != (1 << FW_DOMAIN_ID_MEDIA_VDBOX7)); BUILD_BUG_ON(FORCEWAKE_MEDIA_VEBOX0 != (1 << FW_DOMAIN_ID_MEDIA_VEBOX0)); BUILD_BUG_ON(FORCEWAKE_MEDIA_VEBOX1 != (1 << FW_DOMAIN_ID_MEDIA_VEBOX1)); + BUILD_BUG_ON(FORCEWAKE_MEDIA_VEBOX2 != (1 << FW_DOMAIN_ID_MEDIA_VEBOX2)); + BUILD_BUG_ON(FORCEWAKE_MEDIA_VEBOX3 != (1 << FW_DOMAIN_ID_MEDIA_VEBOX3)); d->mask = BIT(domain_id); @@ -1870,36 +2111,40 @@ static int uncore_forcewake_init(struct intel_uncore *uncore) return ret; forcewake_early_sanitize(uncore, 0); - if (IS_GRAPHICS_VER(i915, 6, 7)) { - ASSIGN_WRITE_MMIO_VFUNCS(uncore, gen6); - - if (IS_VALLEYVIEW(i915)) { - ASSIGN_FW_DOMAINS_TABLE(uncore, __vlv_fw_ranges); - ASSIGN_READ_MMIO_VFUNCS(uncore, fwtable); - } else { - ASSIGN_READ_MMIO_VFUNCS(uncore, gen6); - } - } else if (GRAPHICS_VER(i915) == 8) { - if (IS_CHERRYVIEW(i915)) { - ASSIGN_FW_DOMAINS_TABLE(uncore, __chv_fw_ranges); - ASSIGN_WRITE_MMIO_VFUNCS(uncore, fwtable); - ASSIGN_READ_MMIO_VFUNCS(uncore, fwtable); - } else { - ASSIGN_WRITE_MMIO_VFUNCS(uncore, gen8); - ASSIGN_READ_MMIO_VFUNCS(uncore, gen6); - } - } else if (IS_GRAPHICS_VER(i915, 9, 10)) { - ASSIGN_FW_DOMAINS_TABLE(uncore, __gen9_fw_ranges); - ASSIGN_WRITE_MMIO_VFUNCS(uncore, fwtable); - ASSIGN_READ_MMIO_VFUNCS(uncore, fwtable); - } else if (GRAPHICS_VER(i915) == 11) { - ASSIGN_FW_DOMAINS_TABLE(uncore, __gen11_fw_ranges); - ASSIGN_WRITE_MMIO_VFUNCS(uncore, gen11_fwtable); + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55)) { + ASSIGN_FW_DOMAINS_TABLE(uncore, __dg2_fw_ranges); + ASSIGN_WRITE_MMIO_VFUNCS(uncore, xehp_fwtable); ASSIGN_READ_MMIO_VFUNCS(uncore, gen11_fwtable); - } else { + } else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) { + ASSIGN_FW_DOMAINS_TABLE(uncore, __xehp_fw_ranges); + ASSIGN_WRITE_MMIO_VFUNCS(uncore, xehp_fwtable); + ASSIGN_READ_MMIO_VFUNCS(uncore, gen11_fwtable); + } else if (GRAPHICS_VER(i915) >= 12) { ASSIGN_FW_DOMAINS_TABLE(uncore, __gen12_fw_ranges); ASSIGN_WRITE_MMIO_VFUNCS(uncore, gen12_fwtable); ASSIGN_READ_MMIO_VFUNCS(uncore, gen12_fwtable); + } else if (GRAPHICS_VER(i915) == 11) { + ASSIGN_FW_DOMAINS_TABLE(uncore, __gen11_fw_ranges); + ASSIGN_WRITE_MMIO_VFUNCS(uncore, gen11_fwtable); + ASSIGN_READ_MMIO_VFUNCS(uncore, gen11_fwtable); + } else if (IS_GRAPHICS_VER(i915, 9, 10)) { + ASSIGN_FW_DOMAINS_TABLE(uncore, __gen9_fw_ranges); + ASSIGN_WRITE_MMIO_VFUNCS(uncore, fwtable); + ASSIGN_READ_MMIO_VFUNCS(uncore, fwtable); + } else if (IS_CHERRYVIEW(i915)) { + ASSIGN_FW_DOMAINS_TABLE(uncore, __chv_fw_ranges); + ASSIGN_WRITE_MMIO_VFUNCS(uncore, fwtable); + ASSIGN_READ_MMIO_VFUNCS(uncore, fwtable); + } else if (GRAPHICS_VER(i915) == 8) { + ASSIGN_WRITE_MMIO_VFUNCS(uncore, gen8); + ASSIGN_READ_MMIO_VFUNCS(uncore, gen6); + } 
else if (IS_VALLEYVIEW(i915)) { + ASSIGN_FW_DOMAINS_TABLE(uncore, __vlv_fw_ranges); + ASSIGN_WRITE_MMIO_VFUNCS(uncore, gen6); + ASSIGN_READ_MMIO_VFUNCS(uncore, fwtable); + } else if (IS_GRAPHICS_VER(i915, 6, 7)) { + ASSIGN_WRITE_MMIO_VFUNCS(uncore, gen6); + ASSIGN_READ_MMIO_VFUNCS(uncore, gen6); } uncore->pmic_bus_access_nb.notifier_call = i915_pmic_bus_access_notifier; @@ -1929,7 +2174,7 @@ int intel_uncore_init_mmio(struct intel_uncore *uncore) return -ENODEV; } - if (INTEL_GEN(i915) > 5 && !intel_vgpu_active(i915)) + if (GRAPHICS_VER(i915) > 5 && !intel_vgpu_active(i915)) uncore->flags |= UNCORE_HAS_FORCEWAKE; if (!intel_uncore_has_forcewake(uncore)) { @@ -1988,6 +2233,22 @@ void intel_uncore_prune_engine_fw_domains(struct intel_uncore *uncore, if (HAS_ENGINE(gt, _VCS(i))) continue; + /* + * Starting with XeHP, the power well for an even-numbered + * VDBOX is also used for shared units within the + * media slice such as SFC. So even if the engine + * itself is fused off, we still need to initialize + * the forcewake domain if any of the other engines + * in the same media slice are present. + */ + if (GRAPHICS_VER_FULL(uncore->i915) >= IP_VER(12, 50) && i % 2 == 0) { + if ((i + 1 < I915_MAX_VCS) && HAS_ENGINE(gt, _VCS(i + 1))) + continue; + + if (HAS_ENGINE(gt, _VECS(i / 2))) + continue; + } + if (fw_domains & BIT(domain_id)) fw_domain_fini(uncore, domain_id); } @@ -2277,6 +2538,61 @@ intel_uncore_forcewake_for_reg(struct intel_uncore *uncore, return fw_domains; } +u32 intel_uncore_read_with_mcr_steering_fw(struct intel_uncore *uncore, + i915_reg_t reg, + int slice, int subslice) +{ + u32 mcr_mask, mcr_ss, mcr, old_mcr, val; + + lockdep_assert_held(&uncore->lock); + + if (GRAPHICS_VER(uncore->i915) >= 11) { + mcr_mask = GEN11_MCR_SLICE_MASK | GEN11_MCR_SUBSLICE_MASK; + mcr_ss = GEN11_MCR_SLICE(slice) | GEN11_MCR_SUBSLICE(subslice); + } else { + mcr_mask = GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK; + mcr_ss = GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice); + } + + old_mcr = mcr = intel_uncore_read_fw(uncore, GEN8_MCR_SELECTOR); + + mcr &= ~mcr_mask; + mcr |= mcr_ss; + intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr); + + val = intel_uncore_read_fw(uncore, reg); + + mcr &= ~mcr_mask; + mcr |= old_mcr & mcr_mask; + + intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr); + + return val; +} + +u32 intel_uncore_read_with_mcr_steering(struct intel_uncore *uncore, + i915_reg_t reg, int slice, int subslice) +{ + enum forcewake_domains fw_domains; + u32 val; + + fw_domains = intel_uncore_forcewake_for_reg(uncore, reg, + FW_REG_READ); + fw_domains |= intel_uncore_forcewake_for_reg(uncore, + GEN8_MCR_SELECTOR, + FW_REG_READ | FW_REG_WRITE); + + spin_lock_irq(&uncore->lock); + intel_uncore_forcewake_get__locked(uncore, fw_domains); + + val = intel_uncore_read_with_mcr_steering_fw(uncore, reg, slice, subslice); + + intel_uncore_forcewake_put__locked(uncore, fw_domains); + spin_unlock_irq(&uncore->lock); + + return val; +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/mock_uncore.c" #include "selftests/intel_uncore.c" diff --git a/drivers/gpu/drm/i915/intel_uncore.h b/drivers/gpu/drm/i915/intel_uncore.h index 59f0da8f1fbb..3c0b0a8b5250 100644 --- a/drivers/gpu/drm/i915/intel_uncore.h +++ b/drivers/gpu/drm/i915/intel_uncore.h @@ -52,8 +52,14 @@ enum forcewake_domain_id { FW_DOMAIN_ID_MEDIA_VDBOX1, FW_DOMAIN_ID_MEDIA_VDBOX2, FW_DOMAIN_ID_MEDIA_VDBOX3, + FW_DOMAIN_ID_MEDIA_VDBOX4, + FW_DOMAIN_ID_MEDIA_VDBOX5, + FW_DOMAIN_ID_MEDIA_VDBOX6, + FW_DOMAIN_ID_MEDIA_VDBOX7, 
FW_DOMAIN_ID_MEDIA_VEBOX0, FW_DOMAIN_ID_MEDIA_VEBOX1, + FW_DOMAIN_ID_MEDIA_VEBOX2, + FW_DOMAIN_ID_MEDIA_VEBOX3, FW_DOMAIN_ID_COUNT }; @@ -66,10 +72,16 @@ enum forcewake_domains { FORCEWAKE_MEDIA_VDBOX1 = BIT(FW_DOMAIN_ID_MEDIA_VDBOX1), FORCEWAKE_MEDIA_VDBOX2 = BIT(FW_DOMAIN_ID_MEDIA_VDBOX2), FORCEWAKE_MEDIA_VDBOX3 = BIT(FW_DOMAIN_ID_MEDIA_VDBOX3), + FORCEWAKE_MEDIA_VDBOX4 = BIT(FW_DOMAIN_ID_MEDIA_VDBOX4), + FORCEWAKE_MEDIA_VDBOX5 = BIT(FW_DOMAIN_ID_MEDIA_VDBOX5), + FORCEWAKE_MEDIA_VDBOX6 = BIT(FW_DOMAIN_ID_MEDIA_VDBOX6), + FORCEWAKE_MEDIA_VDBOX7 = BIT(FW_DOMAIN_ID_MEDIA_VDBOX7), FORCEWAKE_MEDIA_VEBOX0 = BIT(FW_DOMAIN_ID_MEDIA_VEBOX0), FORCEWAKE_MEDIA_VEBOX1 = BIT(FW_DOMAIN_ID_MEDIA_VEBOX1), + FORCEWAKE_MEDIA_VEBOX2 = BIT(FW_DOMAIN_ID_MEDIA_VEBOX2), + FORCEWAKE_MEDIA_VEBOX3 = BIT(FW_DOMAIN_ID_MEDIA_VEBOX3), - FORCEWAKE_ALL = BIT(FW_DOMAIN_ID_COUNT) - 1 + FORCEWAKE_ALL = BIT(FW_DOMAIN_ID_COUNT) - 1, }; struct intel_uncore_funcs { @@ -182,6 +194,12 @@ intel_uncore_has_fifo(const struct intel_uncore *uncore) return uncore->flags & UNCORE_HAS_FIFO; } +u32 intel_uncore_read_with_mcr_steering_fw(struct intel_uncore *uncore, + i915_reg_t reg, + int slice, int subslice); +u32 intel_uncore_read_with_mcr_steering(struct intel_uncore *uncore, + i915_reg_t reg, int slice, int subslice); + void intel_uncore_mmio_debug_init_early(struct intel_uncore_mmio_debug *mmio_debug); void intel_uncore_init_early(struct intel_uncore *uncore, diff --git a/drivers/gpu/drm/i915/intel_wopcm.c b/drivers/gpu/drm/i915/intel_wopcm.c index 8309455f13ea..5e511bb891f9 100644 --- a/drivers/gpu/drm/i915/intel_wopcm.c +++ b/drivers/gpu/drm/i915/intel_wopcm.c @@ -56,8 +56,8 @@ /* 24KB at the end of WOPCM is reserved for RC6 CTX on BXT. */ #define BXT_WOPCM_RC6_CTX_RESERVED (SZ_16K + SZ_8K) -/* 36KB WOPCM reserved at the end of WOPCM on CNL. */ -#define CNL_WOPCM_HW_CTX_RESERVED (SZ_32K + SZ_4K) +/* 36KB WOPCM reserved at the end of WOPCM on ICL. */ +#define ICL_WOPCM_HW_CTX_RESERVED (SZ_32K + SZ_4K) /* 128KB from GUC_WOPCM_RESERVED is reserved for FW on Gen9. */ #define GEN9_GUC_FW_RESERVED SZ_128K @@ -93,8 +93,8 @@ static u32 context_reserved_size(struct drm_i915_private *i915) { if (IS_GEN9_LP(i915)) return BXT_WOPCM_RC6_CTX_RESERVED; - else if (GRAPHICS_VER(i915) >= 10) - return CNL_WOPCM_HW_CTX_RESERVED; + else if (GRAPHICS_VER(i915) >= 11) + return ICL_WOPCM_HW_CTX_RESERVED; else return 0; } @@ -126,7 +126,7 @@ static bool gen9_check_huc_fw_fits(struct drm_i915_private *i915, u32 guc_wopcm_size, u32 huc_fw_size) { /* - * On Gen9 & CNL A0, hardware requires the total available GuC WOPCM + * On Gen9, hardware requires the total available GuC WOPCM * size to be larger than or equal to HuC firmware size. Otherwise, * firmware uploading would fail. 
*/ diff --git a/drivers/gpu/drm/i915/selftests/i915_buddy.c b/drivers/gpu/drm/i915/selftests/i915_buddy.c new file mode 100644 index 000000000000..d61ec9c951bf --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_buddy.c @@ -0,0 +1,787 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2019 Intel Corporation + */ + +#include <linux/prime_numbers.h> + +#include "../i915_selftest.h" +#include "i915_random.h" + +static void __igt_dump_block(struct i915_buddy_mm *mm, + struct i915_buddy_block *block, + bool buddy) +{ + pr_err("block info: header=%llx, state=%u, order=%d, offset=%llx size=%llx root=%s buddy=%s\n", + block->header, + i915_buddy_block_state(block), + i915_buddy_block_order(block), + i915_buddy_block_offset(block), + i915_buddy_block_size(mm, block), + yesno(!block->parent), + yesno(buddy)); +} + +static void igt_dump_block(struct i915_buddy_mm *mm, + struct i915_buddy_block *block) +{ + struct i915_buddy_block *buddy; + + __igt_dump_block(mm, block, false); + + buddy = get_buddy(block); + if (buddy) + __igt_dump_block(mm, buddy, true); +} + +static int igt_check_block(struct i915_buddy_mm *mm, + struct i915_buddy_block *block) +{ + struct i915_buddy_block *buddy; + unsigned int block_state; + u64 block_size; + u64 offset; + int err = 0; + + block_state = i915_buddy_block_state(block); + + if (block_state != I915_BUDDY_ALLOCATED && + block_state != I915_BUDDY_FREE && + block_state != I915_BUDDY_SPLIT) { + pr_err("block state mismatch\n"); + err = -EINVAL; + } + + block_size = i915_buddy_block_size(mm, block); + offset = i915_buddy_block_offset(block); + + if (block_size < mm->chunk_size) { + pr_err("block size smaller than min size\n"); + err = -EINVAL; + } + + if (!is_power_of_2(block_size)) { + pr_err("block size not power of two\n"); + err = -EINVAL; + } + + if (!IS_ALIGNED(block_size, mm->chunk_size)) { + pr_err("block size not aligned to min size\n"); + err = -EINVAL; + } + + if (!IS_ALIGNED(offset, mm->chunk_size)) { + pr_err("block offset not aligned to min size\n"); + err = -EINVAL; + } + + if (!IS_ALIGNED(offset, block_size)) { + pr_err("block offset not aligned to block size\n"); + err = -EINVAL; + } + + buddy = get_buddy(block); + + if (!buddy && block->parent) { + pr_err("buddy has gone fishing\n"); + err = -EINVAL; + } + + if (buddy) { + if (i915_buddy_block_offset(buddy) != (offset ^ block_size)) { + pr_err("buddy has wrong offset\n"); + err = -EINVAL; + } + + if (i915_buddy_block_size(mm, buddy) != block_size) { + pr_err("buddy size mismatch\n"); + err = -EINVAL; + } + + if (i915_buddy_block_state(buddy) == block_state && + block_state == I915_BUDDY_FREE) { + pr_err("block and its buddy are free\n"); + err = -EINVAL; + } + } + + return err; +} + +static int igt_check_blocks(struct i915_buddy_mm *mm, + struct list_head *blocks, + u64 expected_size, + bool is_contiguous) +{ + struct i915_buddy_block *block; + struct i915_buddy_block *prev; + u64 total; + int err = 0; + + block = NULL; + prev = NULL; + total = 0; + + list_for_each_entry(block, blocks, link) { + err = igt_check_block(mm, block); + + if (!i915_buddy_block_is_allocated(block)) { + pr_err("block not allocated\n"), + err = -EINVAL; + } + + if (is_contiguous && prev) { + u64 prev_block_size; + u64 prev_offset; + u64 offset; + + prev_offset = i915_buddy_block_offset(prev); + prev_block_size = i915_buddy_block_size(mm, prev); + offset = i915_buddy_block_offset(block); + + if (offset != (prev_offset + prev_block_size)) { + pr_err("block offset mismatch\n"); + err = -EINVAL; + } + } + + if (err) + break; 
+ + total += i915_buddy_block_size(mm, block); + prev = block; + } + + if (!err) { + if (total != expected_size) { + pr_err("size mismatch, expected=%llx, found=%llx\n", + expected_size, total); + err = -EINVAL; + } + return err; + } + + if (prev) { + pr_err("prev block, dump:\n"); + igt_dump_block(mm, prev); + } + + pr_err("bad block, dump:\n"); + igt_dump_block(mm, block); + + return err; +} + +static int igt_check_mm(struct i915_buddy_mm *mm) +{ + struct i915_buddy_block *root; + struct i915_buddy_block *prev; + unsigned int i; + u64 total; + int err = 0; + + if (!mm->n_roots) { + pr_err("n_roots is zero\n"); + return -EINVAL; + } + + if (mm->n_roots != hweight64(mm->size)) { + pr_err("n_roots mismatch, n_roots=%u, expected=%lu\n", + mm->n_roots, hweight64(mm->size)); + return -EINVAL; + } + + root = NULL; + prev = NULL; + total = 0; + + for (i = 0; i < mm->n_roots; ++i) { + struct i915_buddy_block *block; + unsigned int order; + + root = mm->roots[i]; + if (!root) { + pr_err("root(%u) is NULL\n", i); + err = -EINVAL; + break; + } + + err = igt_check_block(mm, root); + + if (!i915_buddy_block_is_free(root)) { + pr_err("root not free\n"); + err = -EINVAL; + } + + order = i915_buddy_block_order(root); + + if (!i) { + if (order != mm->max_order) { + pr_err("max order root missing\n"); + err = -EINVAL; + } + } + + if (prev) { + u64 prev_block_size; + u64 prev_offset; + u64 offset; + + prev_offset = i915_buddy_block_offset(prev); + prev_block_size = i915_buddy_block_size(mm, prev); + offset = i915_buddy_block_offset(root); + + if (offset != (prev_offset + prev_block_size)) { + pr_err("root offset mismatch\n"); + err = -EINVAL; + } + } + + block = list_first_entry_or_null(&mm->free_list[order], + struct i915_buddy_block, + link); + if (block != root) { + pr_err("root mismatch at order=%u\n", order); + err = -EINVAL; + } + + if (err) + break; + + prev = root; + total += i915_buddy_block_size(mm, root); + } + + if (!err) { + if (total != mm->size) { + pr_err("expected mm size=%llx, found=%llx\n", mm->size, + total); + err = -EINVAL; + } + return err; + } + + if (prev) { + pr_err("prev root(%u), dump:\n", i - 1); + igt_dump_block(mm, prev); + } + + if (root) { + pr_err("bad root(%u), dump:\n", i); + igt_dump_block(mm, root); + } + + return err; +} + +static void igt_mm_config(u64 *size, u64 *chunk_size) +{ + I915_RND_STATE(prng); + u32 s, ms; + + /* Nothing fancy, just try to get an interesting bit pattern */ + + prandom_seed_state(&prng, i915_selftest.random_seed); + + /* Let size be a random number of pages up to 8 GB (2M pages) */ + s = 1 + i915_prandom_u32_max_state((BIT(33 - 12)) - 1, &prng); + /* Let the chunk size be a random power of 2 less than size */ + ms = BIT(i915_prandom_u32_max_state(ilog2(s), &prng)); + /* Round size down to the chunk size */ + s &= -ms; + + /* Convert from pages to bytes */ + *chunk_size = (u64)ms << 12; + *size = (u64)s << 12; +} + +static int igt_buddy_alloc_smoke(void *arg) +{ + struct i915_buddy_mm mm; + IGT_TIMEOUT(end_time); + I915_RND_STATE(prng); + u64 chunk_size; + u64 mm_size; + int *order; + int err, i; + + igt_mm_config(&mm_size, &chunk_size); + + pr_info("buddy_init with size=%llx, chunk_size=%llx\n", mm_size, chunk_size); + + err = i915_buddy_init(&mm, mm_size, chunk_size); + if (err) { + pr_err("buddy_init failed(%d)\n", err); + return err; + } + + order = i915_random_order(mm.max_order + 1, &prng); + if (!order) + goto out_fini; + + for (i = 0; i <= mm.max_order; ++i) { + struct i915_buddy_block *block; + int max_order = order[i]; + bool timeout 
= false; + LIST_HEAD(blocks); + int order; + u64 total; + + err = igt_check_mm(&mm); + if (err) { + pr_err("pre-mm check failed, abort\n"); + break; + } + + pr_info("filling from max_order=%u\n", max_order); + + order = max_order; + total = 0; + + do { +retry: + block = i915_buddy_alloc(&mm, order); + if (IS_ERR(block)) { + err = PTR_ERR(block); + if (err == -ENOMEM) { + pr_info("buddy_alloc hit -ENOMEM with order=%d\n", + order); + } else { + if (order--) { + err = 0; + goto retry; + } + + pr_err("buddy_alloc with order=%d failed(%d)\n", + order, err); + } + + break; + } + + list_add_tail(&block->link, &blocks); + + if (i915_buddy_block_order(block) != order) { + pr_err("buddy_alloc order mismatch\n"); + err = -EINVAL; + break; + } + + total += i915_buddy_block_size(&mm, block); + + if (__igt_timeout(end_time, NULL)) { + timeout = true; + break; + } + } while (total < mm.size); + + if (!err) + err = igt_check_blocks(&mm, &blocks, total, false); + + i915_buddy_free_list(&mm, &blocks); + + if (!err) { + err = igt_check_mm(&mm); + if (err) + pr_err("post-mm check failed\n"); + } + + if (err || timeout) + break; + + cond_resched(); + } + + if (err == -ENOMEM) + err = 0; + + kfree(order); +out_fini: + i915_buddy_fini(&mm); + + return err; +} + +static int igt_buddy_alloc_pessimistic(void *arg) +{ + const unsigned int max_order = 16; + struct i915_buddy_block *block, *bn; + struct i915_buddy_mm mm; + unsigned int order; + LIST_HEAD(blocks); + int err; + + /* + * Create a pot-sized mm, then allocate one of each possible + * order within. This should leave the mm with exactly one + * page left. + */ + + err = i915_buddy_init(&mm, PAGE_SIZE << max_order, PAGE_SIZE); + if (err) { + pr_err("buddy_init failed(%d)\n", err); + return err; + } + GEM_BUG_ON(mm.max_order != max_order); + + for (order = 0; order < max_order; order++) { + block = i915_buddy_alloc(&mm, order); + if (IS_ERR(block)) { + pr_info("buddy_alloc hit -ENOMEM with order=%d\n", + order); + err = PTR_ERR(block); + goto err; + } + + list_add_tail(&block->link, &blocks); + } + + /* And now the last remaining block available */ + block = i915_buddy_alloc(&mm, 0); + if (IS_ERR(block)) { + pr_info("buddy_alloc hit -ENOMEM on final alloc\n"); + err = PTR_ERR(block); + goto err; + } + list_add_tail(&block->link, &blocks); + + /* Should be completely full! 
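 * (A quick sanity check of the arithmetic: with max_order = 16 the
 * loop above claimed 2^0 + 2^1 + ... + 2^15 = 2^16 - 1 pages, and the
 * final order-0 allocation takes the one page that was left, so every
 * page of the PAGE_SIZE << max_order mm is now accounted for.)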
*/ + for (order = max_order; order--; ) { + block = i915_buddy_alloc(&mm, order); + if (!IS_ERR(block)) { + pr_info("buddy_alloc unexpectedly succeeded at order %d, it should be full!", + order); + list_add_tail(&block->link, &blocks); + err = -EINVAL; + goto err; + } + } + + block = list_last_entry(&blocks, typeof(*block), link); + list_del(&block->link); + i915_buddy_free(&mm, block); + + /* As we free in increasing size, we make available larger blocks */ + order = 1; + list_for_each_entry_safe(block, bn, &blocks, link) { + list_del(&block->link); + i915_buddy_free(&mm, block); + + block = i915_buddy_alloc(&mm, order); + if (IS_ERR(block)) { + pr_info("buddy_alloc (realloc) hit -ENOMEM with order=%d\n", + order); + err = PTR_ERR(block); + goto err; + } + i915_buddy_free(&mm, block); + order++; + } + + /* To confirm, now the whole mm should be available */ + block = i915_buddy_alloc(&mm, max_order); + if (IS_ERR(block)) { + pr_info("buddy_alloc (realloc) hit -ENOMEM with order=%d\n", + max_order); + err = PTR_ERR(block); + goto err; + } + i915_buddy_free(&mm, block); + +err: + i915_buddy_free_list(&mm, &blocks); + i915_buddy_fini(&mm); + return err; +} + +static int igt_buddy_alloc_optimistic(void *arg) +{ + const int max_order = 16; + struct i915_buddy_block *block; + struct i915_buddy_mm mm; + LIST_HEAD(blocks); + int order; + int err; + + /* + * Create a mm with one block of each order available, and + * try to allocate them all. + */ + + err = i915_buddy_init(&mm, + PAGE_SIZE * ((1 << (max_order + 1)) - 1), + PAGE_SIZE); + if (err) { + pr_err("buddy_init failed(%d)\n", err); + return err; + } + GEM_BUG_ON(mm.max_order != max_order); + + for (order = 0; order <= max_order; order++) { + block = i915_buddy_alloc(&mm, order); + if (IS_ERR(block)) { + pr_info("buddy_alloc hit -ENOMEM with order=%d\n", + order); + err = PTR_ERR(block); + goto err; + } + + list_add_tail(&block->link, &blocks); + } + + /* Should be completely full! */ + block = i915_buddy_alloc(&mm, 0); + if (!IS_ERR(block)) { + pr_info("buddy_alloc unexpectedly succeeded, it should be full!"); + list_add_tail(&block->link, &blocks); + err = -EINVAL; + goto err; + } + +err: + i915_buddy_free_list(&mm, &blocks); + i915_buddy_fini(&mm); + return err; +} + +static int igt_buddy_alloc_pathological(void *arg) +{ + const int max_order = 16; + struct i915_buddy_block *block; + struct i915_buddy_mm mm; + LIST_HEAD(blocks); + LIST_HEAD(holes); + int order, top; + int err; + + /* + * Create a pot-sized mm, then allocate one of each possible + * order within. This should leave the mm with exactly one + * page left. Free the largest block, then whittle down again. + * Eventually we will have a fully 50% fragmented mm. 
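 * The order-0 blocks parked on the holes list are what keep the mm
 * fragmented: the idea is that by the time they are handed back,
 * their buddies are still tied up in the blocks list, so the freed
 * space cannot coalesce above chunk_size; the final "nothing larger
 * than chunk_size" loop below checks exactly that.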
+ */ + + err = i915_buddy_init(&mm, PAGE_SIZE << max_order, PAGE_SIZE); + if (err) { + pr_err("buddy_init failed(%d)\n", err); + return err; + } + GEM_BUG_ON(mm.max_order != max_order); + + for (top = max_order; top; top--) { + /* Make room by freeing the largest allocated block */ + block = list_first_entry_or_null(&blocks, typeof(*block), link); + if (block) { + list_del(&block->link); + i915_buddy_free(&mm, block); + } + + for (order = top; order--; ) { + block = i915_buddy_alloc(&mm, order); + if (IS_ERR(block)) { + pr_info("buddy_alloc hit -ENOMEM with order=%d, top=%d\n", + order, top); + err = PTR_ERR(block); + goto err; + } + list_add_tail(&block->link, &blocks); + } + + /* There should be one final page for this sub-allocation */ + block = i915_buddy_alloc(&mm, 0); + if (IS_ERR(block)) { + pr_info("buddy_alloc hit -ENOMEM for hole\n"); + err = PTR_ERR(block); + goto err; + } + list_add_tail(&block->link, &holes); + + block = i915_buddy_alloc(&mm, top); + if (!IS_ERR(block)) { + pr_info("buddy_alloc unexpectedly succeeded at top-order %d/%d, it should be full!", + top, max_order); + list_add_tail(&block->link, &blocks); + err = -EINVAL; + goto err; + } + } + + i915_buddy_free_list(&mm, &holes); + + /* Nothing larger than blocks of chunk_size now available */ + for (order = 1; order <= max_order; order++) { + block = i915_buddy_alloc(&mm, order); + if (!IS_ERR(block)) { + pr_info("buddy_alloc unexpectedly succeeded at order %d, it should be full!", + order); + list_add_tail(&block->link, &blocks); + err = -EINVAL; + goto err; + } + } + +err: + list_splice_tail(&holes, &blocks); + i915_buddy_free_list(&mm, &blocks); + i915_buddy_fini(&mm); + return err; +} + +static int igt_buddy_alloc_range(void *arg) +{ + struct i915_buddy_mm mm; + unsigned long page_num; + LIST_HEAD(blocks); + u64 chunk_size; + u64 offset; + u64 size; + u64 rem; + int err; + + igt_mm_config(&size, &chunk_size); + + pr_info("buddy_init with size=%llx, chunk_size=%llx\n", size, chunk_size); + + err = i915_buddy_init(&mm, size, chunk_size); + if (err) { + pr_err("buddy_init failed(%d)\n", err); + return err; + } + + err = igt_check_mm(&mm); + if (err) { + pr_err("pre-mm check failed, abort, abort, abort!\n"); + goto err_fini; + } + + rem = mm.size; + offset = 0; + + for_each_prime_number_from(page_num, 1, ULONG_MAX - 1) { + struct i915_buddy_block *block; + LIST_HEAD(tmp); + + size = min(page_num * mm.chunk_size, rem); + + err = i915_buddy_alloc_range(&mm, &tmp, offset, size); + if (err) { + if (err == -ENOMEM) { + pr_info("alloc_range hit -ENOMEM with size=%llx\n", + size); + } else { + pr_err("alloc_range with offset=%llx, size=%llx failed(%d)\n", + offset, size, err); + } + + break; + } + + block = list_first_entry_or_null(&tmp, + struct i915_buddy_block, + link); + if (!block) { + pr_err("alloc_range has no blocks\n"); + err = -EINVAL; + break; + } + + if (i915_buddy_block_offset(block) != offset) { + pr_err("alloc_range start offset mismatch, found=%llx, expected=%llx\n", + i915_buddy_block_offset(block), offset); + err = -EINVAL; + } + + if (!err) + err = igt_check_blocks(&mm, &tmp, size, true); + + list_splice_tail(&tmp, &blocks); + + if (err) + break; + + offset += size; + + rem -= size; + if (!rem) + break; + + cond_resched(); + } + + if (err == -ENOMEM) + err = 0; + + i915_buddy_free_list(&mm, &blocks); + + if (!err) { + err = igt_check_mm(&mm); + if (err) + pr_err("post-mm check failed\n"); + } + +err_fini: + i915_buddy_fini(&mm); + + return err; +} + +static int igt_buddy_alloc_limit(void *arg) +{ + 
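	/*
	 * Sanity check the degenerate case of a maximally sized mm: ask
	 * for size == U64_MAX with PAGE_SIZE chunks and verify that
	 * i915_buddy_init() comes back with mm.max_order ==
	 * I915_BUDDY_MAX_ORDER, and that the largest block we can then
	 * allocate spans exactly BIT_ULL(max_order) pages.
	 */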
struct i915_buddy_block *block; + struct i915_buddy_mm mm; + const u64 size = U64_MAX; + int err; + + err = i915_buddy_init(&mm, size, PAGE_SIZE); + if (err) + return err; + + if (mm.max_order != I915_BUDDY_MAX_ORDER) { + pr_err("mm.max_order(%d) != %d\n", + mm.max_order, I915_BUDDY_MAX_ORDER); + err = -EINVAL; + goto out_fini; + } + + block = i915_buddy_alloc(&mm, mm.max_order); + if (IS_ERR(block)) { + err = PTR_ERR(block); + goto out_fini; + } + + if (i915_buddy_block_order(block) != mm.max_order) { + pr_err("block order(%d) != %d\n", + i915_buddy_block_order(block), mm.max_order); + err = -EINVAL; + goto out_free; + } + + if (i915_buddy_block_size(&mm, block) != + BIT_ULL(mm.max_order) * PAGE_SIZE) { + pr_err("block size(%llu) != %llu\n", + i915_buddy_block_size(&mm, block), + BIT_ULL(mm.max_order) * PAGE_SIZE); + err = -EINVAL; + goto out_free; + } + +out_free: + i915_buddy_free(&mm, block); +out_fini: + i915_buddy_fini(&mm); + return err; +} + +int i915_buddy_mock_selftests(void) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_buddy_alloc_pessimistic), + SUBTEST(igt_buddy_alloc_optimistic), + SUBTEST(igt_buddy_alloc_pathological), + SUBTEST(igt_buddy_alloc_smoke), + SUBTEST(igt_buddy_alloc_range), + SUBTEST(igt_buddy_alloc_limit), + }; + + return i915_subtests(tests, NULL); +} diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h index a92c0e9b7e6b..cfa5c4165a4f 100644 --- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h @@ -26,6 +26,7 @@ selftest(gt_mocs, intel_mocs_live_selftests) selftest(gt_pm, intel_gt_pm_live_selftests) selftest(gt_heartbeat, intel_heartbeat_live_selftests) selftest(requests, i915_request_live_selftests) +selftest(migrate, intel_migrate_live_selftests) selftest(active, i915_active_live_selftests) selftest(objects, i915_gem_object_live_selftests) selftest(mman, i915_gem_mman_live_selftests) @@ -37,14 +38,14 @@ selftest(gem, i915_gem_live_selftests) selftest(evict, i915_gem_evict_live_selftests) selftest(hugepages, i915_gem_huge_page_live_selftests) selftest(gem_contexts, i915_gem_context_live_selftests) -selftest(gem_execbuf, i915_gem_execbuffer_live_selftests) -selftest(blt, i915_gem_object_blt_live_selftests) selftest(client, i915_gem_client_blt_live_selftests) +selftest(gem_migrate, i915_gem_migrate_live_selftests) selftest(reset, intel_reset_live_selftests) selftest(memory_region, intel_memory_region_live_selftests) selftest(hangcheck, intel_hangcheck_live_selftests) selftest(execlists, intel_execlists_live_selftests) selftest(ring_submission, intel_ring_submission_live_selftests) selftest(perf, i915_perf_live_selftests) +selftest(slpc, intel_slpc_live_selftests) /* Here be dragons: keep last to run last! 
*/ selftest(late_gt_pm, intel_gt_pm_late_selftests) diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h index 34e5caf38093..793fb28a770d 100644 --- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h @@ -32,5 +32,5 @@ selftest(vma, i915_vma_mock_selftests) selftest(evict, i915_gem_evict_mock_selftests) selftest(gtt, i915_gem_gtt_mock_selftests) selftest(hugepages, i915_gem_huge_page_mock_selftests) -selftest(contexts, i915_gem_context_mock_selftests) selftest(memory_region, intel_memory_region_mock_selftests) +selftest(buddy, i915_buddy_mock_selftests) diff --git a/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h b/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h index c2389f8a257d..058450d351f7 100644 --- a/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h @@ -17,5 +17,5 @@ */ selftest(engine_cs, intel_engine_cs_perf_selftests) selftest(request, i915_request_perf_selftests) -selftest(blt, i915_gem_object_blt_perf_selftests) +selftest(migrate, intel_migrate_perf_selftests) selftest(region, intel_memory_region_perf_selftests) diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c index bd5c96a77ba3..d67710d10615 100644 --- a/drivers/gpu/drm/i915/selftests/i915_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -1313,7 +1313,7 @@ static int __live_parallel_engine1(void *arg) i915_request_add(rq); err = 0; - if (i915_request_wait(rq, 0, HZ / 5) < 0) + if (i915_request_wait(rq, 0, HZ) < 0) err = -ETIME; i915_request_put(rq); if (err) @@ -1419,7 +1419,7 @@ static int __live_parallel_spin(void *arg) } igt_spinner_end(&spin); - if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0) + if (err == 0 && i915_request_wait(rq, 0, HZ) < 0) err = -EIO; i915_request_put(rq); diff --git a/drivers/gpu/drm/i915/selftests/i915_selftest.c b/drivers/gpu/drm/i915/selftests/i915_selftest.c index 1bc11c09faef..484759c9409c 100644 --- a/drivers/gpu/drm/i915/selftests/i915_selftest.c +++ b/drivers/gpu/drm/i915/selftests/i915_selftest.c @@ -187,7 +187,7 @@ int i915_mock_selftests(void) err = run_selftests(mock, NULL); if (err) { i915_selftest.mock = err; - return err; + return 1; } if (i915_selftest.mock < 0) { @@ -430,7 +430,7 @@ module_param_named(st_timeout, i915_selftest.timeout_ms, uint, 0400); module_param_named(st_filter, i915_selftest.filter, charp, 0400); module_param_named_unsafe(mock_selftests, i915_selftest.mock, int, 0400); -MODULE_PARM_DESC(mock_selftests, "Run selftests before loading, using mock hardware (0:disabled [default], 1:run tests then load driver, -1:run tests then exit module)"); +MODULE_PARM_DESC(mock_selftests, "Run selftests before loading, using mock hardware (0:disabled [default], 1:run tests then load driver, -1:run tests then leave dummy module)"); module_param_named_unsafe(live_selftests, i915_selftest.live, int, 0400); MODULE_PARM_DESC(live_selftests, "Run selftests after driver initialisation on the live system (0:disabled [default], 1:run tests then continue, -1:run tests then exit module)"); diff --git a/drivers/gpu/drm/i915/selftests/igt_flush_test.c b/drivers/gpu/drm/i915/selftests/igt_flush_test.c index 7b0939e3f007..a6c71fca61aa 100644 --- a/drivers/gpu/drm/i915/selftests/igt_flush_test.c +++ b/drivers/gpu/drm/i915/selftests/igt_flush_test.c @@ -19,7 +19,7 @@ int igt_flush_test(struct drm_i915_private *i915) 
cond_resched(); - if (intel_gt_wait_for_idle(gt, HZ / 5) == -ETIME) { + if (intel_gt_wait_for_idle(gt, HZ) == -ETIME) { pr_err("%pS timed out, cancelling all further testing.\n", __builtin_return_address(0)); diff --git a/drivers/gpu/drm/i915/selftests/igt_live_test.c b/drivers/gpu/drm/i915/selftests/igt_live_test.c index c130010a7033..1c721542e277 100644 --- a/drivers/gpu/drm/i915/selftests/igt_live_test.c +++ b/drivers/gpu/drm/i915/selftests/igt_live_test.c @@ -5,7 +5,7 @@ */ #include "i915_drv.h" -#include "gt/intel_gt_requests.h" +#include "gt/intel_gt.h" #include "../i915_selftest.h" #include "igt_flush_test.h" diff --git a/drivers/gpu/drm/i915/selftests/igt_mmap.c b/drivers/gpu/drm/i915/selftests/igt_mmap.c index 583a4ff8b8c9..e920a461bd36 100644 --- a/drivers/gpu/drm/i915/selftests/igt_mmap.c +++ b/drivers/gpu/drm/i915/selftests/igt_mmap.c @@ -9,15 +9,28 @@ #include "i915_drv.h" #include "igt_mmap.h" -unsigned long igt_mmap_node(struct drm_i915_private *i915, - struct drm_vma_offset_node *node, - unsigned long addr, - unsigned long prot, - unsigned long flags) +unsigned long igt_mmap_offset(struct drm_i915_private *i915, + u64 offset, + unsigned long size, + unsigned long prot, + unsigned long flags) { + struct drm_vma_offset_node *node; struct file *file; + unsigned long addr; int err; + /* no need to refcount, we own this object */ + drm_vma_offset_lock_lookup(i915->drm.vma_offset_manager); + node = drm_vma_offset_exact_lookup_locked(i915->drm.vma_offset_manager, + offset / PAGE_SIZE, size / PAGE_SIZE); + drm_vma_offset_unlock_lookup(i915->drm.vma_offset_manager); + + if (GEM_WARN_ON(!node)) { + pr_info("Failed to lookup %llx\n", offset); + return -ENOENT; + } + /* Pretend to open("/dev/dri/card0") */ file = mock_drm_getfile(i915->drm.primary, O_RDWR); if (IS_ERR(file)) @@ -29,7 +42,7 @@ unsigned long igt_mmap_node(struct drm_i915_private *i915, goto out_file; } - addr = vm_mmap(file, addr, drm_vma_node_size(node) << PAGE_SHIFT, + addr = vm_mmap(file, 0, drm_vma_node_size(node) << PAGE_SHIFT, prot, flags, drm_vma_node_offset_addr(node)); drm_vma_node_revoke(node, file->private_data); diff --git a/drivers/gpu/drm/i915/selftests/igt_mmap.h b/drivers/gpu/drm/i915/selftests/igt_mmap.h index 6e716cb59d7e..acbe34d81a6d 100644 --- a/drivers/gpu/drm/i915/selftests/igt_mmap.h +++ b/drivers/gpu/drm/i915/selftests/igt_mmap.h @@ -7,13 +7,15 @@ #ifndef IGT_MMAP_H #define IGT_MMAP_H +#include <linux/types.h> + struct drm_i915_private; struct drm_vma_offset_node; -unsigned long igt_mmap_node(struct drm_i915_private *i915, - struct drm_vma_offset_node *node, - unsigned long addr, - unsigned long prot, - unsigned long flags); +unsigned long igt_mmap_offset(struct drm_i915_private *i915, + u64 offset, + unsigned long size, + unsigned long prot, + unsigned long flags); #endif /* IGT_MMAP_H */ diff --git a/drivers/gpu/drm/i915/selftests/intel_memory_region.c b/drivers/gpu/drm/i915/selftests/intel_memory_region.c index c85d516b85cd..418caae84759 100644 --- a/drivers/gpu/drm/i915/selftests/intel_memory_region.c +++ b/drivers/gpu/drm/i915/selftests/intel_memory_region.c @@ -15,12 +15,15 @@ #include "gem/i915_gem_context.h" #include "gem/i915_gem_lmem.h" #include "gem/i915_gem_region.h" -#include "gem/i915_gem_object_blt.h" #include "gem/selftests/igt_gem_utils.h" #include "gem/selftests/mock_context.h" +#include "gt/intel_engine_pm.h" #include "gt/intel_engine_user.h" #include "gt/intel_gt.h" +#include "i915_buddy.h" +#include "gt/intel_migrate.h" #include "i915_memcpy.h" +#include 
"i915_ttm_buddy_manager.h" #include "selftests/igt_flush_test.h" #include "selftests/i915_random.h" @@ -57,16 +60,15 @@ static int igt_mock_fill(void *arg) LIST_HEAD(objects); int err = 0; - page_size = mem->chunk_size; + page_size = PAGE_SIZE; + max_pages = div64_u64(total, page_size); rem = total; -retry: - max_pages = div64_u64(rem, page_size); for_each_prime_number_from(page_num, 1, max_pages) { resource_size_t size = page_num * page_size; struct drm_i915_gem_object *obj; - obj = i915_gem_object_create_region(mem, size, 0); + obj = i915_gem_object_create_region(mem, size, 0, 0); if (IS_ERR(obj)) { err = PTR_ERR(obj); break; @@ -86,11 +88,6 @@ retry: err = 0; if (err == -ENXIO) { if (page_num * page_size <= rem) { - if (mem->is_range_manager && max_pages > 1) { - max_pages >>= 1; - goto retry; - } - pr_err("%s failed, space still left in region\n", __func__); err = -EINVAL; @@ -113,7 +110,7 @@ igt_object_create(struct intel_memory_region *mem, struct drm_i915_gem_object *obj; int err; - obj = i915_gem_object_create_region(mem, size, flags); + obj = i915_gem_object_create_region(mem, size, 0, flags); if (IS_ERR(obj)) return obj; @@ -157,6 +154,7 @@ static bool is_contiguous(struct drm_i915_gem_object *obj) static int igt_mock_reserve(void *arg) { struct intel_memory_region *mem = arg; + struct drm_i915_private *i915 = mem->i915; resource_size_t avail = resource_size(&mem->region); struct drm_i915_gem_object *obj; const u32 chunk_size = SZ_32M; @@ -166,16 +164,18 @@ static int igt_mock_reserve(void *arg) LIST_HEAD(objects); int err = 0; - if (!list_empty(&mem->reserved)) { - pr_err("%s region reserved list is not empty\n", __func__); - return -EINVAL; - } - count = avail / chunk_size; order = i915_random_order(count, &prng); if (!order) return 0; + mem = mock_region_create(i915, 0, SZ_2G, I915_GTT_PAGE_SIZE_4K, 0); + if (IS_ERR(mem)) { + pr_err("failed to create memory region\n"); + err = PTR_ERR(mem); + goto out_free_order; + } + /* Reserve a bunch of ranges within the region */ for (i = 0; i < count; ++i) { u64 start = order[i] * chunk_size; @@ -205,18 +205,12 @@ static int igt_mock_reserve(void *arg) do { u32 size = i915_prandom_u32_max_state(cur_avail, &prng); -retry: size = max_t(u32, round_up(size, PAGE_SIZE), PAGE_SIZE); obj = igt_object_create(mem, &objects, size, 0); if (IS_ERR(obj)) { - if (PTR_ERR(obj) == -ENXIO) { - if (mem->is_range_manager && - size > mem->chunk_size) { - size >>= 1; - goto retry; - } + if (PTR_ERR(obj) == -ENXIO) break; - } + err = PTR_ERR(obj); goto out_close; } @@ -230,9 +224,10 @@ retry: } out_close: - kfree(order); close_objects(mem, &objects); - intel_memory_region_unreserve(mem); + intel_memory_region_put(mem); +out_free_order: + kfree(order); return err; } @@ -252,7 +247,7 @@ static int igt_mock_contiguous(void *arg) total = resource_size(&mem->region); /* Min size */ - obj = igt_object_create(mem, &objects, mem->chunk_size, + obj = igt_object_create(mem, &objects, PAGE_SIZE, I915_BO_ALLOC_CONTIGUOUS); if (IS_ERR(obj)) return PTR_ERR(obj); @@ -333,17 +328,15 @@ static int igt_mock_contiguous(void *arg) min = target; target = total >> 1; - if (!mem->is_range_manager) { - /* Make sure we can still allocate all the fragmented space */ - obj = igt_object_create(mem, &objects, target, 0); - if (IS_ERR(obj)) { - err = PTR_ERR(obj); - goto err_close_objects; - } - - igt_object_release(obj); + /* Make sure we can still allocate all the fragmented space */ + obj = igt_object_create(mem, &objects, target, 0); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto 
err_close_objects; } + igt_object_release(obj); + /* * Even though we have enough free space, we don't have a big enough * contiguous block. Make sure that holds true. @@ -362,7 +355,7 @@ static int igt_mock_contiguous(void *arg) } target >>= 1; - } while (target >= mem->chunk_size); + } while (target >= PAGE_SIZE); err_close_objects: list_splice_tail(&holes, &objects); @@ -374,7 +367,9 @@ static int igt_mock_splintered_region(void *arg) { struct intel_memory_region *mem = arg; struct drm_i915_private *i915 = mem->i915; + struct i915_ttm_buddy_resource *res; struct drm_i915_gem_object *obj; + struct i915_buddy_mm *mm; unsigned int expected_order; LIST_HEAD(objects); u64 size; @@ -382,7 +377,7 @@ static int igt_mock_splintered_region(void *arg) /* * Sanity check we can still allocate everything even if the - * max_order != mm.size. i.e our starting address space size is not a + * mm.max_order != mm.size. i.e our starting address space size is not a * power-of-two. */ @@ -391,20 +386,29 @@ static int igt_mock_splintered_region(void *arg) if (IS_ERR(mem)) return PTR_ERR(mem); - expected_order = get_order(rounddown_pow_of_two(size)); - if (mem->max_order != expected_order) { - pr_err("%s order mismatch(%u != %u)\n", - __func__, mem->max_order, expected_order); - err = -EINVAL; - goto out_put; - } - obj = igt_object_create(mem, &objects, size, 0); if (IS_ERR(obj)) { err = PTR_ERR(obj); goto out_close; } + res = to_ttm_buddy_resource(obj->mm.res); + mm = res->mm; + if (mm->size != size) { + pr_err("%s size mismatch(%llu != %llu)\n", + __func__, mm->size, size); + err = -EINVAL; + goto out_put; + } + + expected_order = get_order(rounddown_pow_of_two(size)); + if (mm->max_order != expected_order) { + pr_err("%s order mismatch(%u != %u)\n", + __func__, mm->max_order, expected_order); + err = -EINVAL; + goto out_put; + } + close_objects(mem, &objects); /* @@ -415,15 +419,12 @@ static int igt_mock_splintered_region(void *arg) * sure that does indeed hold true. */ - if (!mem->is_range_manager) { - obj = igt_object_create(mem, &objects, size, - I915_BO_ALLOC_CONTIGUOUS); - if (!IS_ERR(obj)) { - pr_err("%s too large contiguous allocation was not rejected\n", - __func__); - err = -EINVAL; - goto out_close; - } + obj = igt_object_create(mem, &objects, size, I915_BO_ALLOC_CONTIGUOUS); + if (!IS_ERR(obj)) { + pr_err("%s too large contiguous allocation was not rejected\n", + __func__); + err = -EINVAL; + goto out_close; } obj = igt_object_create(mem, &objects, rounddown_pow_of_two(size), @@ -442,6 +443,74 @@ out_put: return err; } +#ifndef SZ_8G +#define SZ_8G BIT_ULL(33) +#endif + +static int igt_mock_max_segment(void *arg) +{ + const unsigned int max_segment = rounddown(UINT_MAX, PAGE_SIZE); + struct intel_memory_region *mem = arg; + struct drm_i915_private *i915 = mem->i915; + struct i915_ttm_buddy_resource *res; + struct drm_i915_gem_object *obj; + struct i915_buddy_block *block; + struct i915_buddy_mm *mm; + struct list_head *blocks; + struct scatterlist *sg; + LIST_HEAD(objects); + u64 size; + int err = 0; + + /* + * While we may create very large contiguous blocks, we may need + * to break those down for consumption elsewhere. In particular, + * dma-mapping with scatterlist elements have an implicit limit of + * UINT_MAX on each element. 
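 * (struct scatterlist stores the segment length in an unsigned int,
 * so a single sg entry can never describe more than UINT_MAX bytes;
 * max_segment below is simply that limit rounded down to a whole
 * number of pages. The test therefore wants the mock region to hand
 * back at least one block as big as that limit, and then checks that
 * no entry of the resulting sg_table exceeds it.)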
+ */ + + size = SZ_8G; + mem = mock_region_create(i915, 0, size, PAGE_SIZE, 0); + if (IS_ERR(mem)) + return PTR_ERR(mem); + + obj = igt_object_create(mem, &objects, size, 0); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out_put; + } + + res = to_ttm_buddy_resource(obj->mm.res); + blocks = &res->blocks; + mm = res->mm; + size = 0; + list_for_each_entry(block, blocks, link) { + if (i915_buddy_block_size(mm, block) > size) + size = i915_buddy_block_size(mm, block); + } + if (size < max_segment) { + pr_err("%s: Failed to create a huge contiguous block [> %u], largest block %lld\n", + __func__, max_segment, size); + err = -EINVAL; + goto out_close; + } + + for (sg = obj->mm.pages->sgl; sg; sg = sg_next(sg)) { + if (sg->length > max_segment) { + pr_err("%s: Created an oversized scatterlist entry, %u > %u\n", + __func__, sg->length, max_segment); + err = -EINVAL; + goto out_close; + } + } + +out_close: + close_objects(mem, &objects); +out_put: + intel_memory_region_put(mem); + return err; +} + static int igt_gpu_write_dw(struct intel_context *ce, struct i915_vma *vma, u32 dword, @@ -579,6 +648,62 @@ out_put: return err; } +static int igt_lmem_create_with_ps(void *arg) +{ + struct drm_i915_private *i915 = arg; + int err = 0; + u32 ps; + + for (ps = PAGE_SIZE; ps <= SZ_1G; ps <<= 1) { + struct drm_i915_gem_object *obj; + dma_addr_t daddr; + + obj = __i915_gem_object_create_lmem_with_ps(i915, ps, ps, 0); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + if (err == -ENXIO || err == -E2BIG) { + pr_info("%s not enough lmem for ps(%u) err=%d\n", + __func__, ps, err); + err = 0; + } + + break; + } + + if (obj->base.size != ps) { + pr_err("%s size(%zu) != ps(%u)\n", + __func__, obj->base.size, ps); + err = -EINVAL; + goto out_put; + } + + i915_gem_object_lock(obj, NULL); + err = i915_gem_object_pin_pages(obj); + if (err) + goto out_put; + + daddr = i915_gem_object_get_dma_address(obj, 0); + if (!IS_ALIGNED(daddr, ps)) { + pr_err("%s daddr(%pa) not aligned with ps(%u)\n", + __func__, &daddr, ps); + err = -EINVAL; + goto out_unpin; + } + +out_unpin: + i915_gem_object_unpin_pages(obj); + __i915_gem_object_put_pages(obj); +out_put: + i915_gem_object_unlock(obj); + i915_gem_object_put(obj); + + if (err) + break; + } + + return err; +} + static int igt_lmem_create_cleared_cpu(void *arg) { struct drm_i915_private *i915 = arg; @@ -741,6 +866,7 @@ static int igt_lmem_write_cpu(void *arg) PAGE_SIZE - 64, }; struct intel_engine_cs *engine; + struct i915_request *rq; u32 *vaddr; u32 sz; u32 i; @@ -767,15 +893,20 @@ static int igt_lmem_write_cpu(void *arg) goto out_put; } + i915_gem_object_lock(obj, NULL); /* Put the pages into a known state -- from the gpu for added fun */ intel_engine_pm_get(engine); - err = i915_gem_object_fill_blt(obj, engine->kernel_context, 0xdeadbeaf); - intel_engine_pm_put(engine); - if (err) - goto out_unpin; + err = intel_context_migrate_clear(engine->gt->migrate.context, NULL, + obj->mm.pages->sgl, I915_CACHE_NONE, + true, 0xdeadbeaf, &rq); + if (rq) { + dma_resv_add_excl_fence(obj->base.resv, &rq->fence); + i915_request_put(rq); + } - i915_gem_object_lock(obj, NULL); - err = i915_gem_object_set_to_wc_domain(obj, true); + intel_engine_pm_put(engine); + if (!err) + err = i915_gem_object_set_to_wc_domain(obj, true); i915_gem_object_unlock(obj); if (err) goto out_unpin; @@ -858,7 +989,7 @@ create_region_for_mapping(struct intel_memory_region *mr, u64 size, u32 type, struct drm_i915_gem_object *obj; void *addr; - obj = i915_gem_object_create_region(mr, size, 0); + obj = 
i915_gem_object_create_region(mr, size, 0, 0); if (IS_ERR(obj)) { if (PTR_ERR(obj) == -ENOSPC) /* Stolen memory */ return ERR_PTR(-ENODEV); @@ -1046,6 +1177,7 @@ int intel_memory_region_mock_selftests(void) SUBTEST(igt_mock_fill), SUBTEST(igt_mock_contiguous), SUBTEST(igt_mock_splintered_region), + SUBTEST(igt_mock_max_segment), }; struct intel_memory_region *mem; struct drm_i915_private *i915; @@ -1074,6 +1206,7 @@ int intel_memory_region_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(igt_lmem_create), + SUBTEST(igt_lmem_create_with_ps), SUBTEST(igt_lmem_create_cleared_cpu), SUBTEST(igt_lmem_write_cpu), SUBTEST(igt_lmem_write_gpu), diff --git a/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.c b/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.c new file mode 100644 index 000000000000..4b328346b48a --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.c @@ -0,0 +1,88 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +//#include "gt/intel_engine_user.h" +#include "gt/intel_gt.h" +#include "i915_drv.h" +#include "i915_selftest.h" + +#include "selftests/intel_scheduler_helpers.h" + +#define REDUCED_TIMESLICE 5 +#define REDUCED_PREEMPT 10 +#define WAIT_FOR_RESET_TIME 10000 + +int intel_selftest_modify_policy(struct intel_engine_cs *engine, + struct intel_selftest_saved_policy *saved, + u32 modify_type) + +{ + int err; + + saved->reset = engine->i915->params.reset; + saved->flags = engine->flags; + saved->timeslice = engine->props.timeslice_duration_ms; + saved->preempt_timeout = engine->props.preempt_timeout_ms; + + switch (modify_type) { + case SELFTEST_SCHEDULER_MODIFY_FAST_RESET: + /* + * Enable force pre-emption on time slice expiration + * together with engine reset on pre-emption timeout. + * This is required to make the GuC notice and reset + * the single hanging context. + * Also, reduce the preemption timeout to something + * small to speed the test up. 
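 * A caller is expected to bracket its test with this helper and
 * intel_selftest_restore_policy(), roughly (illustrative sketch only,
 * not lifted from a real test):
 *
 *	err = intel_selftest_modify_policy(engine, &saved,
 *				SELFTEST_SCHEDULER_MODIFY_FAST_RESET);
 *	if (err)
 *		return err;
 *
 *	... submit the hanging request and wait for it, e.g. with
 *	    intel_selftest_wait_for_rq(rq) ...
 *
 *	err = intel_selftest_restore_policy(engine, &saved);
 *
 * so that the engine properties and GuC global policies saved above
 * are written back once the scenario is over.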
+ */ + engine->i915->params.reset = 2; + engine->flags |= I915_ENGINE_WANT_FORCED_PREEMPTION; + engine->props.timeslice_duration_ms = REDUCED_TIMESLICE; + engine->props.preempt_timeout_ms = REDUCED_PREEMPT; + break; + + case SELFTEST_SCHEDULER_MODIFY_NO_HANGCHECK: + engine->props.preempt_timeout_ms = 0; + break; + + default: + pr_err("Invalid scheduler policy modification type: %d!\n", modify_type); + return -EINVAL; + } + + if (!intel_engine_uses_guc(engine)) + return 0; + + err = intel_guc_global_policies_update(&engine->gt->uc.guc); + if (err) + intel_selftest_restore_policy(engine, saved); + + return err; +} + +int intel_selftest_restore_policy(struct intel_engine_cs *engine, + struct intel_selftest_saved_policy *saved) +{ + /* Restore the original policies */ + engine->i915->params.reset = saved->reset; + engine->flags = saved->flags; + engine->props.timeslice_duration_ms = saved->timeslice; + engine->props.preempt_timeout_ms = saved->preempt_timeout; + + if (!intel_engine_uses_guc(engine)) + return 0; + + return intel_guc_global_policies_update(&engine->gt->uc.guc); +} + +int intel_selftest_wait_for_rq(struct i915_request *rq) +{ + long ret; + + ret = i915_request_wait(rq, 0, WAIT_FOR_RESET_TIME); + if (ret < 0) + return ret; + + return 0; +} diff --git a/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.h b/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.h new file mode 100644 index 000000000000..35c098601ac0 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef _INTEL_SELFTEST_SCHEDULER_HELPERS_H_ +#define _INTEL_SELFTEST_SCHEDULER_HELPERS_H_ + +#include <linux/types.h> + +struct i915_request; +struct intel_engine_cs; + +struct intel_selftest_saved_policy { + u32 flags; + u32 reset; + u64 timeslice; + u64 preempt_timeout; +}; + +enum selftest_scheduler_modify { + SELFTEST_SCHEDULER_MODIFY_NO_HANGCHECK = 0, + SELFTEST_SCHEDULER_MODIFY_FAST_RESET, +}; + +int intel_selftest_modify_policy(struct intel_engine_cs *engine, + struct intel_selftest_saved_policy *saved, + enum selftest_scheduler_modify modify_type); +int intel_selftest_restore_policy(struct intel_engine_cs *engine, + struct intel_selftest_saved_policy *saved); +int intel_selftest_wait_for_rq(struct i915_request *rq); + +#endif diff --git a/drivers/gpu/drm/i915/selftests/intel_uncore.c b/drivers/gpu/drm/i915/selftests/intel_uncore.c index 8ef9e6a4ad05..720b60853f8b 100644 --- a/drivers/gpu/drm/i915/selftests/intel_uncore.c +++ b/drivers/gpu/drm/i915/selftests/intel_uncore.c @@ -68,6 +68,7 @@ static int intel_shadow_table_check(void) { gen8_shadowed_regs, ARRAY_SIZE(gen8_shadowed_regs) }, { gen11_shadowed_regs, ARRAY_SIZE(gen11_shadowed_regs) }, { gen12_shadowed_regs, ARRAY_SIZE(gen12_shadowed_regs) }, + { xehp_shadowed_regs, ARRAY_SIZE(xehp_shadowed_regs) }, }; const i915_reg_t *reg; unsigned int i, j; @@ -103,6 +104,7 @@ int intel_uncore_mock_selftests(void) { __gen9_fw_ranges, ARRAY_SIZE(__gen9_fw_ranges), true }, { __gen11_fw_ranges, ARRAY_SIZE(__gen11_fw_ranges), true }, { __gen12_fw_ranges, ARRAY_SIZE(__gen12_fw_ranges), true }, + { __xehp_fw_ranges, ARRAY_SIZE(__xehp_fw_ranges), true }, }; int err, i; diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index d189c4bd4bef..4f8180146888 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -52,7 +52,8 
@@ void mock_device_flush(struct drm_i915_private *i915) do { for_each_engine(engine, gt, id) mock_engine_flush(engine); - } while (intel_gt_retire_requests_timeout(gt, MAX_SCHEDULE_TIMEOUT)); + } while (intel_gt_retire_requests_timeout(gt, MAX_SCHEDULE_TIMEOUT, + NULL)); } static void mock_device_release(struct drm_device *dev) diff --git a/drivers/gpu/drm/i915/selftests/mock_gtt.c b/drivers/gpu/drm/i915/selftests/mock_gtt.c index 5c7ae40bba63..cc047ec594f9 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gtt.c +++ b/drivers/gpu/drm/i915/selftests/mock_gtt.c @@ -73,7 +73,6 @@ struct i915_ppgtt *mock_ppgtt(struct drm_i915_private *i915, const char *name) ppgtt->vm.gt = &i915->gt; ppgtt->vm.i915 = i915; ppgtt->vm.total = round_down(U64_MAX, PAGE_SIZE); - ppgtt->vm.file = ERR_PTR(-ENODEV); ppgtt->vm.dma = i915->drm.dev; i915_address_space_init(&ppgtt->vm, VM_CLASS_PPGTT); diff --git a/drivers/gpu/drm/i915/selftests/mock_region.c b/drivers/gpu/drm/i915/selftests/mock_region.c index eafc5a04975c..efa86dffe3c6 100644 --- a/drivers/gpu/drm/i915/selftests/mock_region.c +++ b/drivers/gpu/drm/i915/selftests/mock_region.c @@ -3,6 +3,7 @@ * Copyright © 2019-2021 Intel Corporation */ +#include <drm/ttm/ttm_placement.h> #include <linux/scatterlist.h> #include <drm/ttm/ttm_placement.h> @@ -16,7 +17,7 @@ static void mock_region_put_pages(struct drm_i915_gem_object *obj, struct sg_table *pages) { - intel_region_ttm_node_free(obj->mm.region, obj->mm.st_mm_node); + intel_region_ttm_resource_free(obj->mm.region, obj->mm.res); sg_free_table(pages); kfree(pages); } @@ -25,26 +26,31 @@ static int mock_region_get_pages(struct drm_i915_gem_object *obj) { unsigned int flags; struct sg_table *pages; + int err; - flags = I915_ALLOC_MIN_PAGE_SIZE; + flags = 0; if (obj->flags & I915_BO_ALLOC_CONTIGUOUS) - flags |= I915_ALLOC_CONTIGUOUS; + flags |= TTM_PL_FLAG_CONTIGUOUS; - obj->mm.st_mm_node = intel_region_ttm_node_alloc(obj->mm.region, - obj->base.size, - flags); - if (IS_ERR(obj->mm.st_mm_node)) - return PTR_ERR(obj->mm.st_mm_node); + obj->mm.res = intel_region_ttm_resource_alloc(obj->mm.region, + obj->base.size, + flags); + if (IS_ERR(obj->mm.res)) + return PTR_ERR(obj->mm.res); - pages = intel_region_ttm_node_to_st(obj->mm.region, obj->mm.st_mm_node); + pages = intel_region_ttm_resource_to_st(obj->mm.region, obj->mm.res); if (IS_ERR(pages)) { - intel_region_ttm_node_free(obj->mm.region, obj->mm.st_mm_node); - return PTR_ERR(pages); + err = PTR_ERR(pages); + goto err_free_resource; } __i915_gem_object_set_pages(obj, pages, i915_sg_dma_sizes(pages->sgl)); return 0; + +err_free_resource: + intel_region_ttm_resource_free(obj->mm.region, obj->mm.res); + return err; } static const struct drm_i915_gem_object_ops mock_region_obj_ops = { @@ -57,6 +63,7 @@ static const struct drm_i915_gem_object_ops mock_region_obj_ops = { static int mock_object_init(struct intel_memory_region *mem, struct drm_i915_gem_object *obj, resource_size_t size, + resource_size_t page_size, unsigned int flags) { static struct lock_class_key lock_class; diff --git a/drivers/gpu/drm/imx/dcss/dcss-kms.c b/drivers/gpu/drm/imx/dcss/dcss-kms.c index 37ae68a7fba5..9b84df34a6a1 100644 --- a/drivers/gpu/drm/imx/dcss/dcss-kms.c +++ b/drivers/gpu/drm/imx/dcss/dcss-kms.c @@ -93,11 +93,8 @@ static int dcss_kms_bridge_connector_init(struct dcss_kms_dev *kms) ret = drm_bridge_attach(encoder, bridge, NULL, DRM_BRIDGE_ATTACH_NO_CONNECTOR); - if (ret < 0) { - dev_err(ddev->dev, "Unable to attach bridge %pOF\n", - bridge->of_node); + if (ret < 0) return ret; - } 
kms->connector = drm_bridge_connector_init(ddev, encoder); if (IS_ERR(kms->connector)) { @@ -133,8 +130,6 @@ struct dcss_kms_dev *dcss_kms_attach(struct dcss_dev *dcss) if (ret) goto cleanup_mode_config; - drm->irq_enabled = true; - ret = dcss_kms_bridge_connector_init(kms); if (ret) goto cleanup_mode_config; @@ -178,7 +173,6 @@ void dcss_kms_detach(struct dcss_kms_dev *kms) drm_kms_helper_poll_fini(drm); drm_atomic_helper_shutdown(drm); drm_crtc_vblank_off(&kms->crtc.base); - drm->irq_enabled = false; drm_mode_config_cleanup(drm); dcss_crtc_deinit(&kms->crtc, drm); drm->dev_private = NULL; diff --git a/drivers/gpu/drm/imx/dcss/dcss-plane.c b/drivers/gpu/drm/imx/dcss/dcss-plane.c index 044d3bdf313c..ac45d54acd4e 100644 --- a/drivers/gpu/drm/imx/dcss/dcss-plane.c +++ b/drivers/gpu/drm/imx/dcss/dcss-plane.c @@ -361,7 +361,6 @@ static void dcss_plane_atomic_disable(struct drm_plane *plane, } static const struct drm_plane_helper_funcs dcss_plane_helper_funcs = { - .prepare_fb = drm_gem_plane_helper_prepare_fb, .atomic_check = dcss_plane_atomic_check, .atomic_update = dcss_plane_atomic_update, .atomic_disable = dcss_plane_atomic_disable, diff --git a/drivers/gpu/drm/imx/imx-drm-core.c b/drivers/gpu/drm/imx/imx-drm-core.c index 76819a8ac37f..9558e9e1b431 100644 --- a/drivers/gpu/drm/imx/imx-drm-core.c +++ b/drivers/gpu/drm/imx/imx-drm-core.c @@ -208,17 +208,6 @@ static int imx_drm_bind(struct device *dev) return PTR_ERR(drm); /* - * enable drm irq mode. - * - with irq_enabled = true, we can use the vblank feature. - * - * P.S. note that we wouldn't use drm irq handler but - * just specific driver own one instead because - * drm framework supports only one irq handler and - * drivers can well take care of their interrupts - */ - drm->irq_enabled = true; - - /* * set max width and height as default value(4096x4096). * this value would be used to check framebuffer size limitation * at drm_mode_addfb(). 
diff --git a/drivers/gpu/drm/imx/imx-ldb.c b/drivers/gpu/drm/imx/imx-ldb.c index 53132ddf9587..e5078d03020d 100644 --- a/drivers/gpu/drm/imx/imx-ldb.c +++ b/drivers/gpu/drm/imx/imx-ldb.c @@ -465,10 +465,8 @@ static int imx_ldb_register(struct drm_device *drm, if (imx_ldb_ch->bridge) { ret = drm_bridge_attach(encoder, imx_ldb_ch->bridge, NULL, 0); - if (ret) { - DRM_ERROR("Failed to initialize bridge with drm\n"); + if (ret) return ret; - } } else { /* * We want to add the connector whenever there is no bridge diff --git a/drivers/gpu/drm/imx/ipuv3-plane.c b/drivers/gpu/drm/imx/ipuv3-plane.c index bd1f9f0366d3..846c1aae69c8 100644 --- a/drivers/gpu/drm/imx/ipuv3-plane.c +++ b/drivers/gpu/drm/imx/ipuv3-plane.c @@ -772,7 +772,6 @@ static void ipu_plane_atomic_update(struct drm_plane *plane, } static const struct drm_plane_helper_funcs ipu_plane_helper_funcs = { - .prepare_fb = drm_gem_plane_helper_prepare_fb, .atomic_check = ipu_plane_atomic_check, .atomic_disable = ipu_plane_atomic_disable, .atomic_update = ipu_plane_atomic_update, diff --git a/drivers/gpu/drm/imx/parallel-display.c b/drivers/gpu/drm/imx/parallel-display.c index e0412e694fd9..a8aba0141ce7 100644 --- a/drivers/gpu/drm/imx/parallel-display.c +++ b/drivers/gpu/drm/imx/parallel-display.c @@ -294,11 +294,8 @@ static int imx_pd_bind(struct device *dev, struct device *master, void *data) if (imxpd->next_bridge) { ret = drm_bridge_attach(encoder, imxpd->next_bridge, bridge, 0); - if (ret < 0) { - dev_err(imxpd->dev, "failed to attach bridge: %d\n", - ret); + if (ret < 0) return ret; - } } else { drm_connector_helper_add(connector, &imx_pd_connector_helper_funcs); diff --git a/drivers/gpu/drm/ingenic/ingenic-drm-drv.c b/drivers/gpu/drm/ingenic/ingenic-drm-drv.c index 5244f4763477..a5df1c8d34cd 100644 --- a/drivers/gpu/drm/ingenic/ingenic-drm-drv.c +++ b/drivers/gpu/drm/ingenic/ingenic-drm-drv.c @@ -33,7 +33,6 @@ #include <drm/drm_fourcc.h> #include <drm/drm_gem_atomic_helper.h> #include <drm/drm_gem_framebuffer_helper.h> -#include <drm/drm_irq.h> #include <drm/drm_managed.h> #include <drm/drm_of.h> #include <drm/drm_panel.h> @@ -702,29 +701,6 @@ static int ingenic_drm_encoder_atomic_check(struct drm_encoder *encoder, } } -static void ingenic_drm_atomic_helper_commit_tail(struct drm_atomic_state *old_state) -{ - /* - * Just your regular drm_atomic_helper_commit_tail(), but only calls - * drm_atomic_helper_wait_for_vblanks() if priv->no_vblank. 
- */ - struct drm_device *dev = old_state->dev; - struct ingenic_drm *priv = drm_device_get_priv(dev); - - drm_atomic_helper_commit_modeset_disables(dev, old_state); - - drm_atomic_helper_commit_planes(dev, old_state, 0); - - drm_atomic_helper_commit_modeset_enables(dev, old_state); - - drm_atomic_helper_commit_hw_done(old_state); - - if (!priv->no_vblank) - drm_atomic_helper_wait_for_vblanks(dev, old_state); - - drm_atomic_helper_cleanup_planes(dev, old_state); -} - static irqreturn_t ingenic_drm_irq_handler(int irq, void *arg) { struct ingenic_drm *priv = drm_device_get_priv(arg); @@ -745,6 +721,9 @@ static int ingenic_drm_enable_vblank(struct drm_crtc *crtc) { struct ingenic_drm *priv = drm_crtc_get_priv(crtc); + if (priv->no_vblank) + return -EINVAL; + regmap_update_bits(priv->map, JZ_REG_LCD_CTRL, JZ_LCD_CTRL_EOF_IRQ, JZ_LCD_CTRL_EOF_IRQ); @@ -799,8 +778,6 @@ static const struct drm_driver ingenic_drm_driver_data = { .fops = &ingenic_drm_fops, .gem_create_object = ingenic_drm_gem_create_object, DRM_GEM_CMA_DRIVER_OPS, - - .irq_handler = ingenic_drm_irq_handler, }; static const struct drm_plane_funcs ingenic_drm_primary_plane_funcs = { @@ -830,7 +807,6 @@ static const struct drm_plane_helper_funcs ingenic_drm_plane_helper_funcs = { .atomic_update = ingenic_drm_plane_atomic_update, .atomic_check = ingenic_drm_plane_atomic_check, .atomic_disable = ingenic_drm_plane_atomic_disable, - .prepare_fb = drm_gem_plane_helper_prepare_fb, }; static const struct drm_crtc_helper_funcs ingenic_drm_crtc_helper_funcs = { @@ -855,7 +831,7 @@ static const struct drm_mode_config_funcs ingenic_drm_mode_config_funcs = { }; static struct drm_mode_config_helper_funcs ingenic_drm_mode_config_helpers = { - .atomic_commit_tail = ingenic_drm_atomic_helper_commit_tail, + .atomic_commit_tail = drm_atomic_helper_commit_tail, }; static void ingenic_drm_unbind_all(void *d) @@ -988,9 +964,6 @@ static int ingenic_drm_bind(struct device *dev, bool has_components) priv->dma_hwdescs->hwdesc_pal.cmd = JZ_LCD_CMD_ENABLE_PAL | (sizeof(priv->dma_hwdescs->palette) / 4); - if (soc_info->has_osd) - priv->ipu_plane = drm_plane_from_index(drm, 0); - primary = priv->soc_info->has_osd ? 
&priv->f1 : &priv->f0; drm_plane_helper_add(primary, &ingenic_drm_plane_helper_funcs); @@ -1085,10 +1058,8 @@ static int ingenic_drm_bind(struct device *dev, bool has_components) drm_encoder_helper_add(encoder, &ingenic_drm_encoder_helper_funcs); ret = drm_bridge_attach(encoder, bridge, NULL, 0); - if (ret) { - dev_err(dev, "Unable to attach bridge\n"); + if (ret) return ret; - } } drm_for_each_encoder(encoder, drm) { @@ -1099,7 +1070,7 @@ static int ingenic_drm_bind(struct device *dev, bool has_components) encoder->possible_clones = clone_mask; } - ret = drm_irq_install(drm, irq); + ret = devm_request_irq(dev, irq, ingenic_drm_irq_handler, 0, drm->driver->name, drm); if (ret) { dev_err(dev, "Unable to install IRQ handler\n"); return ret; diff --git a/drivers/gpu/drm/ingenic/ingenic-ipu.c b/drivers/gpu/drm/ingenic/ingenic-ipu.c index 61b6d9fdbba1..aeb8a757d213 100644 --- a/drivers/gpu/drm/ingenic/ingenic-ipu.c +++ b/drivers/gpu/drm/ingenic/ingenic-ipu.c @@ -625,7 +625,6 @@ static const struct drm_plane_helper_funcs ingenic_ipu_plane_helper_funcs = { .atomic_update = ingenic_ipu_plane_atomic_update, .atomic_check = ingenic_ipu_plane_atomic_check, .atomic_disable = ingenic_ipu_plane_atomic_disable, - .prepare_fb = drm_gem_plane_helper_prepare_fb, }; static int diff --git a/drivers/gpu/drm/kmb/kmb_drv.c b/drivers/gpu/drm/kmb/kmb_drv.c index f54392ec4fab..1c2f4799f421 100644 --- a/drivers/gpu/drm/kmb/kmb_drv.c +++ b/drivers/gpu/drm/kmb/kmb_drv.c @@ -17,7 +17,6 @@ #include <drm/drm_drv.h> #include <drm/drm_gem_cma_helper.h> #include <drm/drm_gem_framebuffer_helper.h> -#include <drm/drm_irq.h> #include <drm/drm_probe_helper.h> #include <drm/drm_vblank.h> @@ -413,14 +412,29 @@ static void kmb_irq_reset(struct drm_device *drm) kmb_write_lcd(to_kmb(drm), LCD_INT_ENABLE, 0); } +static int kmb_irq_install(struct drm_device *drm, unsigned int irq) +{ + if (irq == IRQ_NOTCONNECTED) + return -ENOTCONN; + + kmb_irq_reset(drm); + + return request_irq(irq, kmb_isr, 0, drm->driver->name, drm); +} + +static void kmb_irq_uninstall(struct drm_device *drm) +{ + struct kmb_drm_private *kmb = to_kmb(drm); + + kmb_irq_reset(drm); + free_irq(kmb->irq_lcd, drm); +} + DEFINE_DRM_GEM_CMA_FOPS(fops); static const struct drm_driver kmb_driver = { .driver_features = DRIVER_GEM | DRIVER_MODESET | DRIVER_ATOMIC, - .irq_handler = kmb_isr, - .irq_preinstall = kmb_irq_reset, - .irq_uninstall = kmb_irq_reset, /* GEM Operations */ .fops = &fops, DRM_GEM_CMA_DRIVER_OPS_VMAP, @@ -442,7 +456,7 @@ static int kmb_remove(struct platform_device *pdev) of_node_put(kmb->crtc.port); kmb->crtc.port = NULL; pm_runtime_get_sync(drm->dev); - drm_irq_uninstall(drm); + kmb_irq_uninstall(drm); pm_runtime_put_sync(drm->dev); pm_runtime_disable(drm->dev); @@ -532,7 +546,7 @@ static int kmb_probe(struct platform_device *pdev) if (ret) goto err_free; - ret = drm_irq_install(&kmb->drm, kmb->irq_lcd); + ret = kmb_irq_install(&kmb->drm, kmb->irq_lcd); if (ret < 0) { drm_err(&kmb->drm, "failed to install IRQ handler\n"); goto err_irq; diff --git a/drivers/gpu/drm/kmb/kmb_dsi.c b/drivers/gpu/drm/kmb/kmb_dsi.c index 231041b269f5..1793cd31b117 100644 --- a/drivers/gpu/drm/kmb/kmb_dsi.c +++ b/drivers/gpu/drm/kmb/kmb_dsi.c @@ -1441,7 +1441,6 @@ int kmb_dsi_encoder_init(struct drm_device *dev, struct kmb_dsi *kmb_dsi) ret = drm_bridge_attach(encoder, adv_bridge, NULL, DRM_BRIDGE_ATTACH_NO_CONNECTOR); if (ret) { - DRM_ERROR("failed to attach bridge to MIPI\n"); drm_encoder_cleanup(encoder); return ret; } diff --git a/drivers/gpu/drm/lima/lima_sched.c 
b/drivers/gpu/drm/lima/lima_sched.c index ecf3267334ff..dba8329937a3 100644 --- a/drivers/gpu/drm/lima/lima_sched.c +++ b/drivers/gpu/drm/lima/lima_sched.c @@ -508,7 +508,8 @@ int lima_sched_pipe_init(struct lima_sched_pipe *pipe, const char *name) INIT_WORK(&pipe->recover_work, lima_sched_recover_work); return drm_sched_init(&pipe->base, &lima_sched_ops, 1, - lima_job_hang_limit, msecs_to_jiffies(timeout), + lima_job_hang_limit, + msecs_to_jiffies(timeout), NULL, NULL, name); } diff --git a/drivers/gpu/drm/mcde/mcde_display.c b/drivers/gpu/drm/mcde/mcde_display.c index 4ddc55d58f38..ce12a36e2db4 100644 --- a/drivers/gpu/drm/mcde/mcde_display.c +++ b/drivers/gpu/drm/mcde/mcde_display.c @@ -1479,7 +1479,6 @@ static struct drm_simple_display_pipe_funcs mcde_display_funcs = { .update = mcde_display_update, .enable_vblank = mcde_display_enable_vblank, .disable_vblank = mcde_display_disable_vblank, - .prepare_fb = drm_gem_simple_display_pipe_prepare_fb, }; int mcde_display_init(struct drm_device *drm) diff --git a/drivers/gpu/drm/mcde/mcde_dsi.c b/drivers/gpu/drm/mcde/mcde_dsi.c index 34a00d7e9c38..180ebbccbeda 100644 --- a/drivers/gpu/drm/mcde/mcde_dsi.c +++ b/drivers/gpu/drm/mcde/mcde_dsi.c @@ -760,7 +760,7 @@ static void mcde_dsi_start(struct mcde_dsi *d) DSI_MCTL_MAIN_DATA_CTL_BTA_EN | DSI_MCTL_MAIN_DATA_CTL_READ_EN | DSI_MCTL_MAIN_DATA_CTL_REG_TE_EN; - if (!(d->mdsi->mode_flags & MIPI_DSI_MODE_EOT_PACKET)) + if (!(d->mdsi->mode_flags & MIPI_DSI_MODE_NO_EOT_PACKET)) val |= DSI_MCTL_MAIN_DATA_CTL_HOST_EOT_GEN; writel(val, d->regs + DSI_MCTL_MAIN_DATA_CTL); @@ -1052,7 +1052,6 @@ static int mcde_dsi_bridge_attach(struct drm_bridge *bridge, { struct mcde_dsi *d = bridge_to_mcde_dsi(bridge); struct drm_device *drm = bridge->dev; - int ret; if (!drm_core_check_feature(drm, DRIVER_ATOMIC)) { dev_err(d->dev, "we need atomic updates\n"); @@ -1060,13 +1059,7 @@ static int mcde_dsi_bridge_attach(struct drm_bridge *bridge, } /* Attach the DSI bridge to the output (panel etc) bridge */ - ret = drm_bridge_attach(bridge->encoder, d->bridge_out, bridge, flags); - if (ret) { - dev_err(d->dev, "failed to attach the DSI bridge\n"); - return ret; - } - - return 0; + return drm_bridge_attach(bridge->encoder, d->bridge_out, bridge, flags); } static const struct drm_bridge_funcs mcde_dsi_bridge_funcs = { diff --git a/drivers/gpu/drm/mediatek/Makefile b/drivers/gpu/drm/mediatek/Makefile index dc54a7a69005..29098d7c8307 100644 --- a/drivers/gpu/drm/mediatek/Makefile +++ b/drivers/gpu/drm/mediatek/Makefile @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 -mediatek-drm-y := mtk_disp_ccorr.o \ +mediatek-drm-y := mtk_disp_aal.o \ + mtk_disp_ccorr.o \ mtk_disp_color.o \ mtk_disp_gamma.o \ mtk_disp_ovl.o \ diff --git a/drivers/gpu/drm/mediatek/mtk_disp_aal.c b/drivers/gpu/drm/mediatek/mtk_disp_aal.c new file mode 100644 index 000000000000..f46d4ab73d6a --- /dev/null +++ b/drivers/gpu/drm/mediatek/mtk_disp_aal.c @@ -0,0 +1,169 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2021 MediaTek Inc. 
+ */ + +#include <linux/clk.h> +#include <linux/component.h> +#include <linux/module.h> +#include <linux/of_device.h> +#include <linux/of_irq.h> +#include <linux/platform_device.h> +#include <linux/soc/mediatek/mtk-cmdq.h> + +#include "mtk_disp_drv.h" +#include "mtk_drm_crtc.h" +#include "mtk_drm_ddp_comp.h" + +#define DISP_AAL_EN 0x0000 +#define AAL_EN BIT(0) +#define DISP_AAL_SIZE 0x0030 +#define DISP_AAL_OUTPUT_SIZE 0x04d8 + + +struct mtk_disp_aal_data { + bool has_gamma; +}; + +/** + * struct mtk_disp_aal - DISP_AAL driver structure + * @ddp_comp - structure containing type enum and hardware resources + * @crtc - associated crtc to report irq events to + */ +struct mtk_disp_aal { + struct clk *clk; + void __iomem *regs; + struct cmdq_client_reg cmdq_reg; + const struct mtk_disp_aal_data *data; +}; + +int mtk_aal_clk_enable(struct device *dev) +{ + struct mtk_disp_aal *aal = dev_get_drvdata(dev); + + return clk_prepare_enable(aal->clk); +} + +void mtk_aal_clk_disable(struct device *dev) +{ + struct mtk_disp_aal *aal = dev_get_drvdata(dev); + + clk_disable_unprepare(aal->clk); +} + +void mtk_aal_config(struct device *dev, unsigned int w, + unsigned int h, unsigned int vrefresh, + unsigned int bpc, struct cmdq_pkt *cmdq_pkt) +{ + struct mtk_disp_aal *aal = dev_get_drvdata(dev); + + mtk_ddp_write(cmdq_pkt, w << 16 | h, &aal->cmdq_reg, aal->regs, DISP_AAL_SIZE); + mtk_ddp_write(cmdq_pkt, w << 16 | h, &aal->cmdq_reg, aal->regs, DISP_AAL_OUTPUT_SIZE); +} + +void mtk_aal_gamma_set(struct device *dev, struct drm_crtc_state *state) +{ + struct mtk_disp_aal *aal = dev_get_drvdata(dev); + + if (aal->data && aal->data->has_gamma) + mtk_gamma_set_common(aal->regs, state); +} + +void mtk_aal_start(struct device *dev) +{ + struct mtk_disp_aal *aal = dev_get_drvdata(dev); + + writel(AAL_EN, aal->regs + DISP_AAL_EN); +} + +void mtk_aal_stop(struct device *dev) +{ + struct mtk_disp_aal *aal = dev_get_drvdata(dev); + + writel_relaxed(0x0, aal->regs + DISP_AAL_EN); +} + +static int mtk_disp_aal_bind(struct device *dev, struct device *master, + void *data) +{ + return 0; +} + +static void mtk_disp_aal_unbind(struct device *dev, struct device *master, + void *data) +{ +} + +static const struct component_ops mtk_disp_aal_component_ops = { + .bind = mtk_disp_aal_bind, + .unbind = mtk_disp_aal_unbind, +}; + +static int mtk_disp_aal_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct mtk_disp_aal *priv; + struct resource *res; + int ret; + + priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + priv->clk = devm_clk_get(dev, NULL); + if (IS_ERR(priv->clk)) { + dev_err(dev, "failed to get aal clk\n"); + return PTR_ERR(priv->clk); + } + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + priv->regs = devm_ioremap_resource(dev, res); + if (IS_ERR(priv->regs)) { + dev_err(dev, "failed to ioremap aal\n"); + return PTR_ERR(priv->regs); + } + +#if IS_REACHABLE(CONFIG_MTK_CMDQ) + ret = cmdq_dev_get_client_reg(dev, &priv->cmdq_reg, 0); + if (ret) + dev_dbg(dev, "get mediatek,gce-client-reg fail!\n"); +#endif + + priv->data = of_device_get_match_data(dev); + platform_set_drvdata(pdev, priv); + + ret = component_add(dev, &mtk_disp_aal_component_ops); + if (ret) + dev_err(dev, "Failed to add component: %d\n", ret); + + return ret; +} + +static int mtk_disp_aal_remove(struct platform_device *pdev) +{ + component_del(&pdev->dev, &mtk_disp_aal_component_ops); + + return 0; +} + +static const struct mtk_disp_aal_data mt8173_aal_driver_data = { + .has_gamma = 
true, +}; + +static const struct of_device_id mtk_disp_aal_driver_dt_match[] = { + { .compatible = "mediatek,mt8173-disp-aal", + .data = &mt8173_aal_driver_data}, + { .compatible = "mediatek,mt8183-disp-aal"}, + {}, +}; +MODULE_DEVICE_TABLE(of, mtk_disp_aal_driver_dt_match); + +struct platform_driver mtk_disp_aal_driver = { + .probe = mtk_disp_aal_probe, + .remove = mtk_disp_aal_remove, + .driver = { + .name = "mediatek-disp-aal", + .owner = THIS_MODULE, + .of_match_table = mtk_disp_aal_driver_dt_match, + }, +}; diff --git a/drivers/gpu/drm/mediatek/mtk_disp_drv.h b/drivers/gpu/drm/mediatek/mtk_disp_drv.h index cafd9df2d63b..86c3068894b1 100644 --- a/drivers/gpu/drm/mediatek/mtk_disp_drv.h +++ b/drivers/gpu/drm/mediatek/mtk_disp_drv.h @@ -9,6 +9,15 @@ #include <linux/soc/mediatek/mtk-cmdq.h> #include "mtk_drm_plane.h" +int mtk_aal_clk_enable(struct device *dev); +void mtk_aal_clk_disable(struct device *dev); +void mtk_aal_config(struct device *dev, unsigned int w, + unsigned int h, unsigned int vrefresh, + unsigned int bpc, struct cmdq_pkt *cmdq_pkt); +void mtk_aal_gamma_set(struct device *dev, struct drm_crtc_state *state); +void mtk_aal_start(struct device *dev); +void mtk_aal_stop(struct device *dev); + void mtk_ccorr_ctm_set(struct device *dev, struct drm_crtc_state *state); int mtk_ccorr_clk_enable(struct device *dev); void mtk_ccorr_clk_disable(struct device *dev); diff --git a/drivers/gpu/drm/mediatek/mtk_disp_rdma.c b/drivers/gpu/drm/mediatek/mtk_disp_rdma.c index 705f28ceb4dd..75d7f45579e2 100644 --- a/drivers/gpu/drm/mediatek/mtk_disp_rdma.c +++ b/drivers/gpu/drm/mediatek/mtk_disp_rdma.c @@ -162,10 +162,10 @@ void mtk_rdma_config(struct device *dev, unsigned int width, /* * Enable FIFO underflow since DSI and DPI can't be blocked. * Keep the FIFO pseudo size reset default of 8 KiB. 
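 * (The new formula below, rdma_fifo_size * 7 / 10, derives the
 * threshold from the FIFO size itself rather than from the display
 * timings, so it can no longer be programmed above what the FIFO can
 * actually hold.)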
Set the - * output threshold to 6 microseconds with 7/6 overhead to - * account for blanking, and with a pixel depth of 4 bytes: + * output threshold to 70% of max fifo size to make sure the + * threhold will not overflow */ - threshold = width * height * vrefresh * 4 * 7 / 1000000; + threshold = rdma_fifo_size * 7 / 10; reg = RDMA_FIFO_UNDERFLOW_EN | RDMA_FIFO_PSEUDO_SIZE(rdma_fifo_size) | RDMA_OUTPUT_VALID_FIFO_THRESHOLD(threshold); diff --git a/drivers/gpu/drm/mediatek/mtk_dpi.c b/drivers/gpu/drm/mediatek/mtk_dpi.c index e94738fe4db8..4554e2de1430 100644 --- a/drivers/gpu/drm/mediatek/mtk_dpi.c +++ b/drivers/gpu/drm/mediatek/mtk_dpi.c @@ -718,10 +718,8 @@ static int mtk_dpi_bind(struct device *dev, struct device *master, void *data) ret = drm_bridge_attach(&dpi->encoder, &dpi->bridge, NULL, DRM_BRIDGE_ATTACH_NO_CONNECTOR); - if (ret) { - dev_err(dev, "Failed to attach bridge: %d\n", ret); + if (ret) goto err_cleanup; - } dpi->connector = drm_bridge_connector_init(drm_dev, &dpi->encoder); if (IS_ERR(dpi->connector)) { diff --git a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c index 735efe79f075..5f81489fc60c 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c @@ -4,6 +4,8 @@ */ #include <linux/clk.h> +#include <linux/dma-mapping.h> +#include <linux/mailbox_controller.h> #include <linux/pm_runtime.h> #include <linux/soc/mediatek/mtk-cmdq.h> #include <linux/soc/mediatek/mtk-mmsys.h> @@ -50,8 +52,11 @@ struct mtk_drm_crtc { bool pending_async_planes; #if IS_REACHABLE(CONFIG_MTK_CMDQ) - struct cmdq_client *cmdq_client; + struct mbox_client cmdq_cl; + struct mbox_chan *cmdq_chan; + struct cmdq_pkt cmdq_handle; u32 cmdq_event; + u32 cmdq_vblank_cnt; #endif struct device *mmsys_dev; @@ -222,9 +227,79 @@ struct mtk_ddp_comp *mtk_drm_ddp_comp_for_plane(struct drm_crtc *crtc, } #if IS_REACHABLE(CONFIG_MTK_CMDQ) -static void ddp_cmdq_cb(struct cmdq_cb_data data) +static int mtk_drm_cmdq_pkt_create(struct mbox_chan *chan, struct cmdq_pkt *pkt, + size_t size) { - cmdq_pkt_destroy(data.data); + struct device *dev; + dma_addr_t dma_addr; + + pkt->va_base = kzalloc(size, GFP_KERNEL); + if (!pkt->va_base) { + kfree(pkt); + return -ENOMEM; + } + pkt->buf_size = size; + + dev = chan->mbox->dev; + dma_addr = dma_map_single(dev, pkt->va_base, pkt->buf_size, + DMA_TO_DEVICE); + if (dma_mapping_error(dev, dma_addr)) { + dev_err(dev, "dma map failed, size=%u\n", (u32)(u64)size); + kfree(pkt->va_base); + kfree(pkt); + return -ENOMEM; + } + + pkt->pa_base = dma_addr; + + return 0; +} + +static void mtk_drm_cmdq_pkt_destroy(struct mbox_chan *chan, struct cmdq_pkt *pkt) +{ + dma_unmap_single(chan->mbox->dev, pkt->pa_base, pkt->buf_size, + DMA_TO_DEVICE); + kfree(pkt->va_base); + kfree(pkt); +} + +static void ddp_cmdq_cb(struct mbox_client *cl, void *mssg) +{ + struct mtk_drm_crtc *mtk_crtc = container_of(cl, struct mtk_drm_crtc, cmdq_cl); + struct cmdq_cb_data *data = mssg; + struct mtk_crtc_state *state; + unsigned int i; + + state = to_mtk_crtc_state(mtk_crtc->base.state); + + state->pending_config = false; + + if (mtk_crtc->pending_planes) { + for (i = 0; i < mtk_crtc->layer_nr; i++) { + struct drm_plane *plane = &mtk_crtc->planes[i]; + struct mtk_plane_state *plane_state; + + plane_state = to_mtk_plane_state(plane->state); + + plane_state->pending.config = false; + } + mtk_crtc->pending_planes = false; + } + + if (mtk_crtc->pending_async_planes) { + for (i = 0; i < mtk_crtc->layer_nr; i++) { + struct drm_plane *plane = 
&mtk_crtc->planes[i]; + struct mtk_plane_state *plane_state; + + plane_state = to_mtk_plane_state(plane->state); + + plane_state->pending.async_config = false; + } + mtk_crtc->pending_async_planes = false; + } + + mtk_crtc->cmdq_vblank_cnt = 0; + mtk_drm_cmdq_pkt_destroy(mtk_crtc->cmdq_chan, data->pkt); } #endif @@ -378,7 +453,8 @@ static void mtk_crtc_ddp_config(struct drm_crtc *crtc, state->pending_vrefresh, 0, cmdq_handle); - state->pending_config = false; + if (!cmdq_handle) + state->pending_config = false; } if (mtk_crtc->pending_planes) { @@ -398,9 +474,12 @@ static void mtk_crtc_ddp_config(struct drm_crtc *crtc, mtk_ddp_comp_layer_config(comp, local_layer, plane_state, cmdq_handle); - plane_state->pending.config = false; + if (!cmdq_handle) + plane_state->pending.config = false; } - mtk_crtc->pending_planes = false; + + if (!cmdq_handle) + mtk_crtc->pending_planes = false; } if (mtk_crtc->pending_async_planes) { @@ -420,9 +499,12 @@ static void mtk_crtc_ddp_config(struct drm_crtc *crtc, mtk_ddp_comp_layer_config(comp, local_layer, plane_state, cmdq_handle); - plane_state->pending.async_config = false; + if (!cmdq_handle) + plane_state->pending.async_config = false; } - mtk_crtc->pending_async_planes = false; + + if (!cmdq_handle) + mtk_crtc->pending_async_planes = false; } } @@ -430,7 +512,7 @@ static void mtk_drm_crtc_update_config(struct mtk_drm_crtc *mtk_crtc, bool needs_vblank) { #if IS_REACHABLE(CONFIG_MTK_CMDQ) - struct cmdq_pkt *cmdq_handle; + struct cmdq_pkt *cmdq_handle = &mtk_crtc->cmdq_handle; #endif struct drm_crtc *crtc = &mtk_crtc->base; struct mtk_drm_private *priv = crtc->dev->dev_private; @@ -468,14 +550,24 @@ static void mtk_drm_crtc_update_config(struct mtk_drm_crtc *mtk_crtc, mtk_mutex_release(mtk_crtc->mutex); } #if IS_REACHABLE(CONFIG_MTK_CMDQ) - if (mtk_crtc->cmdq_client) { - mbox_flush(mtk_crtc->cmdq_client->chan, 2000); - cmdq_handle = cmdq_pkt_create(mtk_crtc->cmdq_client, PAGE_SIZE); + if (mtk_crtc->cmdq_chan) { + mbox_flush(mtk_crtc->cmdq_chan, 2000); + cmdq_handle->cmd_buf_size = 0; cmdq_pkt_clear_event(cmdq_handle, mtk_crtc->cmdq_event); cmdq_pkt_wfe(cmdq_handle, mtk_crtc->cmdq_event, false); mtk_crtc_ddp_config(crtc, cmdq_handle); cmdq_pkt_finalize(cmdq_handle); - cmdq_pkt_flush_async(cmdq_handle, ddp_cmdq_cb, cmdq_handle); + dma_sync_single_for_device(mtk_crtc->cmdq_chan->mbox->dev, + cmdq_handle->pa_base, + cmdq_handle->cmd_buf_size, + DMA_TO_DEVICE); + /* + * The CMDQ command should execute in the next vblank. + * If it fails to execute within the next two vblanks, a timeout is reported. 
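+ * ddp_cmdq_cb() clears cmdq_vblank_cnt again once the packet completes, + * while mtk_crtc_ddp_irq() decrements it on every vblank, so reaching zero + * means the packet missed two consecutive vblanks (roughly 33 ms at 60 Hz).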
+ */ + mtk_crtc->cmdq_vblank_cnt = 2; + mbox_send_message(mtk_crtc->cmdq_chan, cmdq_handle); + mbox_client_txdone(mtk_crtc->cmdq_chan, 0); } #endif mtk_crtc->config_updating = false; @@ -489,12 +581,15 @@ static void mtk_crtc_ddp_irq(void *data) struct mtk_drm_private *priv = crtc->dev->dev_private; #if IS_REACHABLE(CONFIG_MTK_CMDQ) - if (!priv->data->shadow_register && !mtk_crtc->cmdq_client) + if (!priv->data->shadow_register && !mtk_crtc->cmdq_chan) + mtk_crtc_ddp_config(crtc, NULL); + else if (mtk_crtc->cmdq_vblank_cnt > 0 && --mtk_crtc->cmdq_vblank_cnt == 0) + DRM_ERROR("mtk_crtc %d CMDQ execute command timeout!\n", + drm_crtc_index(&mtk_crtc->base)); #else if (!priv->data->shadow_register) -#endif mtk_crtc_ddp_config(crtc, NULL); - +#endif mtk_drm_finish_page_flip(mtk_crtc); } @@ -752,14 +847,22 @@ int mtk_drm_crtc_create(struct drm_device *drm_dev, for (i = 0; i < path_len; i++) { enum mtk_ddp_comp_id comp_id = path[i]; struct device_node *node; + struct mtk_ddp_comp *comp; node = priv->comp_node[comp_id]; + comp = &priv->ddp_comp[comp_id]; + if (!node) { dev_info(dev, "Not creating crtc %d because component %d is disabled or missing\n", pipe, comp_id); return 0; } + + if (!comp->dev) { + dev_err(dev, "Component %pOF not initialized\n", node); + return -ENODEV; + } } mtk_crtc = devm_kzalloc(dev, sizeof(*mtk_crtc), GFP_KERNEL); @@ -784,16 +887,8 @@ int mtk_drm_crtc_create(struct drm_device *drm_dev, for (i = 0; i < mtk_crtc->ddp_comp_nr; i++) { enum mtk_ddp_comp_id comp_id = path[i]; struct mtk_ddp_comp *comp; - struct device_node *node; - node = priv->comp_node[comp_id]; comp = &priv->ddp_comp[comp_id]; - if (!comp) { - dev_err(dev, "Component %pOF not initialized\n", node); - ret = -ENODEV; - return ret; - } - mtk_crtc->ddp_comp[i] = comp; if (comp->funcs) { @@ -829,16 +924,20 @@ int mtk_drm_crtc_create(struct drm_device *drm_dev, mutex_init(&mtk_crtc->hw_lock); #if IS_REACHABLE(CONFIG_MTK_CMDQ) - mtk_crtc->cmdq_client = - cmdq_mbox_create(mtk_crtc->mmsys_dev, - drm_crtc_index(&mtk_crtc->base)); - if (IS_ERR(mtk_crtc->cmdq_client)) { + mtk_crtc->cmdq_cl.dev = mtk_crtc->mmsys_dev; + mtk_crtc->cmdq_cl.tx_block = false; + mtk_crtc->cmdq_cl.knows_txdone = true; + mtk_crtc->cmdq_cl.rx_callback = ddp_cmdq_cb; + mtk_crtc->cmdq_chan = + mbox_request_channel(&mtk_crtc->cmdq_cl, + drm_crtc_index(&mtk_crtc->base)); + if (IS_ERR(mtk_crtc->cmdq_chan)) { dev_dbg(dev, "mtk_crtc %d failed to create mailbox client, writing register by CPU now\n", drm_crtc_index(&mtk_crtc->base)); - mtk_crtc->cmdq_client = NULL; + mtk_crtc->cmdq_chan = NULL; } - if (mtk_crtc->cmdq_client) { + if (mtk_crtc->cmdq_chan) { ret = of_property_read_u32_index(priv->mutex_node, "mediatek,gce-events", drm_crtc_index(&mtk_crtc->base), @@ -846,8 +945,18 @@ int mtk_drm_crtc_create(struct drm_device *drm_dev, if (ret) { dev_dbg(dev, "mtk_crtc %d failed to get mediatek,gce-events property\n", drm_crtc_index(&mtk_crtc->base)); - cmdq_mbox_destroy(mtk_crtc->cmdq_client); - mtk_crtc->cmdq_client = NULL; + mbox_free_channel(mtk_crtc->cmdq_chan); + mtk_crtc->cmdq_chan = NULL; + } else { + ret = mtk_drm_cmdq_pkt_create(mtk_crtc->cmdq_chan, + &mtk_crtc->cmdq_handle, + PAGE_SIZE); + if (ret) { + dev_dbg(dev, "mtk_crtc %d failed to create cmdq packet\n", + drm_crtc_index(&mtk_crtc->base)); + mbox_free_channel(mtk_crtc->cmdq_chan); + mtk_crtc->cmdq_chan = NULL; + } } } #endif diff --git a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c index 50d20562e612..99cbf44463e4 100644 --- 
a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c @@ -32,10 +32,6 @@ #define DISP_REG_UFO_START 0x0000 -#define DISP_AAL_EN 0x0000 -#define DISP_AAL_SIZE 0x0030 -#define DISP_AAL_OUTPUT_SIZE 0x04d8 - #define DISP_DITHER_EN 0x0000 #define DITHER_EN BIT(0) #define DISP_DITHER_CFG 0x0020 @@ -49,8 +45,6 @@ #define UFO_BYPASS BIT(2) -#define AAL_EN BIT(0) - #define DISP_DITHERING BIT(2) #define DITHER_LSB_ERR_SHIFT_R(x) (((x) & 0x7) << 28) #define DITHER_OVFLW_BIT_R(x) (((x) & 0x7) << 24) @@ -191,37 +185,6 @@ static void mtk_ufoe_start(struct device *dev) writel(UFO_BYPASS, priv->regs + DISP_REG_UFO_START); } -static void mtk_aal_config(struct device *dev, unsigned int w, - unsigned int h, unsigned int vrefresh, - unsigned int bpc, struct cmdq_pkt *cmdq_pkt) -{ - struct mtk_ddp_comp_dev *priv = dev_get_drvdata(dev); - - mtk_ddp_write(cmdq_pkt, w << 16 | h, &priv->cmdq_reg, priv->regs, DISP_AAL_SIZE); - mtk_ddp_write(cmdq_pkt, w << 16 | h, &priv->cmdq_reg, priv->regs, DISP_AAL_OUTPUT_SIZE); -} - -static void mtk_aal_gamma_set(struct device *dev, struct drm_crtc_state *state) -{ - struct mtk_ddp_comp_dev *priv = dev_get_drvdata(dev); - - mtk_gamma_set_common(priv->regs, state); -} - -static void mtk_aal_start(struct device *dev) -{ - struct mtk_ddp_comp_dev *priv = dev_get_drvdata(dev); - - writel(AAL_EN, priv->regs + DISP_AAL_EN); -} - -static void mtk_aal_stop(struct device *dev) -{ - struct mtk_ddp_comp_dev *priv = dev_get_drvdata(dev); - - writel_relaxed(0x0, priv->regs + DISP_AAL_EN); -} - static void mtk_dither_config(struct device *dev, unsigned int w, unsigned int h, unsigned int vrefresh, unsigned int bpc, struct cmdq_pkt *cmdq_pkt) @@ -249,8 +212,8 @@ static void mtk_dither_stop(struct device *dev) } static const struct mtk_ddp_comp_funcs ddp_aal = { - .clk_enable = mtk_ddp_clk_enable, - .clk_disable = mtk_ddp_clk_disable, + .clk_enable = mtk_aal_clk_enable, + .clk_disable = mtk_aal_clk_disable, .gamma_set = mtk_aal_gamma_set, .config = mtk_aal_config, .start = mtk_aal_start, @@ -507,7 +470,8 @@ int mtk_ddp_comp_init(struct device_node *node, struct mtk_ddp_comp *comp, return ret; } - if (type == MTK_DISP_BLS || + if (type == MTK_DISP_AAL || + type == MTK_DISP_BLS || type == MTK_DISP_CCORR || type == MTK_DISP_COLOR || type == MTK_DISP_GAMMA || diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c index b46bdb8985da..aec39724ebeb 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c @@ -110,6 +110,17 @@ static const enum mtk_ddp_comp_id mt2712_mtk_ddp_third[] = { DDP_COMPONENT_PWM2, }; +static enum mtk_ddp_comp_id mt8167_mtk_ddp_main[] = { + DDP_COMPONENT_OVL0, + DDP_COMPONENT_COLOR0, + DDP_COMPONENT_CCORR, + DDP_COMPONENT_AAL0, + DDP_COMPONENT_GAMMA, + DDP_COMPONENT_DITHER, + DDP_COMPONENT_RDMA0, + DDP_COMPONENT_DSI0, +}; + static const enum mtk_ddp_comp_id mt8173_mtk_ddp_main[] = { DDP_COMPONENT_OVL0, DDP_COMPONENT_COLOR0, @@ -172,6 +183,11 @@ static const struct mtk_mmsys_driver_data mt2712_mmsys_driver_data = { .third_len = ARRAY_SIZE(mt2712_mtk_ddp_third), }; +static const struct mtk_mmsys_driver_data mt8167_mmsys_driver_data = { + .main_path = mt8167_mtk_ddp_main, + .main_len = ARRAY_SIZE(mt8167_mtk_ddp_main), +}; + static const struct mtk_mmsys_driver_data mt8173_mmsys_driver_data = { .main_path = mt8173_mtk_ddp_main, .main_len = ARRAY_SIZE(mt8173_mtk_ddp_main), @@ -270,12 +286,6 @@ static int mtk_drm_kms_init(struct drm_device *drm) goto 
err_component_unbind; } - /* - * We don't use the drm_irq_install() helpers provided by the DRM - * core, so we need to set this manually in order to allow the - * DRM_IOCTL_WAIT_VBLANK to operate correctly. - */ - drm->irq_enabled = true; ret = drm_vblank_init(drm, MAX_CRTC); if (ret < 0) goto err_component_unbind; @@ -300,16 +310,7 @@ static void mtk_drm_kms_deinit(struct drm_device *drm) component_unbind_all(drm->dev, drm); } -static const struct file_operations mtk_drm_fops = { - .owner = THIS_MODULE, - .open = drm_open, - .release = drm_release, - .unlocked_ioctl = drm_ioctl, - .mmap = mtk_drm_gem_mmap, - .poll = drm_poll, - .read = drm_read, - .compat_ioctl = drm_compat_ioctl, -}; +DEFINE_DRM_GEM_FOPS(mtk_drm_fops); /* * We need to override this because the device used to import the memory is @@ -332,7 +333,7 @@ static const struct drm_driver mtk_drm_driver = { .prime_fd_to_handle = drm_gem_prime_fd_to_handle, .gem_prime_import = mtk_drm_gem_prime_import, .gem_prime_import_sg_table = mtk_gem_prime_import_sg_table, - .gem_prime_mmap = mtk_drm_gem_mmap_buf, + .gem_prime_mmap = drm_gem_prime_mmap, .fops = &mtk_drm_fops, .name = DRIVER_NAME, @@ -398,6 +399,8 @@ static const struct component_master_ops mtk_drm_ops = { static const struct of_device_id mtk_ddp_comp_dt_ids[] = { { .compatible = "mediatek,mt2701-disp-ovl", .data = (void *)MTK_DISP_OVL }, + { .compatible = "mediatek,mt8167-disp-ovl", + .data = (void *)MTK_DISP_OVL }, { .compatible = "mediatek,mt8173-disp-ovl", .data = (void *)MTK_DISP_OVL }, { .compatible = "mediatek,mt8183-disp-ovl", @@ -406,30 +409,46 @@ static const struct of_device_id mtk_ddp_comp_dt_ids[] = { .data = (void *)MTK_DISP_OVL_2L }, { .compatible = "mediatek,mt2701-disp-rdma", .data = (void *)MTK_DISP_RDMA }, + { .compatible = "mediatek,mt8167-disp-rdma", + .data = (void *)MTK_DISP_RDMA }, { .compatible = "mediatek,mt8173-disp-rdma", .data = (void *)MTK_DISP_RDMA }, { .compatible = "mediatek,mt8183-disp-rdma", .data = (void *)MTK_DISP_RDMA }, { .compatible = "mediatek,mt8173-disp-wdma", .data = (void *)MTK_DISP_WDMA }, + { .compatible = "mediatek,mt8167-disp-ccorr", + .data = (void *)MTK_DISP_CCORR }, { .compatible = "mediatek,mt8183-disp-ccorr", .data = (void *)MTK_DISP_CCORR }, { .compatible = "mediatek,mt2701-disp-color", .data = (void *)MTK_DISP_COLOR }, + { .compatible = "mediatek,mt8167-disp-color", + .data = (void *)MTK_DISP_COLOR }, { .compatible = "mediatek,mt8173-disp-color", .data = (void *)MTK_DISP_COLOR }, + { .compatible = "mediatek,mt8167-disp-aal", + .data = (void *)MTK_DISP_AAL}, { .compatible = "mediatek,mt8173-disp-aal", .data = (void *)MTK_DISP_AAL}, + { .compatible = "mediatek,mt8183-disp-aal", + .data = (void *)MTK_DISP_AAL}, + { .compatible = "mediatek,mt8167-disp-gamma", + .data = (void *)MTK_DISP_GAMMA, }, { .compatible = "mediatek,mt8173-disp-gamma", .data = (void *)MTK_DISP_GAMMA, }, { .compatible = "mediatek,mt8183-disp-gamma", .data = (void *)MTK_DISP_GAMMA, }, + { .compatible = "mediatek,mt8167-disp-dither", + .data = (void *)MTK_DISP_DITHER }, { .compatible = "mediatek,mt8183-disp-dither", .data = (void *)MTK_DISP_DITHER }, { .compatible = "mediatek,mt8173-disp-ufoe", .data = (void *)MTK_DISP_UFOE }, { .compatible = "mediatek,mt2701-dsi", .data = (void *)MTK_DSI }, + { .compatible = "mediatek,mt8167-dsi", + .data = (void *)MTK_DSI }, { .compatible = "mediatek,mt8173-dsi", .data = (void *)MTK_DSI }, { .compatible = "mediatek,mt8183-dsi", @@ -444,12 +463,16 @@ static const struct of_device_id mtk_ddp_comp_dt_ids[] = { .data = (void 
*)MTK_DISP_MUTEX }, { .compatible = "mediatek,mt2712-disp-mutex", .data = (void *)MTK_DISP_MUTEX }, + { .compatible = "mediatek,mt8167-disp-mutex", + .data = (void *)MTK_DISP_MUTEX }, { .compatible = "mediatek,mt8173-disp-mutex", .data = (void *)MTK_DISP_MUTEX }, { .compatible = "mediatek,mt8183-disp-mutex", .data = (void *)MTK_DISP_MUTEX }, { .compatible = "mediatek,mt2701-disp-pwm", .data = (void *)MTK_DISP_BLS }, + { .compatible = "mediatek,mt8167-disp-pwm", + .data = (void *)MTK_DISP_PWM }, { .compatible = "mediatek,mt8173-disp-pwm", .data = (void *)MTK_DISP_PWM }, { .compatible = "mediatek,mt8173-disp-od", @@ -464,6 +487,8 @@ static const struct of_device_id mtk_drm_of_ids[] = { .data = &mt7623_mmsys_driver_data}, { .compatible = "mediatek,mt2712-mmsys", .data = &mt2712_mmsys_driver_data}, + { .compatible = "mediatek,mt8167-mmsys", + .data = &mt8167_mmsys_driver_data}, { .compatible = "mediatek,mt8173-mmsys", .data = &mt8173_mmsys_driver_data}, { .compatible = "mediatek,mt8183-mmsys", @@ -532,11 +557,12 @@ static int mtk_drm_probe(struct platform_device *pdev) private->comp_node[comp_id] = of_node_get(node); /* - * Currently only the CCORR, COLOR, GAMMA, OVL, RDMA, DSI, and DPI + * Currently only the AAL, CCORR, COLOR, GAMMA, OVL, RDMA, DSI, and DPI * blocks have separate component platform drivers and initialize their own * DDP component structure. The others are initialized here. */ - if (comp_type == MTK_DISP_CCORR || + if (comp_type == MTK_DISP_AAL || + comp_type == MTK_DISP_CCORR || comp_type == MTK_DISP_COLOR || comp_type == MTK_DISP_GAMMA || comp_type == MTK_DISP_OVL || @@ -636,6 +662,7 @@ static struct platform_driver mtk_drm_platform_driver = { }; static struct platform_driver * const mtk_drm_drivers[] = { + &mtk_disp_aal_driver, &mtk_disp_ccorr_driver, &mtk_disp_color_driver, &mtk_disp_gamma_driver, diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.h b/drivers/gpu/drm/mediatek/mtk_drm_drv.h index 637f5669e895..3e7d1e6fbe01 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_drv.h +++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.h @@ -46,6 +46,7 @@ struct mtk_drm_private { struct drm_atomic_state *suspend_state; }; +extern struct platform_driver mtk_disp_aal_driver; extern struct platform_driver mtk_disp_ccorr_driver; extern struct platform_driver mtk_disp_color_driver; extern struct platform_driver mtk_disp_gamma_driver; diff --git a/drivers/gpu/drm/mediatek/mtk_drm_gem.c b/drivers/gpu/drm/mediatek/mtk_drm_gem.c index 280ea0d5e840..d0544962cfc1 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_gem.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_gem.c @@ -14,11 +14,14 @@ #include "mtk_drm_drv.h" #include "mtk_drm_gem.h" +static int mtk_drm_gem_object_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma); + static const struct drm_gem_object_funcs mtk_drm_gem_object_funcs = { .free = mtk_drm_gem_free_object, .get_sg_table = mtk_gem_prime_get_sg_table, .vmap = mtk_drm_gem_prime_vmap, .vunmap = mtk_drm_gem_prime_vunmap, + .mmap = mtk_drm_gem_object_mmap, .vm_ops = &drm_gem_cma_vm_ops, }; @@ -146,10 +149,18 @@ static int mtk_drm_gem_object_mmap(struct drm_gem_object *obj, struct mtk_drm_private *priv = obj->dev->dev_private; /* + * Set vm_pgoff (used as a fake buffer offset by DRM) to 0 and map the + * whole buffer from the start. + */ + vma->vm_pgoff = 0; + + /* * dma_alloc_attrs() allocated a struct page table for mtk_gem, so clear * VM_PFNMAP flag that was set by drm_gem_mmap_obj()/drm_gem_mmap(). 
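* With the mmap path now reached through the GEM object's ->mmap() callback, * the VMA is instead tagged as a device mapping (VM_IO | VM_DONTEXPAND | * VM_DONTDUMP) and given write-combined page protection (decrypted on * systems with memory encryption) before being handed to dma_mmap_attrs().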
*/ - vma->vm_flags &= ~VM_PFNMAP; + vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP; + vma->vm_page_prot = pgprot_writecombine(vm_get_page_prot(vma->vm_flags)); + vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot); ret = dma_mmap_attrs(priv->dma_dev, vma, mtk_gem->cookie, mtk_gem->dma_addr, obj->size, mtk_gem->dma_attrs); @@ -159,37 +170,6 @@ static int mtk_drm_gem_object_mmap(struct drm_gem_object *obj, return ret; } -int mtk_drm_gem_mmap_buf(struct drm_gem_object *obj, struct vm_area_struct *vma) -{ - int ret; - - ret = drm_gem_mmap_obj(obj, obj->size, vma); - if (ret) - return ret; - - return mtk_drm_gem_object_mmap(obj, vma); -} - -int mtk_drm_gem_mmap(struct file *filp, struct vm_area_struct *vma) -{ - struct drm_gem_object *obj; - int ret; - - ret = drm_gem_mmap(filp, vma); - if (ret) - return ret; - - obj = vma->vm_private_data; - - /* - * Set vm_pgoff (used as a fake buffer offset by DRM) to 0 and map the - * whole buffer from the start. - */ - vma->vm_pgoff = 0; - - return mtk_drm_gem_object_mmap(obj, vma); -} - /* * Allocate a sg_table for this GEM object. * Note: Both the table's contents, and the sg_table itself must be freed by diff --git a/drivers/gpu/drm/mediatek/mtk_drm_gem.h b/drivers/gpu/drm/mediatek/mtk_drm_gem.h index 6da5ccb4b933..9a359a06cb73 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_gem.h +++ b/drivers/gpu/drm/mediatek/mtk_drm_gem.h @@ -39,9 +39,6 @@ struct mtk_drm_gem_obj *mtk_drm_gem_create(struct drm_device *dev, size_t size, bool alloc_kmap); int mtk_drm_gem_dumb_create(struct drm_file *file_priv, struct drm_device *dev, struct drm_mode_create_dumb *args); -int mtk_drm_gem_mmap(struct file *filp, struct vm_area_struct *vma); -int mtk_drm_gem_mmap_buf(struct drm_gem_object *obj, - struct vm_area_struct *vma); struct sg_table *mtk_gem_prime_get_sg_table(struct drm_gem_object *obj); struct drm_gem_object *mtk_gem_prime_import_sg_table(struct drm_device *dev, struct dma_buf_attachment *attach, struct sg_table *sg); diff --git a/drivers/gpu/drm/mediatek/mtk_drm_plane.c b/drivers/gpu/drm/mediatek/mtk_drm_plane.c index e6dcb34d3052..734a1fb052df 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_plane.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_plane.c @@ -235,7 +235,6 @@ static void mtk_plane_atomic_update(struct drm_plane *plane, } static const struct drm_plane_helper_funcs mtk_plane_helper_funcs = { - .prepare_fb = drm_gem_plane_helper_prepare_fb, .atomic_check = mtk_plane_atomic_check, .atomic_update = mtk_plane_atomic_update, .atomic_disable = mtk_plane_atomic_disable, diff --git a/drivers/gpu/drm/mediatek/mtk_dsi.c b/drivers/gpu/drm/mediatek/mtk_dsi.c index ae403c67cbd9..93b40c245f00 100644 --- a/drivers/gpu/drm/mediatek/mtk_dsi.c +++ b/drivers/gpu/drm/mediatek/mtk_dsi.c @@ -404,7 +404,7 @@ static void mtk_dsi_rxtx_control(struct mtk_dsi *dsi) if (dsi->mode_flags & MIPI_DSI_CLOCK_NON_CONTINUOUS) tmp_reg |= HSTX_CKLP_EN; - if (!(dsi->mode_flags & MIPI_DSI_MODE_EOT_PACKET)) + if (!(dsi->mode_flags & MIPI_DSI_MODE_NO_EOT_PACKET)) tmp_reg |= DIS_EOT; writel(tmp_reg, dsi->regs + DSI_TXRX_CTRL); @@ -481,7 +481,7 @@ static void mtk_dsi_config_vdo_timing(struct mtk_dsi *dsi) timing->da_hs_zero + timing->da_hs_exit + 3; delta = dsi->mode_flags & MIPI_DSI_MODE_VIDEO_BURST ? 18 : 12; - delta += dsi->mode_flags & MIPI_DSI_MODE_EOT_PACKET ? 2 : 0; + delta += dsi->mode_flags & MIPI_DSI_MODE_NO_EOT_PACKET ? 
2 : 0; horizontal_frontporch_byte = vm->hfront_porch * dsi_tmp_buf_bpp; horizontal_front_back_byte = horizontal_frontporch_byte + horizontal_backporch_byte; diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi.c b/drivers/gpu/drm/mediatek/mtk_hdmi.c index c1651a83700d..5838c44cbf6f 100644 --- a/drivers/gpu/drm/mediatek/mtk_hdmi.c +++ b/drivers/gpu/drm/mediatek/mtk_hdmi.c @@ -1293,11 +1293,8 @@ static int mtk_hdmi_bridge_attach(struct drm_bridge *bridge, if (hdmi->next_bridge) { ret = drm_bridge_attach(bridge->encoder, hdmi->next_bridge, bridge, flags); - if (ret) { - dev_err(hdmi->dev, - "Failed to attach external bridge: %d\n", ret); + if (ret) return ret; - } } mtk_cec_set_hpd_event(hdmi->cec_dev, mtk_hdmi_hpd_event, hdmi->dev); diff --git a/drivers/gpu/drm/meson/meson_drv.c b/drivers/gpu/drm/meson/meson_drv.c index a7388bf7c838..bc0d60df04ae 100644 --- a/drivers/gpu/drm/meson/meson_drv.c +++ b/drivers/gpu/drm/meson/meson_drv.c @@ -21,7 +21,6 @@ #include <drm/drm_fb_helper.h> #include <drm/drm_gem_cma_helper.h> #include <drm/drm_gem_framebuffer_helper.h> -#include <drm/drm_irq.h> #include <drm/drm_modeset_helper_vtables.h> #include <drm/drm_probe_helper.h> #include <drm/drm_vblank.h> @@ -94,9 +93,6 @@ DEFINE_DRM_GEM_CMA_FOPS(fops); static const struct drm_driver meson_driver = { .driver_features = DRIVER_GEM | DRIVER_MODESET | DRIVER_ATOMIC, - /* IRQ */ - .irq_handler = meson_irq, - /* CMA Ops */ DRM_GEM_CMA_DRIVER_OPS_WITH_DUMB_CREATE(meson_dumb_create), @@ -285,7 +281,7 @@ static int meson_drv_bind_master(struct device *dev, bool has_components) * Remove early framebuffers (ie. simplefb). The framebuffer can be * located anywhere in RAM */ - ret = drm_aperture_remove_framebuffers(false, "meson-drm-fb"); + ret = drm_aperture_remove_framebuffers(false, &meson_driver); if (ret) goto free_drm; @@ -335,7 +331,7 @@ static int meson_drv_bind_master(struct device *dev, bool has_components) if (ret) goto free_drm; - ret = drm_irq_install(drm, priv->vsync_irq); + ret = request_irq(priv->vsync_irq, meson_irq, 0, drm->driver->name, drm); if (ret) goto free_drm; @@ -354,7 +350,7 @@ static int meson_drv_bind_master(struct device *dev, bool has_components) return 0; uninstall_irq: - drm_irq_uninstall(drm); + free_irq(priv->vsync_irq, drm); free_drm: drm_dev_put(drm); @@ -382,7 +378,7 @@ static void meson_drv_unbind(struct device *dev) drm_kms_helper_poll_fini(drm); drm_atomic_helper_shutdown(drm); component_unbind_all(dev, drm); - drm_irq_uninstall(drm); + free_irq(priv->vsync_irq, drm); drm_dev_put(drm); if (priv->afbcd.ops) { diff --git a/drivers/gpu/drm/meson/meson_overlay.c b/drivers/gpu/drm/meson/meson_overlay.c index ed063152aecd..dfef8afcc245 100644 --- a/drivers/gpu/drm/meson/meson_overlay.c +++ b/drivers/gpu/drm/meson/meson_overlay.c @@ -747,7 +747,6 @@ static const struct drm_plane_helper_funcs meson_overlay_helper_funcs = { .atomic_check = meson_overlay_atomic_check, .atomic_disable = meson_overlay_atomic_disable, .atomic_update = meson_overlay_atomic_update, - .prepare_fb = drm_gem_plane_helper_prepare_fb, }; static bool meson_overlay_format_mod_supported(struct drm_plane *plane, diff --git a/drivers/gpu/drm/meson/meson_plane.c b/drivers/gpu/drm/meson/meson_plane.c index a18510dae4c8..8640a8a8a469 100644 --- a/drivers/gpu/drm/meson/meson_plane.c +++ b/drivers/gpu/drm/meson/meson_plane.c @@ -422,7 +422,6 @@ static const struct drm_plane_helper_funcs meson_plane_helper_funcs = { .atomic_check = meson_plane_atomic_check, .atomic_disable = meson_plane_atomic_disable, .atomic_update = 
meson_plane_atomic_update, - .prepare_fb = drm_gem_plane_helper_prepare_fb, }; static bool meson_plane_format_mod_supported(struct drm_plane *plane, diff --git a/drivers/gpu/drm/mga/mga_dma.c b/drivers/gpu/drm/mga/mga_dma.c index 403efc1f1a7c..331c2f0da57a 100644 --- a/drivers/gpu/drm/mga/mga_dma.c +++ b/drivers/gpu/drm/mga/mga_dma.c @@ -949,7 +949,7 @@ static int mga_do_cleanup_dma(struct drm_device *dev, int full_cleanup) * is freed, it's too late. */ if (dev->irq_enabled) - drm_irq_uninstall(dev); + drm_legacy_irq_uninstall(dev); if (dev->dev_private) { drm_mga_private_t *dev_priv = dev->dev_private; diff --git a/drivers/gpu/drm/mga/mga_drv.h b/drivers/gpu/drm/mga/mga_drv.h index 84395d81ab9b..f61401c70b90 100644 --- a/drivers/gpu/drm/mga/mga_drv.h +++ b/drivers/gpu/drm/mga/mga_drv.h @@ -38,7 +38,6 @@ #include <drm/drm_device.h> #include <drm/drm_file.h> #include <drm/drm_ioctl.h> -#include <drm/drm_irq.h> #include <drm/drm_legacy.h> #include <drm/drm_print.h> #include <drm/drm_sarea.h> diff --git a/drivers/gpu/drm/mgag200/Makefile b/drivers/gpu/drm/mgag200/Makefile index 42fedef53882..d4f766522483 100644 --- a/drivers/gpu/drm/mgag200/Makefile +++ b/drivers/gpu/drm/mgag200/Makefile @@ -1,4 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only -mgag200-y := mgag200_drv.o mgag200_i2c.o mgag200_mm.o mgag200_mode.o +mgag200-y := mgag200_drv.o mgag200_i2c.o mgag200_mm.o mgag200_mode.o mgag200_pll.o obj-$(CONFIG_DRM_MGAG200) += mgag200.o diff --git a/drivers/gpu/drm/mgag200/mgag200_drv.c b/drivers/gpu/drm/mgag200/mgag200_drv.c index a701d9563257..6b9243713b3c 100644 --- a/drivers/gpu/drm/mgag200/mgag200_drv.c +++ b/drivers/gpu/drm/mgag200/mgag200_drv.c @@ -262,17 +262,26 @@ static void mgag200_g200se_init_unique_id(struct mga_device *mdev) mdev->model.g200se.unique_rev_id); } -static int mgag200_device_init(struct mga_device *mdev, unsigned long flags) +static struct mga_device * +mgag200_device_create(struct pci_dev *pdev, enum mga_type type, unsigned long flags) { - struct drm_device *dev = &mdev->base; + struct mga_device *mdev; + struct drm_device *dev; int ret; - mdev->flags = mgag200_flags_from_driver_data(flags); - mdev->type = mgag200_type_from_driver_data(flags); + mdev = devm_drm_dev_alloc(&pdev->dev, &mgag200_driver, struct mga_device, base); + if (IS_ERR(mdev)) + return mdev; + dev = &mdev->base; + + pci_set_drvdata(pdev, dev); + + mdev->flags = flags; + mdev->type = type; ret = mgag200_regs_init(mdev); if (ret) - return ret; + return ERR_PTR(ret); if (mdev->type == G200_PCI || mdev->type == G200_AGP) mgag200_g200_init_refclk(mdev); @@ -281,33 +290,9 @@ static int mgag200_device_init(struct mga_device *mdev, unsigned long flags) ret = mgag200_mm_init(mdev); if (ret) - return ret; + return ERR_PTR(ret); ret = mgag200_modeset_init(mdev); - if (ret) { - drm_err(dev, "Fatal error during modeset init: %d\n", ret); - return ret; - } - - return 0; -} - -static struct mga_device * -mgag200_device_create(struct pci_dev *pdev, unsigned long flags) -{ - struct drm_device *dev; - struct mga_device *mdev; - int ret; - - mdev = devm_drm_dev_alloc(&pdev->dev, &mgag200_driver, - struct mga_device, base); - if (IS_ERR(mdev)) - return mdev; - dev = &mdev->base; - - pci_set_drvdata(pdev, dev); - - ret = mgag200_device_init(mdev, flags); if (ret) return ERR_PTR(ret); @@ -335,14 +320,27 @@ static const struct pci_device_id mgag200_pciidlist[] = { MODULE_DEVICE_TABLE(pci, mgag200_pciidlist); +static enum mga_type mgag200_type_from_driver_data(kernel_ulong_t driver_data) +{ + return (enum 
mga_type)(driver_data & MGAG200_TYPE_MASK); +} + +static unsigned long mgag200_flags_from_driver_data(kernel_ulong_t driver_data) +{ + return driver_data & MGAG200_FLAG_MASK; +} + static int mgag200_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { + kernel_ulong_t driver_data = ent->driver_data; + enum mga_type type = mgag200_type_from_driver_data(driver_data); + unsigned long flags = mgag200_flags_from_driver_data(driver_data); struct mga_device *mdev; struct drm_device *dev; int ret; - ret = drm_aperture_remove_conflicting_pci_framebuffers(pdev, "mgag200drmfb"); + ret = drm_aperture_remove_conflicting_pci_framebuffers(pdev, &mgag200_driver); if (ret) return ret; @@ -350,12 +348,12 @@ mgag200_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (ret) return ret; - mdev = mgag200_device_create(pdev, ent->driver_data); + mdev = mgag200_device_create(pdev, type, flags); if (IS_ERR(mdev)) return PTR_ERR(mdev); dev = &mdev->base; - ret = drm_dev_register(dev, ent->driver_data); + ret = drm_dev_register(dev, 0); if (ret) return ret; diff --git a/drivers/gpu/drm/mgag200/mgag200_drv.h b/drivers/gpu/drm/mgag200/mgag200_drv.h index 749a075fe9e4..196f74a0834e 100644 --- a/drivers/gpu/drm/mgag200/mgag200_drv.h +++ b/drivers/gpu/drm/mgag200/mgag200_drv.h @@ -43,6 +43,22 @@ #define ATTR_INDEX 0x1fc0 #define ATTR_DATA 0x1fc1 +#define WREG_MISC(v) \ + WREG8(MGA_MISC_OUT, v) + +#define RREG_MISC(v) \ + ((v) = RREG8(MGA_MISC_IN)) + +#define WREG_MISC_MASKED(v, mask) \ + do { \ + u8 misc_; \ + u8 mask_ = (mask); \ + RREG_MISC(misc_); \ + misc_ &= ~mask_; \ + misc_ |= ((v) & mask_); \ + WREG_MISC(misc_); \ + } while (0) + #define WREG_ATTR(reg, v) \ do { \ RREG8(0x1fda); \ @@ -110,6 +126,48 @@ #define MGAG200_MAX_FB_HEIGHT 4096 #define MGAG200_MAX_FB_WIDTH 4096 +struct mga_device; +struct mgag200_pll; + +/* + * Stores parameters for programming the PLLs + * + * Fref: reference frequency (A: 25.175 Mhz, B: 28.361, C: XX Mhz) + * Fo: output frequency + * Fvco = Fref * (N / M) + * Fo = Fvco / P + * + * S = [0..3] + */ +struct mgag200_pll_values { + unsigned int m; + unsigned int n; + unsigned int p; + unsigned int s; +}; + +struct mgag200_pll_funcs { + int (*compute)(struct mgag200_pll *pll, long clock, struct mgag200_pll_values *pllc); + void (*update)(struct mgag200_pll *pll, const struct mgag200_pll_values *pllc); +}; + +struct mgag200_pll { + struct mga_device *mdev; + + const struct mgag200_pll_funcs *funcs; +}; + +struct mgag200_crtc_state { + struct drm_crtc_state base; + + struct mgag200_pll_values pixpllc; +}; + +static inline struct mgag200_crtc_state *to_mgag200_crtc_state(struct drm_crtc_state *base) +{ + return container_of(base, struct mgag200_crtc_state, base); +} + #define to_mga_connector(x) container_of(x, struct mga_connector, base) struct mga_i2c_chan { @@ -166,8 +224,6 @@ struct mga_device { enum mga_type type; - int bpp_shifts[4]; - int fb_mtrr; union { @@ -182,8 +238,8 @@ struct mga_device { } g200se; } model; - struct mga_connector connector; + struct mgag200_pll pixpll; struct drm_simple_display_pipe display_pipe; }; @@ -192,18 +248,6 @@ static inline struct mga_device *to_mga_device(struct drm_device *dev) return container_of(dev, struct mga_device, base); } -static inline enum mga_type -mgag200_type_from_driver_data(kernel_ulong_t driver_data) -{ - return (enum mga_type)(driver_data & MGAG200_TYPE_MASK); -} - -static inline unsigned long -mgag200_flags_from_driver_data(kernel_ulong_t driver_data) -{ - return driver_data & MGAG200_FLAG_MASK; -} - /* 
mgag200_mode.c */ int mgag200_modeset_init(struct mga_device *mdev); @@ -214,4 +258,7 @@ void mgag200_i2c_destroy(struct mga_i2c_chan *i2c); /* mgag200_mm.c */ int mgag200_mm_init(struct mga_device *mdev); + /* mgag200_pll.c */ +int mgag200_pixpll_init(struct mgag200_pll *pixpll, struct mga_device *mdev); + #endif /* __MGAG200_DRV_H__ */ diff --git a/drivers/gpu/drm/mgag200/mgag200_mode.c b/drivers/gpu/drm/mgag200/mgag200_mode.c index 9d576240faed..fd98e8bbc550 100644 --- a/drivers/gpu/drm/mgag200/mgag200_mode.c +++ b/drivers/gpu/drm/mgag200/mgag200_mode.c @@ -110,712 +110,6 @@ static inline void mga_wait_busy(struct mga_device *mdev) } while ((status & 0x01) && time_before(jiffies, timeout)); } -/* - * PLL setup - */ - -static int mgag200_g200_set_plls(struct mga_device *mdev, long clock) -{ - struct drm_device *dev = &mdev->base; - const int post_div_max = 7; - const int in_div_min = 1; - const int in_div_max = 6; - const int feed_div_min = 7; - const int feed_div_max = 127; - u8 testm, testn; - u8 n = 0, m = 0, p, s; - long f_vco; - long computed; - long delta, tmp_delta; - long ref_clk = mdev->model.g200.ref_clk; - long p_clk_min = mdev->model.g200.pclk_min; - long p_clk_max = mdev->model.g200.pclk_max; - - if (clock > p_clk_max) { - drm_err(dev, "Pixel Clock %ld too high\n", clock); - return 1; - } - - if (clock < p_clk_min >> 3) - clock = p_clk_min >> 3; - - f_vco = clock; - for (p = 0; - p <= post_div_max && f_vco < p_clk_min; - p = (p << 1) + 1, f_vco <<= 1) - ; - - delta = clock; - - for (testm = in_div_min; testm <= in_div_max; testm++) { - for (testn = feed_div_min; testn <= feed_div_max; testn++) { - computed = ref_clk * (testn + 1) / (testm + 1); - if (computed < f_vco) - tmp_delta = f_vco - computed; - else - tmp_delta = computed - f_vco; - if (tmp_delta < delta) { - delta = tmp_delta; - m = testm; - n = testn; - } - } - } - f_vco = ref_clk * (n + 1) / (m + 1); - if (f_vco < 100000) - s = 0; - else if (f_vco < 140000) - s = 1; - else if (f_vco < 180000) - s = 2; - else - s = 3; - - drm_dbg_kms(dev, "clock: %ld vco: %ld m: %d n: %d p: %d s: %d\n", - clock, f_vco, m, n, p, s); - - WREG_DAC(MGA1064_PIX_PLLC_M, m); - WREG_DAC(MGA1064_PIX_PLLC_N, n); - WREG_DAC(MGA1064_PIX_PLLC_P, (p | (s << 3))); - - return 0; -} - -#define P_ARRAY_SIZE 9 - -static int mga_g200se_set_plls(struct mga_device *mdev, long clock) -{ - u32 unique_rev_id = mdev->model.g200se.unique_rev_id; - unsigned int vcomax, vcomin, pllreffreq; - unsigned int delta, tmpdelta, permitteddelta; - unsigned int testp, testm, testn; - unsigned int p, m, n; - unsigned int computed; - unsigned int pvalues_e4[P_ARRAY_SIZE] = {16, 14, 12, 10, 8, 6, 4, 2, 1}; - unsigned int fvv; - unsigned int i; - - if (unique_rev_id <= 0x03) { - - m = n = p = 0; - vcomax = 320000; - vcomin = 160000; - pllreffreq = 25000; - - delta = 0xffffffff; - permitteddelta = clock * 5 / 1000; - - for (testp = 8; testp > 0; testp /= 2) { - if (clock * testp > vcomax) - continue; - if (clock * testp < vcomin) - continue; - - for (testn = 17; testn < 256; testn++) { - for (testm = 1; testm < 32; testm++) { - computed = (pllreffreq * testn) / - (testm * testp); - if (computed > clock) - tmpdelta = computed - clock; - else - tmpdelta = clock - computed; - if (tmpdelta < delta) { - delta = tmpdelta; - m = testm - 1; - n = testn - 1; - p = testp - 1; - } - } - } - } - } else { - - - m = n = p = 0; - vcomax = 1600000; - vcomin = 800000; - pllreffreq = 25000; - - if (clock < 25000) - clock = 25000; - - clock = clock * 2; - - delta = 0xFFFFFFFF; - /* Permited 
delta is 0.5% as VESA Specification */ - permitteddelta = clock * 5 / 1000; - - for (i = 0 ; i < P_ARRAY_SIZE ; i++) { - testp = pvalues_e4[i]; - - if ((clock * testp) > vcomax) - continue; - if ((clock * testp) < vcomin) - continue; - - for (testn = 50; testn <= 256; testn++) { - for (testm = 1; testm <= 32; testm++) { - computed = (pllreffreq * testn) / - (testm * testp); - if (computed > clock) - tmpdelta = computed - clock; - else - tmpdelta = clock - computed; - - if (tmpdelta < delta) { - delta = tmpdelta; - m = testm - 1; - n = testn - 1; - p = testp - 1; - } - } - } - } - - fvv = pllreffreq * (n + 1) / (m + 1); - fvv = (fvv - 800000) / 50000; - - if (fvv > 15) - fvv = 15; - - p |= (fvv << 4); - m |= 0x80; - - clock = clock / 2; - } - - if (delta > permitteddelta) { - pr_warn("PLL delta too large\n"); - return 1; - } - - WREG_DAC(MGA1064_PIX_PLLC_M, m); - WREG_DAC(MGA1064_PIX_PLLC_N, n); - WREG_DAC(MGA1064_PIX_PLLC_P, p); - - if (unique_rev_id >= 0x04) { - WREG_DAC(0x1a, 0x09); - msleep(20); - WREG_DAC(0x1a, 0x01); - - } - - return 0; -} - -static int mga_g200wb_set_plls(struct mga_device *mdev, long clock) -{ - unsigned int vcomax, vcomin, pllreffreq; - unsigned int delta, tmpdelta; - unsigned int testp, testm, testn, testp2; - unsigned int p, m, n; - unsigned int computed; - int i, j, tmpcount, vcount; - bool pll_locked = false; - u8 tmp; - - m = n = p = 0; - - delta = 0xffffffff; - - if (mdev->type == G200_EW3) { - - vcomax = 800000; - vcomin = 400000; - pllreffreq = 25000; - - for (testp = 1; testp < 8; testp++) { - for (testp2 = 1; testp2 < 8; testp2++) { - if (testp < testp2) - continue; - if ((clock * testp * testp2) > vcomax) - continue; - if ((clock * testp * testp2) < vcomin) - continue; - for (testm = 1; testm < 26; testm++) { - for (testn = 32; testn < 2048 ; testn++) { - computed = (pllreffreq * testn) / - (testm * testp * testp2); - if (computed > clock) - tmpdelta = computed - clock; - else - tmpdelta = clock - computed; - if (tmpdelta < delta) { - delta = tmpdelta; - m = ((testn & 0x100) >> 1) | - (testm); - n = (testn & 0xFF); - p = ((testn & 0x600) >> 3) | - (testp2 << 3) | - (testp); - } - } - } - } - } - } else { - - vcomax = 550000; - vcomin = 150000; - pllreffreq = 48000; - - for (testp = 1; testp < 9; testp++) { - if (clock * testp > vcomax) - continue; - if (clock * testp < vcomin) - continue; - - for (testm = 1; testm < 17; testm++) { - for (testn = 1; testn < 151; testn++) { - computed = (pllreffreq * testn) / - (testm * testp); - if (computed > clock) - tmpdelta = computed - clock; - else - tmpdelta = clock - computed; - if (tmpdelta < delta) { - delta = tmpdelta; - n = testn - 1; - m = (testm - 1) | - ((n >> 1) & 0x80); - p = testp - 1; - } - } - } - } - } - - for (i = 0; i <= 32 && pll_locked == false; i++) { - if (i > 0) { - WREG8(MGAREG_CRTC_INDEX, 0x1e); - tmp = RREG8(MGAREG_CRTC_DATA); - if (tmp < 0xff) - WREG8(MGAREG_CRTC_DATA, tmp+1); - } - - /* set pixclkdis to 1 */ - WREG8(DAC_INDEX, MGA1064_PIX_CLK_CTL); - tmp = RREG8(DAC_DATA); - tmp |= MGA1064_PIX_CLK_CTL_CLK_DIS; - WREG8(DAC_DATA, tmp); - - WREG8(DAC_INDEX, MGA1064_REMHEADCTL); - tmp = RREG8(DAC_DATA); - tmp |= MGA1064_REMHEADCTL_CLKDIS; - WREG8(DAC_DATA, tmp); - - /* select PLL Set C */ - tmp = RREG8(MGAREG_MEM_MISC_READ); - tmp |= 0x3 << 2; - WREG8(MGAREG_MEM_MISC_WRITE, tmp); - - WREG8(DAC_INDEX, MGA1064_PIX_CLK_CTL); - tmp = RREG8(DAC_DATA); - tmp |= MGA1064_PIX_CLK_CTL_CLK_POW_DOWN | 0x80; - WREG8(DAC_DATA, tmp); - - udelay(500); - - /* reset the PLL */ - WREG8(DAC_INDEX, 
MGA1064_VREF_CTL); - tmp = RREG8(DAC_DATA); - tmp &= ~0x04; - WREG8(DAC_DATA, tmp); - - udelay(50); - - /* program pixel pll register */ - WREG_DAC(MGA1064_WB_PIX_PLLC_N, n); - WREG_DAC(MGA1064_WB_PIX_PLLC_M, m); - WREG_DAC(MGA1064_WB_PIX_PLLC_P, p); - - udelay(50); - - /* turn pll on */ - WREG8(DAC_INDEX, MGA1064_VREF_CTL); - tmp = RREG8(DAC_DATA); - tmp |= 0x04; - WREG_DAC(MGA1064_VREF_CTL, tmp); - - udelay(500); - - /* select the pixel pll */ - WREG8(DAC_INDEX, MGA1064_PIX_CLK_CTL); - tmp = RREG8(DAC_DATA); - tmp &= ~MGA1064_PIX_CLK_CTL_SEL_MSK; - tmp |= MGA1064_PIX_CLK_CTL_SEL_PLL; - WREG8(DAC_DATA, tmp); - - WREG8(DAC_INDEX, MGA1064_REMHEADCTL); - tmp = RREG8(DAC_DATA); - tmp &= ~MGA1064_REMHEADCTL_CLKSL_MSK; - tmp |= MGA1064_REMHEADCTL_CLKSL_PLL; - WREG8(DAC_DATA, tmp); - - /* reset dotclock rate bit */ - WREG8(MGAREG_SEQ_INDEX, 1); - tmp = RREG8(MGAREG_SEQ_DATA); - tmp &= ~0x8; - WREG8(MGAREG_SEQ_DATA, tmp); - - WREG8(DAC_INDEX, MGA1064_PIX_CLK_CTL); - tmp = RREG8(DAC_DATA); - tmp &= ~MGA1064_PIX_CLK_CTL_CLK_DIS; - WREG8(DAC_DATA, tmp); - - vcount = RREG8(MGAREG_VCOUNT); - - for (j = 0; j < 30 && pll_locked == false; j++) { - tmpcount = RREG8(MGAREG_VCOUNT); - if (tmpcount < vcount) - vcount = 0; - if ((tmpcount - vcount) > 2) - pll_locked = true; - else - udelay(5); - } - } - WREG8(DAC_INDEX, MGA1064_REMHEADCTL); - tmp = RREG8(DAC_DATA); - tmp &= ~MGA1064_REMHEADCTL_CLKDIS; - WREG_DAC(MGA1064_REMHEADCTL, tmp); - return 0; -} - -static int mga_g200ev_set_plls(struct mga_device *mdev, long clock) -{ - unsigned int vcomax, vcomin, pllreffreq; - unsigned int delta, tmpdelta; - unsigned int testp, testm, testn; - unsigned int p, m, n; - unsigned int computed; - u8 tmp; - - m = n = p = 0; - vcomax = 550000; - vcomin = 150000; - pllreffreq = 50000; - - delta = 0xffffffff; - - for (testp = 16; testp > 0; testp--) { - if (clock * testp > vcomax) - continue; - if (clock * testp < vcomin) - continue; - - for (testn = 1; testn < 257; testn++) { - for (testm = 1; testm < 17; testm++) { - computed = (pllreffreq * testn) / - (testm * testp); - if (computed > clock) - tmpdelta = computed - clock; - else - tmpdelta = clock - computed; - if (tmpdelta < delta) { - delta = tmpdelta; - n = testn - 1; - m = testm - 1; - p = testp - 1; - } - } - } - } - - WREG8(DAC_INDEX, MGA1064_PIX_CLK_CTL); - tmp = RREG8(DAC_DATA); - tmp |= MGA1064_PIX_CLK_CTL_CLK_DIS; - WREG8(DAC_DATA, tmp); - - tmp = RREG8(MGAREG_MEM_MISC_READ); - tmp |= 0x3 << 2; - WREG8(MGAREG_MEM_MISC_WRITE, tmp); - - WREG8(DAC_INDEX, MGA1064_PIX_PLL_STAT); - tmp = RREG8(DAC_DATA); - WREG8(DAC_DATA, tmp & ~0x40); - - WREG8(DAC_INDEX, MGA1064_PIX_CLK_CTL); - tmp = RREG8(DAC_DATA); - tmp |= MGA1064_PIX_CLK_CTL_CLK_POW_DOWN; - WREG8(DAC_DATA, tmp); - - WREG_DAC(MGA1064_EV_PIX_PLLC_M, m); - WREG_DAC(MGA1064_EV_PIX_PLLC_N, n); - WREG_DAC(MGA1064_EV_PIX_PLLC_P, p); - - udelay(50); - - WREG8(DAC_INDEX, MGA1064_PIX_CLK_CTL); - tmp = RREG8(DAC_DATA); - tmp &= ~MGA1064_PIX_CLK_CTL_CLK_POW_DOWN; - WREG8(DAC_DATA, tmp); - - udelay(500); - - WREG8(DAC_INDEX, MGA1064_PIX_CLK_CTL); - tmp = RREG8(DAC_DATA); - tmp &= ~MGA1064_PIX_CLK_CTL_SEL_MSK; - tmp |= MGA1064_PIX_CLK_CTL_SEL_PLL; - WREG8(DAC_DATA, tmp); - - WREG8(DAC_INDEX, MGA1064_PIX_PLL_STAT); - tmp = RREG8(DAC_DATA); - WREG8(DAC_DATA, tmp | 0x40); - - tmp = RREG8(MGAREG_MEM_MISC_READ); - tmp |= (0x3 << 2); - WREG8(MGAREG_MEM_MISC_WRITE, tmp); - - WREG8(DAC_INDEX, MGA1064_PIX_CLK_CTL); - tmp = RREG8(DAC_DATA); - tmp &= ~MGA1064_PIX_CLK_CTL_CLK_DIS; - WREG8(DAC_DATA, tmp); - - return 0; -} - -static int 
mga_g200eh_set_plls(struct mga_device *mdev, long clock) -{ - unsigned int vcomax, vcomin, pllreffreq; - unsigned int delta, tmpdelta; - unsigned int testp, testm, testn; - unsigned int p, m, n; - unsigned int computed; - int i, j, tmpcount, vcount; - u8 tmp; - bool pll_locked = false; - - m = n = p = 0; - - if (mdev->type == G200_EH3) { - vcomax = 3000000; - vcomin = 1500000; - pllreffreq = 25000; - - delta = 0xffffffff; - - testp = 0; - - for (testm = 150; testm >= 6; testm--) { - if (clock * testm > vcomax) - continue; - if (clock * testm < vcomin) - continue; - for (testn = 120; testn >= 60; testn--) { - computed = (pllreffreq * testn) / testm; - if (computed > clock) - tmpdelta = computed - clock; - else - tmpdelta = clock - computed; - if (tmpdelta < delta) { - delta = tmpdelta; - n = testn; - m = testm; - p = testp; - } - if (delta == 0) - break; - } - if (delta == 0) - break; - } - } else { - - vcomax = 800000; - vcomin = 400000; - pllreffreq = 33333; - - delta = 0xffffffff; - - for (testp = 16; testp > 0; testp >>= 1) { - if (clock * testp > vcomax) - continue; - if (clock * testp < vcomin) - continue; - - for (testm = 1; testm < 33; testm++) { - for (testn = 17; testn < 257; testn++) { - computed = (pllreffreq * testn) / - (testm * testp); - if (computed > clock) - tmpdelta = computed - clock; - else - tmpdelta = clock - computed; - if (tmpdelta < delta) { - delta = tmpdelta; - n = testn - 1; - m = (testm - 1); - p = testp - 1; - } - if ((clock * testp) >= 600000) - p |= 0x80; - } - } - } - } - for (i = 0; i <= 32 && pll_locked == false; i++) { - WREG8(DAC_INDEX, MGA1064_PIX_CLK_CTL); - tmp = RREG8(DAC_DATA); - tmp |= MGA1064_PIX_CLK_CTL_CLK_DIS; - WREG8(DAC_DATA, tmp); - - tmp = RREG8(MGAREG_MEM_MISC_READ); - tmp |= 0x3 << 2; - WREG8(MGAREG_MEM_MISC_WRITE, tmp); - - WREG8(DAC_INDEX, MGA1064_PIX_CLK_CTL); - tmp = RREG8(DAC_DATA); - tmp |= MGA1064_PIX_CLK_CTL_CLK_POW_DOWN; - WREG8(DAC_DATA, tmp); - - udelay(500); - - WREG_DAC(MGA1064_EH_PIX_PLLC_M, m); - WREG_DAC(MGA1064_EH_PIX_PLLC_N, n); - WREG_DAC(MGA1064_EH_PIX_PLLC_P, p); - - udelay(500); - - WREG8(DAC_INDEX, MGA1064_PIX_CLK_CTL); - tmp = RREG8(DAC_DATA); - tmp &= ~MGA1064_PIX_CLK_CTL_SEL_MSK; - tmp |= MGA1064_PIX_CLK_CTL_SEL_PLL; - WREG8(DAC_DATA, tmp); - - WREG8(DAC_INDEX, MGA1064_PIX_CLK_CTL); - tmp = RREG8(DAC_DATA); - tmp &= ~MGA1064_PIX_CLK_CTL_CLK_DIS; - tmp &= ~MGA1064_PIX_CLK_CTL_CLK_POW_DOWN; - WREG8(DAC_DATA, tmp); - - vcount = RREG8(MGAREG_VCOUNT); - - for (j = 0; j < 30 && pll_locked == false; j++) { - tmpcount = RREG8(MGAREG_VCOUNT); - if (tmpcount < vcount) - vcount = 0; - if ((tmpcount - vcount) > 2) - pll_locked = true; - else - udelay(5); - } - } - - return 0; -} - -static int mga_g200er_set_plls(struct mga_device *mdev, long clock) -{ - static const unsigned int m_div_val[] = { 1, 2, 4, 8 }; - unsigned int vcomax, vcomin, pllreffreq; - unsigned int delta, tmpdelta; - int testr, testn, testm, testo; - unsigned int p, m, n; - unsigned int computed, vco; - int tmp; - - m = n = p = 0; - vcomax = 1488000; - vcomin = 1056000; - pllreffreq = 48000; - - delta = 0xffffffff; - - for (testr = 0; testr < 4; testr++) { - if (delta == 0) - break; - for (testn = 5; testn < 129; testn++) { - if (delta == 0) - break; - for (testm = 3; testm >= 0; testm--) { - if (delta == 0) - break; - for (testo = 5; testo < 33; testo++) { - vco = pllreffreq * (testn + 1) / - (testr + 1); - if (vco < vcomin) - continue; - if (vco > vcomax) - continue; - computed = vco / (m_div_val[testm] * (testo + 1)); - if (computed > clock) - tmpdelta = 
computed - clock; - else - tmpdelta = clock - computed; - if (tmpdelta < delta) { - delta = tmpdelta; - m = testm | (testo << 3); - n = testn; - p = testr | (testr << 3); - } - } - } - } - } - - WREG8(DAC_INDEX, MGA1064_PIX_CLK_CTL); - tmp = RREG8(DAC_DATA); - tmp |= MGA1064_PIX_CLK_CTL_CLK_DIS; - WREG8(DAC_DATA, tmp); - - WREG8(DAC_INDEX, MGA1064_REMHEADCTL); - tmp = RREG8(DAC_DATA); - tmp |= MGA1064_REMHEADCTL_CLKDIS; - WREG8(DAC_DATA, tmp); - - tmp = RREG8(MGAREG_MEM_MISC_READ); - tmp |= (0x3<<2) | 0xc0; - WREG8(MGAREG_MEM_MISC_WRITE, tmp); - - WREG8(DAC_INDEX, MGA1064_PIX_CLK_CTL); - tmp = RREG8(DAC_DATA); - tmp &= ~MGA1064_PIX_CLK_CTL_CLK_DIS; - tmp |= MGA1064_PIX_CLK_CTL_CLK_POW_DOWN; - WREG8(DAC_DATA, tmp); - - udelay(500); - - WREG_DAC(MGA1064_ER_PIX_PLLC_N, n); - WREG_DAC(MGA1064_ER_PIX_PLLC_M, m); - WREG_DAC(MGA1064_ER_PIX_PLLC_P, p); - - udelay(50); - - return 0; -} - -static int mgag200_crtc_set_plls(struct mga_device *mdev, long clock) -{ - u8 misc; - - switch(mdev->type) { - case G200_PCI: - case G200_AGP: - return mgag200_g200_set_plls(mdev, clock); - case G200_SE_A: - case G200_SE_B: - return mga_g200se_set_plls(mdev, clock); - case G200_WB: - case G200_EW3: - return mga_g200wb_set_plls(mdev, clock); - case G200_EV: - return mga_g200ev_set_plls(mdev, clock); - case G200_EH: - case G200_EH3: - return mga_g200eh_set_plls(mdev, clock); - case G200_ER: - return mga_g200er_set_plls(mdev, clock); - } - - misc = RREG8(MGA_MISC_IN); - misc &= ~MGAREG_MISC_CLK_SEL_MASK; - misc |= MGAREG_MISC_CLK_SEL_MGA_MSK; - WREG8(MGA_MISC_OUT, misc); - - return 0; -} - static void mgag200_g200wb_hold_bmc(struct mga_device *mdev) { u8 tmp; @@ -1137,10 +431,11 @@ static void mgag200_set_mode_regs(struct mga_device *mdev, WREG8(MGA_MISC_OUT, misc); } -static u8 mgag200_get_bpp_shift(struct mga_device *mdev, - const struct drm_format_info *format) +static u8 mgag200_get_bpp_shift(const struct drm_format_info *format) { - return mdev->bpp_shifts[format->cpp[0] - 1]; + static const u8 bpp_shift[] = {0, 1, 0, 2}; + + return bpp_shift[format->cpp[0] - 1]; } /* @@ -1152,7 +447,7 @@ static u32 mgag200_calculate_offset(struct mga_device *mdev, const struct drm_framebuffer *fb) { u32 offset = fb->pitches[0] / fb->format->cpp[0]; - u8 bppshift = mgag200_get_bpp_shift(mdev, fb->format); + u8 bppshift = mgag200_get_bpp_shift(fb->format); if (fb->format->cpp[0] * 8 == 24) offset = (offset * 3) >> (4 - bppshift); @@ -1189,7 +484,7 @@ static void mgag200_set_format_regs(struct mga_device *mdev, bpp = format->cpp[0] * 8; - bppshift = mgag200_get_bpp_shift(mdev, format); + bppshift = mgag200_get_bpp_shift(format); switch (bpp) { case 24: scale = ((1 << bppshift) * 3) - 1; @@ -1569,7 +864,9 @@ mgag200_simple_display_pipe_enable(struct drm_simple_display_pipe *pipe, struct drm_crtc *crtc = &pipe->crtc; struct drm_device *dev = crtc->dev; struct mga_device *mdev = to_mga_device(dev); + struct mgag200_pll *pixpll = &mdev->pixpll; struct drm_display_mode *adjusted_mode = &crtc_state->adjusted_mode; + struct mgag200_crtc_state *mgag200_crtc_state = to_mgag200_crtc_state(crtc_state); struct drm_framebuffer *fb = plane_state->fb; struct drm_shadow_plane_state *shadow_plane_state = to_drm_shadow_plane_state(plane_state); struct drm_rect fullscreen = { @@ -1584,7 +881,8 @@ mgag200_simple_display_pipe_enable(struct drm_simple_display_pipe *pipe, mgag200_set_format_regs(mdev, fb); mgag200_set_mode_regs(mdev, adjusted_mode); - mgag200_crtc_set_plls(mdev, adjusted_mode->clock); + + pixpll->funcs->update(pixpll, 
&mgag200_crtc_state->pixpllc); if (mdev->type == G200_ER) mgag200_g200er_reset_tagfifo(mdev); @@ -1600,7 +898,7 @@ mgag200_simple_display_pipe_enable(struct drm_simple_display_pipe *pipe, mga_crtc_load_lut(crtc); mgag200_enable_display(mdev); - mgag200_handle_damage(mdev, fb, &fullscreen, &shadow_plane_state->map[0]); + mgag200_handle_damage(mdev, fb, &fullscreen, &shadow_plane_state->data[0]); } static void @@ -1618,8 +916,13 @@ mgag200_simple_display_pipe_check(struct drm_simple_display_pipe *pipe, struct drm_crtc_state *crtc_state) { struct drm_plane *plane = plane_state->plane; + struct drm_device *dev = plane->dev; + struct mga_device *mdev = to_mga_device(dev); + struct mgag200_pll *pixpll = &mdev->pixpll; + struct mgag200_crtc_state *mgag200_crtc_state = to_mgag200_crtc_state(crtc_state); struct drm_framebuffer *new_fb = plane_state->fb; struct drm_framebuffer *fb = NULL; + int ret; if (!new_fb) return 0; @@ -1630,6 +933,13 @@ mgag200_simple_display_pipe_check(struct drm_simple_display_pipe *pipe, if (!fb || (fb->format != new_fb->format)) crtc_state->mode_changed = true; /* update PLL settings */ + if (crtc_state->mode_changed) { + ret = pixpll->funcs->compute(pixpll, crtc_state->mode.clock, + &mgag200_crtc_state->pixpllc); + if (ret) + return ret; + } + return 0; } @@ -1649,7 +959,54 @@ mgag200_simple_display_pipe_update(struct drm_simple_display_pipe *pipe, return; if (drm_atomic_helper_damage_merged(old_state, state, &damage)) - mgag200_handle_damage(mdev, fb, &damage, &shadow_plane_state->map[0]); + mgag200_handle_damage(mdev, fb, &damage, &shadow_plane_state->data[0]); +} + +static struct drm_crtc_state * +mgag200_simple_display_pipe_duplicate_crtc_state(struct drm_simple_display_pipe *pipe) +{ + struct drm_crtc *crtc = &pipe->crtc; + struct drm_crtc_state *crtc_state = crtc->state; + struct mgag200_crtc_state *mgag200_crtc_state = to_mgag200_crtc_state(crtc_state); + struct mgag200_crtc_state *new_mgag200_crtc_state; + + if (!crtc_state) + return NULL; + + new_mgag200_crtc_state = kzalloc(sizeof(*new_mgag200_crtc_state), GFP_KERNEL); + if (!new_mgag200_crtc_state) + return NULL; + __drm_atomic_helper_crtc_duplicate_state(crtc, &new_mgag200_crtc_state->base); + + memcpy(&new_mgag200_crtc_state->pixpllc, &mgag200_crtc_state->pixpllc, + sizeof(new_mgag200_crtc_state->pixpllc)); + + return &new_mgag200_crtc_state->base; +} + +static void mgag200_simple_display_pipe_destroy_crtc_state(struct drm_simple_display_pipe *pipe, + struct drm_crtc_state *crtc_state) +{ + struct mgag200_crtc_state *mgag200_crtc_state = to_mgag200_crtc_state(crtc_state); + + __drm_atomic_helper_crtc_destroy_state(&mgag200_crtc_state->base); + kfree(mgag200_crtc_state); +} + +static void mgag200_simple_display_pipe_reset_crtc(struct drm_simple_display_pipe *pipe) +{ + struct drm_crtc *crtc = &pipe->crtc; + struct mgag200_crtc_state *mgag200_crtc_state; + + if (crtc->state) { + mgag200_simple_display_pipe_destroy_crtc_state(pipe, crtc->state); + crtc->state = NULL; /* must be set to NULL here */ + } + + mgag200_crtc_state = kzalloc(sizeof(*mgag200_crtc_state), GFP_KERNEL); + if (!mgag200_crtc_state) + return; + __drm_atomic_helper_crtc_reset(crtc, &mgag200_crtc_state->base); } static const struct drm_simple_display_pipe_funcs @@ -1659,6 +1016,9 @@ mgag200_simple_display_pipe_funcs = { .disable = mgag200_simple_display_pipe_disable, .check = mgag200_simple_display_pipe_check, .update = mgag200_simple_display_pipe_update, + .reset_crtc = mgag200_simple_display_pipe_reset_crtc, + .duplicate_crtc_state = 
mgag200_simple_display_pipe_duplicate_crtc_state, + .destroy_crtc_state = mgag200_simple_display_pipe_destroy_crtc_state, DRM_GEM_SIMPLE_DISPLAY_PIPE_SHADOW_PLANE_FUNCS, }; @@ -1699,11 +1059,6 @@ int mgag200_modeset_init(struct mga_device *mdev) size_t format_count = ARRAY_SIZE(mgag200_simple_display_pipe_formats); int ret; - mdev->bpp_shifts[0] = 0; - mdev->bpp_shifts[1] = 1; - mdev->bpp_shifts[2] = 0; - mdev->bpp_shifts[3] = 2; - mgag200_init_regs(mdev); ret = drmm_mode_config_init(dev); @@ -1730,6 +1085,10 @@ int mgag200_modeset_init(struct mga_device *mdev) return ret; } + ret = mgag200_pixpll_init(&mdev->pixpll, mdev); + if (ret) + return ret; + ret = drm_simple_display_pipe_init(dev, pipe, &mgag200_simple_display_pipe_funcs, mgag200_simple_display_pipe_formats, diff --git a/drivers/gpu/drm/mgag200/mgag200_pll.c b/drivers/gpu/drm/mgag200/mgag200_pll.c new file mode 100644 index 000000000000..7c903cf19c0d --- /dev/null +++ b/drivers/gpu/drm/mgag200/mgag200_pll.c @@ -0,0 +1,992 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <linux/delay.h> + +#include "mgag200_drv.h" + +/* + * G200 + */ + +static int mgag200_pixpll_compute_g200(struct mgag200_pll *pixpll, long clock, + struct mgag200_pll_values *pixpllc) +{ + struct mga_device *mdev = pixpll->mdev; + struct drm_device *dev = &mdev->base; + const int post_div_max = 7; + const int in_div_min = 1; + const int in_div_max = 6; + const int feed_div_min = 7; + const int feed_div_max = 127; + u8 testp, testm, testn; + u8 n = 0, m = 0, p, s; + long f_vco; + long computed; + long delta, tmp_delta; + long ref_clk = mdev->model.g200.ref_clk; + long p_clk_min = mdev->model.g200.pclk_min; + long p_clk_max = mdev->model.g200.pclk_max; + + if (clock > p_clk_max) { + drm_err(dev, "Pixel Clock %ld too high\n", clock); + return -EINVAL; + } + + if (clock < p_clk_min >> 3) + clock = p_clk_min >> 3; + + f_vco = clock; + for (testp = 0; + testp <= post_div_max && f_vco < p_clk_min; + testp = (testp << 1) + 1, f_vco <<= 1) + ; + p = testp + 1; + + delta = clock; + + for (testm = in_div_min; testm <= in_div_max; testm++) { + for (testn = feed_div_min; testn <= feed_div_max; testn++) { + computed = ref_clk * (testn + 1) / (testm + 1); + if (computed < f_vco) + tmp_delta = f_vco - computed; + else + tmp_delta = computed - f_vco; + if (tmp_delta < delta) { + delta = tmp_delta; + m = testm + 1; + n = testn + 1; + } + } + } + f_vco = ref_clk * n / m; + if (f_vco < 100000) + s = 0; + else if (f_vco < 140000) + s = 1; + else if (f_vco < 180000) + s = 2; + else + s = 3; + + drm_dbg_kms(dev, "clock: %ld vco: %ld m: %d n: %d p: %d s: %d\n", + clock, f_vco, m, n, p, s); + + pixpllc->m = m; + pixpllc->n = n; + pixpllc->p = p; + pixpllc->s = s; + + return 0; +} + +static void +mgag200_pixpll_update_g200(struct mgag200_pll *pixpll, const struct mgag200_pll_values *pixpllc) +{ + struct mga_device *mdev = pixpll->mdev; + unsigned int pixpllcm, pixpllcn, pixpllcp, pixpllcs; + u8 xpixpllcm, xpixpllcn, xpixpllcp; + + pixpllcm = pixpllc->m - 1; + pixpllcn = pixpllc->n - 1; + pixpllcp = pixpllc->p - 1; + pixpllcs = pixpllc->s; + + xpixpllcm = pixpllcm; + xpixpllcn = pixpllcn; + xpixpllcp = (pixpllcs << 3) | pixpllcp; + + WREG_MISC_MASKED(MGAREG_MISC_CLKSEL_MGA, MGAREG_MISC_CLKSEL_MASK); + + WREG_DAC(MGA1064_PIX_PLLC_M, xpixpllcm); + WREG_DAC(MGA1064_PIX_PLLC_N, xpixpllcn); + WREG_DAC(MGA1064_PIX_PLLC_P, xpixpllcp); +} + +static const struct mgag200_pll_funcs mgag200_pixpll_funcs_g200 = { + .compute = mgag200_pixpll_compute_g200, + .update = mgag200_pixpll_update_g200, 
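+ /* + * compute() runs from the display-pipe atomic check and picks M/N/P/S so + * that Fo = (Fref * N / M) / P comes as close as possible to the requested + * mode clock; update() then writes the register encoding (value - 1, with + * S packed into the upper bits of the P register) from the enable path. + */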
+}; + +/* + * G200SE + */ + +static int mgag200_pixpll_compute_g200se_00(struct mgag200_pll *pixpll, long clock, + struct mgag200_pll_values *pixpllc) +{ + static const unsigned int vcomax = 320000; + static const unsigned int vcomin = 160000; + static const unsigned int pllreffreq = 25000; + + unsigned int delta, tmpdelta, permitteddelta; + unsigned int testp, testm, testn; + unsigned int p, m, n, s; + unsigned int computed; + + m = n = p = s = 0; + permitteddelta = clock * 5 / 1000; + + for (testp = 8; testp > 0; testp /= 2) { + if (clock * testp > vcomax) + continue; + if (clock * testp < vcomin) + continue; + + for (testn = 17; testn < 256; testn++) { + for (testm = 1; testm < 32; testm++) { + computed = (pllreffreq * testn) / (testm * testp); + if (computed > clock) + tmpdelta = computed - clock; + else + tmpdelta = clock - computed; + if (tmpdelta < delta) { + delta = tmpdelta; + m = testm; + n = testn; + p = testp; + } + } + } + } + + if (delta > permitteddelta) { + pr_warn("PLL delta too large\n"); + return -EINVAL; + } + + pixpllc->m = m; + pixpllc->n = n; + pixpllc->p = p; + pixpllc->s = s; + + return 0; +} + +static void mgag200_pixpll_update_g200se_00(struct mgag200_pll *pixpll, + const struct mgag200_pll_values *pixpllc) +{ + unsigned int pixpllcm, pixpllcn, pixpllcp, pixpllcs; + u8 xpixpllcm, xpixpllcn, xpixpllcp; + struct mga_device *mdev = pixpll->mdev; + + pixpllcm = pixpllc->m - 1; + pixpllcn = pixpllc->n - 1; + pixpllcp = pixpllc->p - 1; + pixpllcs = pixpllc->s; + + xpixpllcm = pixpllcm | ((pixpllcn & BIT(8)) >> 1); + xpixpllcn = pixpllcn; + xpixpllcp = (pixpllcs << 3) | pixpllcp; + + WREG_MISC_MASKED(MGAREG_MISC_CLKSEL_MGA, MGAREG_MISC_CLKSEL_MASK); + + WREG_DAC(MGA1064_PIX_PLLC_M, xpixpllcm); + WREG_DAC(MGA1064_PIX_PLLC_N, xpixpllcn); + WREG_DAC(MGA1064_PIX_PLLC_P, xpixpllcp); +} + +static int mgag200_pixpll_compute_g200se_04(struct mgag200_pll *pixpll, long clock, + struct mgag200_pll_values *pixpllc) +{ + static const unsigned int vcomax = 1600000; + static const unsigned int vcomin = 800000; + static const unsigned int pllreffreq = 25000; + static const unsigned int pvalues_e4[] = {16, 14, 12, 10, 8, 6, 4, 2, 1}; + + unsigned int delta, tmpdelta, permitteddelta; + unsigned int testp, testm, testn; + unsigned int p, m, n, s; + unsigned int computed; + unsigned int fvv; + unsigned int i; + + m = n = p = s = 0; + delta = 0xffffffff; + + if (clock < 25000) + clock = 25000; + clock = clock * 2; + + /* Permited delta is 0.5% as VESA Specification */ + permitteddelta = clock * 5 / 1000; + + for (i = 0 ; i < ARRAY_SIZE(pvalues_e4); i++) { + testp = pvalues_e4[i]; + + if ((clock * testp) > vcomax) + continue; + if ((clock * testp) < vcomin) + continue; + + for (testn = 50; testn <= 256; testn++) { + for (testm = 1; testm <= 32; testm++) { + computed = (pllreffreq * testn) / (testm * testp); + if (computed > clock) + tmpdelta = computed - clock; + else + tmpdelta = clock - computed; + + if (tmpdelta < delta) { + delta = tmpdelta; + m = testm; + n = testn; + p = testp; + } + } + } + } + + fvv = pllreffreq * n / m; + fvv = (fvv - 800000) / 50000; + if (fvv > 15) + fvv = 15; + s = fvv << 1; + + if (delta > permitteddelta) { + pr_warn("PLL delta too large\n"); + return -EINVAL; + } + + pixpllc->m = m; + pixpllc->n = n; + pixpllc->p = p; + pixpllc->s = s; + + return 0; +} + +static void mgag200_pixpll_update_g200se_04(struct mgag200_pll *pixpll, + const struct mgag200_pll_values *pixpllc) +{ + unsigned int pixpllcm, pixpllcn, pixpllcp, pixpllcs; + u8 xpixpllcm, xpixpllcn, 
xpixpllcp; + struct mga_device *mdev = pixpll->mdev; + + pixpllcm = pixpllc->m - 1; + pixpllcn = pixpllc->n - 1; + pixpllcp = pixpllc->p - 1; + pixpllcs = pixpllc->s; + + xpixpllcm = pixpllcm | ((pixpllcn & BIT(8)) >> 1); + xpixpllcn = pixpllcn; + xpixpllcp = (pixpllcs << 3) | pixpllcp; + + WREG_MISC_MASKED(MGAREG_MISC_CLKSEL_MGA, MGAREG_MISC_CLKSEL_MASK); + + WREG_DAC(MGA1064_PIX_PLLC_M, xpixpllcm); + WREG_DAC(MGA1064_PIX_PLLC_N, xpixpllcn); + WREG_DAC(MGA1064_PIX_PLLC_P, xpixpllcp); + + WREG_DAC(0x1a, 0x09); + msleep(20); + WREG_DAC(0x1a, 0x01); +} + +static const struct mgag200_pll_funcs mgag200_pixpll_funcs_g200se_00 = { + .compute = mgag200_pixpll_compute_g200se_00, + .update = mgag200_pixpll_update_g200se_00, +}; + +static const struct mgag200_pll_funcs mgag200_pixpll_funcs_g200se_04 = { + .compute = mgag200_pixpll_compute_g200se_04, + .update = mgag200_pixpll_update_g200se_04, +}; + +/* + * G200WB + */ + +static int mgag200_pixpll_compute_g200wb(struct mgag200_pll *pixpll, long clock, + struct mgag200_pll_values *pixpllc) +{ + static const unsigned int vcomax = 550000; + static const unsigned int vcomin = 150000; + static const unsigned int pllreffreq = 48000; + + unsigned int delta, tmpdelta; + unsigned int testp, testm, testn; + unsigned int p, m, n, s; + unsigned int computed; + + m = n = p = s = 0; + delta = 0xffffffff; + + for (testp = 1; testp < 9; testp++) { + if (clock * testp > vcomax) + continue; + if (clock * testp < vcomin) + continue; + + for (testm = 1; testm < 17; testm++) { + for (testn = 1; testn < 151; testn++) { + computed = (pllreffreq * testn) / (testm * testp); + if (computed > clock) + tmpdelta = computed - clock; + else + tmpdelta = clock - computed; + if (tmpdelta < delta) { + delta = tmpdelta; + n = testn; + m = testm; + p = testp; + s = 0; + } + } + } + } + + pixpllc->m = m; + pixpllc->n = n; + pixpllc->p = p; + pixpllc->s = s; + + return 0; +} + +static void +mgag200_pixpll_update_g200wb(struct mgag200_pll *pixpll, const struct mgag200_pll_values *pixpllc) +{ + unsigned int pixpllcm, pixpllcn, pixpllcp, pixpllcs; + u8 xpixpllcm, xpixpllcn, xpixpllcp, tmp; + int i, j, tmpcount, vcount; + struct mga_device *mdev = pixpll->mdev; + bool pll_locked = false; + + pixpllcm = pixpllc->m - 1; + pixpllcn = pixpllc->n - 1; + pixpllcp = pixpllc->p - 1; + pixpllcs = pixpllc->s; + + xpixpllcm = ((pixpllcn & BIT(8)) >> 1) | pixpllcm; + xpixpllcn = pixpllcn; + xpixpllcp = ((pixpllcn & GENMASK(10, 9)) >> 3) | (pixpllcs << 3) | pixpllcp; + + WREG_MISC_MASKED(MGAREG_MISC_CLKSEL_MGA, MGAREG_MISC_CLKSEL_MASK); + + for (i = 0; i <= 32 && pll_locked == false; i++) { + if (i > 0) { + WREG8(MGAREG_CRTC_INDEX, 0x1e); + tmp = RREG8(MGAREG_CRTC_DATA); + if (tmp < 0xff) + WREG8(MGAREG_CRTC_DATA, tmp+1); + } + + /* set pixclkdis to 1 */ + WREG8(DAC_INDEX, MGA1064_PIX_CLK_CTL); + tmp = RREG8(DAC_DATA); + tmp |= MGA1064_PIX_CLK_CTL_CLK_DIS; + WREG8(DAC_DATA, tmp); + + WREG8(DAC_INDEX, MGA1064_REMHEADCTL); + tmp = RREG8(DAC_DATA); + tmp |= MGA1064_REMHEADCTL_CLKDIS; + WREG8(DAC_DATA, tmp); + + /* select PLL Set C */ + tmp = RREG8(MGAREG_MEM_MISC_READ); + tmp |= 0x3 << 2; + WREG8(MGAREG_MEM_MISC_WRITE, tmp); + + WREG8(DAC_INDEX, MGA1064_PIX_CLK_CTL); + tmp = RREG8(DAC_DATA); + tmp |= MGA1064_PIX_CLK_CTL_CLK_POW_DOWN | 0x80; + WREG8(DAC_DATA, tmp); + + udelay(500); + + /* reset the PLL */ + WREG8(DAC_INDEX, MGA1064_VREF_CTL); + tmp = RREG8(DAC_DATA); + tmp &= ~0x04; + WREG8(DAC_DATA, tmp); + + udelay(50); + + /* program pixel pll register */ + WREG_DAC(MGA1064_PIX_PLLC_N, xpixpllcn); + 
WREG_DAC(MGA1064_PIX_PLLC_M, xpixpllcm); + WREG_DAC(MGA1064_PIX_PLLC_P, xpixpllcp); + + udelay(50); + + /* turn pll on */ + WREG8(DAC_INDEX, MGA1064_VREF_CTL); + tmp = RREG8(DAC_DATA); + tmp |= 0x04; + WREG_DAC(MGA1064_VREF_CTL, tmp); + + udelay(500); + + /* select the pixel pll */ + WREG8(DAC_INDEX, MGA1064_PIX_CLK_CTL); + tmp = RREG8(DAC_DATA); + tmp &= ~MGA1064_PIX_CLK_CTL_SEL_MSK; + tmp |= MGA1064_PIX_CLK_CTL_SEL_PLL; + WREG8(DAC_DATA, tmp); + + WREG8(DAC_INDEX, MGA1064_REMHEADCTL); + tmp = RREG8(DAC_DATA); + tmp &= ~MGA1064_REMHEADCTL_CLKSL_MSK; + tmp |= MGA1064_REMHEADCTL_CLKSL_PLL; + WREG8(DAC_DATA, tmp); + + /* reset dotclock rate bit */ + WREG8(MGAREG_SEQ_INDEX, 1); + tmp = RREG8(MGAREG_SEQ_DATA); + tmp &= ~0x8; + WREG8(MGAREG_SEQ_DATA, tmp); + + WREG8(DAC_INDEX, MGA1064_PIX_CLK_CTL); + tmp = RREG8(DAC_DATA); + tmp &= ~MGA1064_PIX_CLK_CTL_CLK_DIS; + WREG8(DAC_DATA, tmp); + + vcount = RREG8(MGAREG_VCOUNT); + + for (j = 0; j < 30 && pll_locked == false; j++) { + tmpcount = RREG8(MGAREG_VCOUNT); + if (tmpcount < vcount) + vcount = 0; + if ((tmpcount - vcount) > 2) + pll_locked = true; + else + udelay(5); + } + } + + WREG8(DAC_INDEX, MGA1064_REMHEADCTL); + tmp = RREG8(DAC_DATA); + tmp &= ~MGA1064_REMHEADCTL_CLKDIS; + WREG_DAC(MGA1064_REMHEADCTL, tmp); +} + +static const struct mgag200_pll_funcs mgag200_pixpll_funcs_g200wb = { + .compute = mgag200_pixpll_compute_g200wb, + .update = mgag200_pixpll_update_g200wb, +}; + +/* + * G200EV + */ + +static int mgag200_pixpll_compute_g200ev(struct mgag200_pll *pixpll, long clock, + struct mgag200_pll_values *pixpllc) +{ + static const unsigned int vcomax = 550000; + static const unsigned int vcomin = 150000; + static const unsigned int pllreffreq = 50000; + + unsigned int delta, tmpdelta; + unsigned int testp, testm, testn; + unsigned int p, m, n, s; + unsigned int computed; + + m = n = p = s = 0; + delta = 0xffffffff; + + for (testp = 16; testp > 0; testp--) { + if (clock * testp > vcomax) + continue; + if (clock * testp < vcomin) + continue; + + for (testn = 1; testn < 257; testn++) { + for (testm = 1; testm < 17; testm++) { + computed = (pllreffreq * testn) / + (testm * testp); + if (computed > clock) + tmpdelta = computed - clock; + else + tmpdelta = clock - computed; + if (tmpdelta < delta) { + delta = tmpdelta; + n = testn; + m = testm; + p = testp; + } + } + } + } + + pixpllc->m = m; + pixpllc->n = n; + pixpllc->p = p; + pixpllc->s = s; + + return 0; +} + +static void +mgag200_pixpll_update_g200ev(struct mgag200_pll *pixpll, const struct mgag200_pll_values *pixpllc) +{ + unsigned int pixpllcm, pixpllcn, pixpllcp, pixpllcs; + u8 xpixpllcm, xpixpllcn, xpixpllcp, tmp; + struct mga_device *mdev = pixpll->mdev; + + pixpllcm = pixpllc->m - 1; + pixpllcn = pixpllc->n - 1; + pixpllcp = pixpllc->p - 1; + pixpllcs = pixpllc->s; + + xpixpllcm = pixpllcm; + xpixpllcn = pixpllcn; + xpixpllcp = (pixpllcs << 3) | pixpllcp; + + WREG_MISC_MASKED(MGAREG_MISC_CLKSEL_MGA, MGAREG_MISC_CLKSEL_MASK); + + WREG8(DAC_INDEX, MGA1064_PIX_CLK_CTL); + tmp = RREG8(DAC_DATA); + tmp |= MGA1064_PIX_CLK_CTL_CLK_DIS; + WREG8(DAC_DATA, tmp); + + tmp = RREG8(MGAREG_MEM_MISC_READ); + tmp |= 0x3 << 2; + WREG8(MGAREG_MEM_MISC_WRITE, tmp); + + WREG8(DAC_INDEX, MGA1064_PIX_PLL_STAT); + tmp = RREG8(DAC_DATA); + WREG8(DAC_DATA, tmp & ~0x40); + + WREG8(DAC_INDEX, MGA1064_PIX_CLK_CTL); + tmp = RREG8(DAC_DATA); + tmp |= MGA1064_PIX_CLK_CTL_CLK_POW_DOWN; + WREG8(DAC_DATA, tmp); + + WREG_DAC(MGA1064_EV_PIX_PLLC_M, xpixpllcm); + WREG_DAC(MGA1064_EV_PIX_PLLC_N, xpixpllcn); + 
WREG_DAC(MGA1064_EV_PIX_PLLC_P, xpixpllcp); + + udelay(50); + + WREG8(DAC_INDEX, MGA1064_PIX_CLK_CTL); + tmp = RREG8(DAC_DATA); + tmp &= ~MGA1064_PIX_CLK_CTL_CLK_POW_DOWN; + WREG8(DAC_DATA, tmp); + + udelay(500); + + WREG8(DAC_INDEX, MGA1064_PIX_CLK_CTL); + tmp = RREG8(DAC_DATA); + tmp &= ~MGA1064_PIX_CLK_CTL_SEL_MSK; + tmp |= MGA1064_PIX_CLK_CTL_SEL_PLL; + WREG8(DAC_DATA, tmp); + + WREG8(DAC_INDEX, MGA1064_PIX_PLL_STAT); + tmp = RREG8(DAC_DATA); + WREG8(DAC_DATA, tmp | 0x40); + + tmp = RREG8(MGAREG_MEM_MISC_READ); + tmp |= (0x3 << 2); + WREG8(MGAREG_MEM_MISC_WRITE, tmp); + + WREG8(DAC_INDEX, MGA1064_PIX_CLK_CTL); + tmp = RREG8(DAC_DATA); + tmp &= ~MGA1064_PIX_CLK_CTL_CLK_DIS; + WREG8(DAC_DATA, tmp); +} + +static const struct mgag200_pll_funcs mgag200_pixpll_funcs_g200ev = { + .compute = mgag200_pixpll_compute_g200ev, + .update = mgag200_pixpll_update_g200ev, +}; + +/* + * G200EH + */ + +static int mgag200_pixpll_compute_g200eh(struct mgag200_pll *pixpll, long clock, + struct mgag200_pll_values *pixpllc) +{ + static const unsigned int vcomax = 800000; + static const unsigned int vcomin = 400000; + static const unsigned int pllreffreq = 33333; + + unsigned int delta, tmpdelta; + unsigned int testp, testm, testn; + unsigned int p, m, n, s; + unsigned int computed; + + m = n = p = s = 0; + delta = 0xffffffff; + + for (testp = 16; testp > 0; testp >>= 1) { + if (clock * testp > vcomax) + continue; + if (clock * testp < vcomin) + continue; + + for (testm = 1; testm < 33; testm++) { + for (testn = 17; testn < 257; testn++) { + computed = (pllreffreq * testn) / (testm * testp); + if (computed > clock) + tmpdelta = computed - clock; + else + tmpdelta = clock - computed; + if (tmpdelta < delta) { + delta = tmpdelta; + n = testn; + m = testm; + p = testp; + } + } + } + } + + pixpllc->m = m; + pixpllc->n = n; + pixpllc->p = p; + pixpllc->s = s; + + return 0; +} + +static void +mgag200_pixpll_update_g200eh(struct mgag200_pll *pixpll, const struct mgag200_pll_values *pixpllc) +{ + unsigned int pixpllcm, pixpllcn, pixpllcp, pixpllcs; + u8 xpixpllcm, xpixpllcn, xpixpllcp, tmp; + int i, j, tmpcount, vcount; + struct mga_device *mdev = pixpll->mdev; + bool pll_locked = false; + + pixpllcm = pixpllc->m - 1; + pixpllcn = pixpllc->n - 1; + pixpllcp = pixpllc->p - 1; + pixpllcs = pixpllc->s; + + xpixpllcm = ((pixpllcn & BIT(8)) >> 1) | pixpllcm; + xpixpllcn = pixpllcn; + xpixpllcp = (pixpllcs << 3) | pixpllcp; + + WREG_MISC_MASKED(MGAREG_MISC_CLKSEL_MGA, MGAREG_MISC_CLKSEL_MASK); + + for (i = 0; i <= 32 && pll_locked == false; i++) { + WREG8(DAC_INDEX, MGA1064_PIX_CLK_CTL); + tmp = RREG8(DAC_DATA); + tmp |= MGA1064_PIX_CLK_CTL_CLK_DIS; + WREG8(DAC_DATA, tmp); + + tmp = RREG8(MGAREG_MEM_MISC_READ); + tmp |= 0x3 << 2; + WREG8(MGAREG_MEM_MISC_WRITE, tmp); + + WREG8(DAC_INDEX, MGA1064_PIX_CLK_CTL); + tmp = RREG8(DAC_DATA); + tmp |= MGA1064_PIX_CLK_CTL_CLK_POW_DOWN; + WREG8(DAC_DATA, tmp); + + udelay(500); + + WREG_DAC(MGA1064_EH_PIX_PLLC_M, xpixpllcm); + WREG_DAC(MGA1064_EH_PIX_PLLC_N, xpixpllcn); + WREG_DAC(MGA1064_EH_PIX_PLLC_P, xpixpllcp); + + udelay(500); + + WREG8(DAC_INDEX, MGA1064_PIX_CLK_CTL); + tmp = RREG8(DAC_DATA); + tmp &= ~MGA1064_PIX_CLK_CTL_SEL_MSK; + tmp |= MGA1064_PIX_CLK_CTL_SEL_PLL; + WREG8(DAC_DATA, tmp); + + WREG8(DAC_INDEX, MGA1064_PIX_CLK_CTL); + tmp = RREG8(DAC_DATA); + tmp &= ~MGA1064_PIX_CLK_CTL_CLK_DIS; + tmp &= ~MGA1064_PIX_CLK_CTL_CLK_POW_DOWN; + WREG8(DAC_DATA, tmp); + + vcount = RREG8(MGAREG_VCOUNT); + + for (j = 0; j < 30 && pll_locked == false; j++) { + tmpcount = 
RREG8(MGAREG_VCOUNT); + if (tmpcount < vcount) + vcount = 0; + if ((tmpcount - vcount) > 2) + pll_locked = true; + else + udelay(5); + } + } +} + +static const struct mgag200_pll_funcs mgag200_pixpll_funcs_g200eh = { + .compute = mgag200_pixpll_compute_g200eh, + .update = mgag200_pixpll_update_g200eh, +}; + +/* + * G200EH3 + */ + +static int mgag200_pixpll_compute_g200eh3(struct mgag200_pll *pixpll, long clock, + struct mgag200_pll_values *pixpllc) +{ + static const unsigned int vcomax = 3000000; + static const unsigned int vcomin = 1500000; + static const unsigned int pllreffreq = 25000; + + unsigned int delta, tmpdelta; + unsigned int testp, testm, testn; + unsigned int p, m, n, s; + unsigned int computed; + + m = n = p = s = 0; + delta = 0xffffffff; + testp = 0; + + for (testm = 150; testm >= 6; testm--) { + if (clock * testm > vcomax) + continue; + if (clock * testm < vcomin) + continue; + for (testn = 120; testn >= 60; testn--) { + computed = (pllreffreq * testn) / testm; + if (computed > clock) + tmpdelta = computed - clock; + else + tmpdelta = clock - computed; + if (tmpdelta < delta) { + delta = tmpdelta; + n = testn + 1; + m = testm + 1; + p = testp + 1; + } + if (delta == 0) + break; + } + if (delta == 0) + break; + } + + pixpllc->m = m; + pixpllc->n = n; + pixpllc->p = p; + pixpllc->s = s; + + return 0; +} + +static const struct mgag200_pll_funcs mgag200_pixpll_funcs_g200eh3 = { + .compute = mgag200_pixpll_compute_g200eh3, + .update = mgag200_pixpll_update_g200eh, // same as G200EH +}; + +/* + * G200ER + */ + +static int mgag200_pixpll_compute_g200er(struct mgag200_pll *pixpll, long clock, + struct mgag200_pll_values *pixpllc) +{ + static const unsigned int vcomax = 1488000; + static const unsigned int vcomin = 1056000; + static const unsigned int pllreffreq = 48000; + static const unsigned int m_div_val[] = { 1, 2, 4, 8 }; + + unsigned int delta, tmpdelta; + int testr, testn, testm, testo; + unsigned int p, m, n, s; + unsigned int computed, vco; + + m = n = p = s = 0; + delta = 0xffffffff; + + for (testr = 0; testr < 4; testr++) { + if (delta == 0) + break; + for (testn = 5; testn < 129; testn++) { + if (delta == 0) + break; + for (testm = 3; testm >= 0; testm--) { + if (delta == 0) + break; + for (testo = 5; testo < 33; testo++) { + vco = pllreffreq * (testn + 1) / + (testr + 1); + if (vco < vcomin) + continue; + if (vco > vcomax) + continue; + computed = vco / (m_div_val[testm] * (testo + 1)); + if (computed > clock) + tmpdelta = computed - clock; + else + tmpdelta = clock - computed; + if (tmpdelta < delta) { + delta = tmpdelta; + m = (testm | (testo << 3)) + 1; + n = testn + 1; + p = testr + 1; + s = testr; + } + } + } + } + } + + pixpllc->m = m; + pixpllc->n = n; + pixpllc->p = p; + pixpllc->s = s; + + return 0; +} + +static void +mgag200_pixpll_update_g200er(struct mgag200_pll *pixpll, const struct mgag200_pll_values *pixpllc) +{ + unsigned int pixpllcm, pixpllcn, pixpllcp, pixpllcs; + u8 xpixpllcm, xpixpllcn, xpixpllcp, tmp; + struct mga_device *mdev = pixpll->mdev; + + pixpllcm = pixpllc->m - 1; + pixpllcn = pixpllc->n - 1; + pixpllcp = pixpllc->p - 1; + pixpllcs = pixpllc->s; + + xpixpllcm = pixpllcm; + xpixpllcn = pixpllcn; + xpixpllcp = (pixpllcs << 3) | pixpllcp; + + WREG_MISC_MASKED(MGAREG_MISC_CLKSEL_MGA, MGAREG_MISC_CLKSEL_MASK); + + WREG8(DAC_INDEX, MGA1064_PIX_CLK_CTL); + tmp = RREG8(DAC_DATA); + tmp |= MGA1064_PIX_CLK_CTL_CLK_DIS; + WREG8(DAC_DATA, tmp); + + WREG8(DAC_INDEX, MGA1064_REMHEADCTL); + tmp = RREG8(DAC_DATA); + tmp |= MGA1064_REMHEADCTL_CLKDIS; + 
WREG8(DAC_DATA, tmp); + + tmp = RREG8(MGAREG_MEM_MISC_READ); + tmp |= (0x3<<2) | 0xc0; + WREG8(MGAREG_MEM_MISC_WRITE, tmp); + + WREG8(DAC_INDEX, MGA1064_PIX_CLK_CTL); + tmp = RREG8(DAC_DATA); + tmp &= ~MGA1064_PIX_CLK_CTL_CLK_DIS; + tmp |= MGA1064_PIX_CLK_CTL_CLK_POW_DOWN; + WREG8(DAC_DATA, tmp); + + udelay(500); + + WREG_DAC(MGA1064_ER_PIX_PLLC_N, xpixpllcn); + WREG_DAC(MGA1064_ER_PIX_PLLC_M, xpixpllcm); + WREG_DAC(MGA1064_ER_PIX_PLLC_P, xpixpllcp); + + udelay(50); +} + +static const struct mgag200_pll_funcs mgag200_pixpll_funcs_g200er = { + .compute = mgag200_pixpll_compute_g200er, + .update = mgag200_pixpll_update_g200er, +}; + +/* + * G200EW3 + */ + +static int mgag200_pixpll_compute_g200ew3(struct mgag200_pll *pixpll, long clock, + struct mgag200_pll_values *pixpllc) +{ + static const unsigned int vcomax = 800000; + static const unsigned int vcomin = 400000; + static const unsigned int pllreffreq = 25000; + + unsigned int delta, tmpdelta; + unsigned int testp, testm, testn, testp2; + unsigned int p, m, n, s; + unsigned int computed; + + m = n = p = s = 0; + delta = 0xffffffff; + + for (testp = 1; testp < 8; testp++) { + for (testp2 = 1; testp2 < 8; testp2++) { + if (testp < testp2) + continue; + if ((clock * testp * testp2) > vcomax) + continue; + if ((clock * testp * testp2) < vcomin) + continue; + for (testm = 1; testm < 26; testm++) { + for (testn = 32; testn < 2048 ; testn++) { + computed = (pllreffreq * testn) / (testm * testp * testp2); + if (computed > clock) + tmpdelta = computed - clock; + else + tmpdelta = clock - computed; + if (tmpdelta < delta) { + delta = tmpdelta; + m = testm + 1; + n = testn + 1; + p = testp + 1; + s = testp2; + } + } + } + } + } + + pixpllc->m = m; + pixpllc->n = n; + pixpllc->p = p; + pixpllc->s = s; + + return 0; +} + +static const struct mgag200_pll_funcs mgag200_pixpll_funcs_g200ew3 = { + .compute = mgag200_pixpll_compute_g200ew3, + .update = mgag200_pixpll_update_g200wb, // same as G200WB +}; + +/* + * PLL initialization + */ + +int mgag200_pixpll_init(struct mgag200_pll *pixpll, struct mga_device *mdev) +{ + struct drm_device *dev = &mdev->base; + + pixpll->mdev = mdev; + + switch (mdev->type) { + case G200_PCI: + case G200_AGP: + pixpll->funcs = &mgag200_pixpll_funcs_g200; + break; + case G200_SE_A: + case G200_SE_B: + if (mdev->model.g200se.unique_rev_id >= 0x04) + pixpll->funcs = &mgag200_pixpll_funcs_g200se_04; + else + pixpll->funcs = &mgag200_pixpll_funcs_g200se_00; + break; + case G200_WB: + pixpll->funcs = &mgag200_pixpll_funcs_g200wb; + break; + case G200_EV: + pixpll->funcs = &mgag200_pixpll_funcs_g200ev; + break; + case G200_EH: + pixpll->funcs = &mgag200_pixpll_funcs_g200eh; + break; + case G200_EH3: + pixpll->funcs = &mgag200_pixpll_funcs_g200eh3; + break; + case G200_ER: + pixpll->funcs = &mgag200_pixpll_funcs_g200er; + break; + case G200_EW3: + pixpll->funcs = &mgag200_pixpll_funcs_g200ew3; + break; + default: + drm_err(dev, "unknown device type %d\n", mdev->type); + return -ENODEV; + } + + return 0; +} diff --git a/drivers/gpu/drm/mgag200/mgag200_reg.h b/drivers/gpu/drm/mgag200/mgag200_reg.h index 977be0565c06..60e705283fe8 100644 --- a/drivers/gpu/drm/mgag200/mgag200_reg.h +++ b/drivers/gpu/drm/mgag200/mgag200_reg.h @@ -222,11 +222,10 @@ #define MGAREG_MISC_IOADSEL (0x1 << 0) #define MGAREG_MISC_RAMMAPEN (0x1 << 1) -#define MGAREG_MISC_CLK_SEL_MASK GENMASK(3, 2) -#define MGAREG_MISC_CLK_SEL_VGA25 (0x0 << 2) -#define MGAREG_MISC_CLK_SEL_VGA28 (0x1 << 2) -#define MGAREG_MISC_CLK_SEL_MGA_PIX (0x2 << 2) -#define 
MGAREG_MISC_CLK_SEL_MGA_MSK (0x3 << 2) +#define MGAREG_MISC_CLKSEL_MASK GENMASK(3, 2) +#define MGAREG_MISC_CLKSEL_VGA25 (0x0 << 2) +#define MGAREG_MISC_CLKSEL_VGA28 (0x1 << 2) +#define MGAREG_MISC_CLKSEL_MGA (0x3 << 2) #define MGAREG_MISC_VIDEO_DIS (0x1 << 4) #define MGAREG_MISC_HIGH_PG_SEL (0x1 << 5) #define MGAREG_MISC_HSYNCPOL BIT(6) diff --git a/drivers/gpu/drm/msm/Kconfig b/drivers/gpu/drm/msm/Kconfig index 52536e7adb95..e9c6af78b1d7 100644 --- a/drivers/gpu/drm/msm/Kconfig +++ b/drivers/gpu/drm/msm/Kconfig @@ -14,6 +14,7 @@ config DRM_MSM select REGULATOR select DRM_KMS_HELPER select DRM_PANEL + select DRM_SCHED select SHMEM select TMPFS select QCOM_SCM if ARCH_QCOM @@ -115,9 +116,9 @@ config DRM_MSM_DSI_10NM_PHY Choose this option if DSI PHY on SDM845 is used on the platform. config DRM_MSM_DSI_7NM_PHY - bool "Enable DSI 7nm PHY driver in MSM DRM (used by SM8150/SM8250)" + bool "Enable DSI 7nm PHY driver in MSM DRM" depends on DRM_MSM_DSI default y help - Choose this option if DSI PHY on SM8150/SM8250 is used on the - platform. + Choose this option if DSI PHY on SM8150/SM8250/SC7280 is used on + the platform. diff --git a/drivers/gpu/drm/msm/Makefile b/drivers/gpu/drm/msm/Makefile index 2c00aa70b708..904535eda0c4 100644 --- a/drivers/gpu/drm/msm/Makefile +++ b/drivers/gpu/drm/msm/Makefile @@ -90,6 +90,7 @@ msm-y := \ msm_gem_submit.o \ msm_gem_vma.o \ msm_gpu.o \ + msm_gpu_devfreq.o \ msm_iommu.o \ msm_perf.o \ msm_rd.o \ diff --git a/drivers/gpu/drm/msm/adreno/a5xx_debugfs.c b/drivers/gpu/drm/msm/adreno/a5xx_debugfs.c index fc2c905b6c9e..c9d11d57aed6 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_debugfs.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_debugfs.c @@ -117,13 +117,13 @@ reset_set(void *data, u64 val) if (a5xx_gpu->pm4_bo) { msm_gem_unpin_iova(a5xx_gpu->pm4_bo, gpu->aspace); - drm_gem_object_put_locked(a5xx_gpu->pm4_bo); + drm_gem_object_put(a5xx_gpu->pm4_bo); a5xx_gpu->pm4_bo = NULL; } if (a5xx_gpu->pfp_bo) { msm_gem_unpin_iova(a5xx_gpu->pfp_bo, gpu->aspace); - drm_gem_object_put_locked(a5xx_gpu->pfp_bo); + drm_gem_object_put(a5xx_gpu->pfp_bo); a5xx_gpu->pfp_bo = NULL; } diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c index 7a271de9a212..5e2750eb3810 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c @@ -18,6 +18,18 @@ static void a5xx_dump(struct msm_gpu *gpu); #define GPU_PAS_ID 13 +static void update_shadow_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); + + if (a5xx_gpu->has_whereami) { + OUT_PKT7(ring, CP_WHERE_AM_I, 2); + OUT_RING(ring, lower_32_bits(shadowptr(a5xx_gpu, ring))); + OUT_RING(ring, upper_32_bits(shadowptr(a5xx_gpu, ring))); + } +} + void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring, bool sync) { @@ -30,11 +42,8 @@ void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring, * Most flush operations need to issue a WHERE_AM_I opcode to sync up * the rptr shadow */ - if (a5xx_gpu->has_whereami && sync) { - OUT_PKT7(ring, CP_WHERE_AM_I, 2); - OUT_RING(ring, lower_32_bits(shadowptr(a5xx_gpu, ring))); - OUT_RING(ring, upper_32_bits(shadowptr(a5xx_gpu, ring))); - } + if (sync) + update_shadow_rptr(gpu, ring); spin_lock_irqsave(&ring->preempt_lock, flags); @@ -168,6 +177,16 @@ static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) ibs++; break; } + + /* + * Periodically update shadow-wptr if needed, so that we + * can see 
partial progress of submits with large # of + * cmds.. otherwise we could needlessly stall waiting for + * ringbuffer state, simply due to looking at a shadow + * rptr value that has not been updated + */ + if ((ibs % 32) == 0) + update_shadow_rptr(gpu, ring); } /* @@ -1415,7 +1434,7 @@ struct a5xx_gpu_state { static int a5xx_crashdumper_init(struct msm_gpu *gpu, struct a5xx_crashdumper *dumper) { - dumper->ptr = msm_gem_kernel_new_locked(gpu->dev, + dumper->ptr = msm_gem_kernel_new(gpu->dev, SZ_1M, MSM_BO_WC, gpu->aspace, &dumper->bo, &dumper->iova); @@ -1517,7 +1536,7 @@ static void a5xx_gpu_state_get_hlsq_regs(struct msm_gpu *gpu, if (a5xx_crashdumper_run(gpu, &dumper)) { kfree(a5xx_state->hlsqregs); - msm_gem_kernel_put(dumper.bo, gpu->aspace, true); + msm_gem_kernel_put(dumper.bo, gpu->aspace); return; } @@ -1525,7 +1544,7 @@ static void a5xx_gpu_state_get_hlsq_regs(struct msm_gpu *gpu, memcpy(a5xx_state->hlsqregs, dumper.ptr + (256 * SZ_1K), count * sizeof(u32)); - msm_gem_kernel_put(dumper.bo, gpu->aspace, true); + msm_gem_kernel_put(dumper.bo, gpu->aspace); } static struct msm_gpu_state *a5xx_gpu_state_get(struct msm_gpu *gpu) diff --git a/drivers/gpu/drm/msm/adreno/a5xx_power.c b/drivers/gpu/drm/msm/adreno/a5xx_power.c index cdb165236a88..0e63a1429189 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_power.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_power.c @@ -362,7 +362,7 @@ void a5xx_gpmu_ucode_init(struct msm_gpu *gpu) */ bosize = (cmds_size + (cmds_size / TYPE4_MAX_PAYLOAD) + 1) << 2; - ptr = msm_gem_kernel_new_locked(drm, bosize, + ptr = msm_gem_kernel_new(drm, bosize, MSM_BO_WC | MSM_BO_GPU_READONLY, gpu->aspace, &a5xx_gpu->gpmu_bo, &a5xx_gpu->gpmu_iova); if (IS_ERR(ptr)) diff --git a/drivers/gpu/drm/msm/adreno/a5xx_preempt.c b/drivers/gpu/drm/msm/adreno/a5xx_preempt.c index ee72510ff8ce..8abc9a2b114a 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_preempt.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_preempt.c @@ -240,7 +240,7 @@ static int preempt_init_ring(struct a5xx_gpu *a5xx_gpu, A5XX_PREEMPT_COUNTER_SIZE, MSM_BO_WC, gpu->aspace, &counters_bo, &counters_iova); if (IS_ERR(counters)) { - msm_gem_kernel_put(bo, gpu->aspace, true); + msm_gem_kernel_put(bo, gpu->aspace); return PTR_ERR(counters); } @@ -272,9 +272,8 @@ void a5xx_preempt_fini(struct msm_gpu *gpu) int i; for (i = 0; i < gpu->nr_rings; i++) { - msm_gem_kernel_put(a5xx_gpu->preempt_bo[i], gpu->aspace, true); - msm_gem_kernel_put(a5xx_gpu->preempt_counters_bo[i], - gpu->aspace, true); + msm_gem_kernel_put(a5xx_gpu->preempt_bo[i], gpu->aspace); + msm_gem_kernel_put(a5xx_gpu->preempt_counters_bo[i], gpu->aspace); } } diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c index b349692219b7..a7c58018959f 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c @@ -519,9 +519,9 @@ static void a6xx_gmu_rpmh_init(struct a6xx_gmu *gmu) if (!pdcptr) goto err; - if (adreno_is_a650(adreno_gpu) || adreno_is_a660(adreno_gpu)) + if (adreno_is_a650(adreno_gpu) || adreno_is_a660_family(adreno_gpu)) pdc_in_aop = true; - else if (adreno_is_a618(adreno_gpu) || adreno_is_a640(adreno_gpu)) + else if (adreno_is_a618(adreno_gpu) || adreno_is_a640_family(adreno_gpu)) pdc_address_offset = 0x30090; else pdc_address_offset = 0x30080; @@ -933,6 +933,7 @@ int a6xx_gmu_resume(struct a6xx_gpu *a6xx_gpu) /* Use a known rate to bring up the GMU */ clk_set_rate(gmu->core_clk, 200000000); + clk_set_rate(gmu->hub_clk, 150000000); ret = clk_bulk_prepare_enable(gmu->nr_clocks, gmu->clocks); if 
(ret) { pm_runtime_put(gmu->gxpd); @@ -1129,12 +1130,12 @@ int a6xx_gmu_stop(struct a6xx_gpu *a6xx_gpu) static void a6xx_gmu_memory_free(struct a6xx_gmu *gmu) { - msm_gem_kernel_put(gmu->hfi.obj, gmu->aspace, false); - msm_gem_kernel_put(gmu->debug.obj, gmu->aspace, false); - msm_gem_kernel_put(gmu->icache.obj, gmu->aspace, false); - msm_gem_kernel_put(gmu->dcache.obj, gmu->aspace, false); - msm_gem_kernel_put(gmu->dummy.obj, gmu->aspace, false); - msm_gem_kernel_put(gmu->log.obj, gmu->aspace, false); + msm_gem_kernel_put(gmu->hfi.obj, gmu->aspace); + msm_gem_kernel_put(gmu->debug.obj, gmu->aspace); + msm_gem_kernel_put(gmu->icache.obj, gmu->aspace); + msm_gem_kernel_put(gmu->dcache.obj, gmu->aspace); + msm_gem_kernel_put(gmu->dummy.obj, gmu->aspace); + msm_gem_kernel_put(gmu->log.obj, gmu->aspace); gmu->aspace->mmu->funcs->detach(gmu->aspace->mmu); msm_gem_address_space_put(gmu->aspace); @@ -1393,6 +1394,9 @@ static int a6xx_gmu_clocks_probe(struct a6xx_gmu *gmu) gmu->core_clk = msm_clk_bulk_get_clock(gmu->clocks, gmu->nr_clocks, "gmu"); + gmu->hub_clk = msm_clk_bulk_get_clock(gmu->clocks, + gmu->nr_clocks, "hub"); + return 0; } @@ -1504,7 +1508,7 @@ int a6xx_gmu_init(struct a6xx_gpu *a6xx_gpu, struct device_node *node) * are otherwise unused by a660. */ gmu->dummy.size = SZ_4K; - if (adreno_is_a660(adreno_gpu)) { + if (adreno_is_a660_family(adreno_gpu)) { ret = a6xx_gmu_memory_alloc(gmu, &gmu->debug, SZ_4K * 7, 0x60400000); if (ret) goto err_memory; @@ -1522,7 +1526,7 @@ int a6xx_gmu_init(struct a6xx_gpu *a6xx_gpu, struct device_node *node) SZ_16M - SZ_16K, 0x04000); if (ret) goto err_memory; - } else if (adreno_is_a640(adreno_gpu)) { + } else if (adreno_is_a640_family(adreno_gpu)) { ret = a6xx_gmu_memory_alloc(gmu, &gmu->icache, SZ_256K - SZ_16K, 0x04000); if (ret) diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.h b/drivers/gpu/drm/msm/adreno/a6xx_gmu.h index 71dfa60070cc..3c74f64e3126 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.h +++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.h @@ -66,6 +66,7 @@ struct a6xx_gmu { int nr_clocks; struct clk_bulk_data *clocks; struct clk *core_clk; + struct clk *hub_clk; /* current performance index set externally */ int current_perf_index; diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index 9c5e4618aa0a..40c9fef457a4 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -52,21 +52,25 @@ static bool a6xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring) return true; } -static void a6xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring) +static void update_shadow_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); - uint32_t wptr; - unsigned long flags; /* Expanded APRIV doesn't need to issue the WHERE_AM_I opcode */ if (a6xx_gpu->has_whereami && !adreno_gpu->base.hw_apriv) { - struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); - OUT_PKT7(ring, CP_WHERE_AM_I, 2); OUT_RING(ring, lower_32_bits(shadowptr(a6xx_gpu, ring))); OUT_RING(ring, upper_32_bits(shadowptr(a6xx_gpu, ring))); } +} + +static void a6xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring) +{ + uint32_t wptr; + unsigned long flags; + + update_shadow_rptr(gpu, ring); spin_lock_irqsave(&ring->preempt_lock, flags); @@ -145,7 +149,7 @@ static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); struct a6xx_gpu 
*a6xx_gpu = to_a6xx_gpu(adreno_gpu); struct msm_ringbuffer *ring = submit->ring; - unsigned int i; + unsigned int i, ibs = 0; a6xx_set_pagetable(a6xx_gpu, ring, submit->queue->ctx); @@ -181,8 +185,19 @@ static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) OUT_RING(ring, lower_32_bits(submit->cmd[i].iova)); OUT_RING(ring, upper_32_bits(submit->cmd[i].iova)); OUT_RING(ring, submit->cmd[i].size); + ibs++; break; } + + /* + * Periodically update shadow-wptr if needed, so that we + * can see partial progress of submits with large # of + * cmds.. otherwise we could needlessly stall waiting for + * ringbuffer state, simply due to looking at a shadow + * rptr value that has not been updated + */ + if ((ibs % 32) == 0) + update_shadow_rptr(gpu, ring); } get_stats_counter(ring, REG_A6XX_RBBM_PERFCTR_CP(0), @@ -652,7 +667,7 @@ static void a6xx_set_cp_protect(struct msm_gpu *gpu) regs = a650_protect; count = ARRAY_SIZE(a650_protect); count_max = 48; - } else if (adreno_is_a660(adreno_gpu)) { + } else if (adreno_is_a660_family(adreno_gpu)) { regs = a660_protect; count = ARRAY_SIZE(a660_protect); count_max = 48; @@ -683,7 +698,7 @@ static void a6xx_set_ubwc_config(struct msm_gpu *gpu) if (adreno_is_a618(adreno_gpu)) return; - if (adreno_is_a640(adreno_gpu)) + if (adreno_is_a640_family(adreno_gpu)) amsbc = 1; if (adreno_is_a650(adreno_gpu) || adreno_is_a660(adreno_gpu)) { @@ -694,6 +709,13 @@ static void a6xx_set_ubwc_config(struct msm_gpu *gpu) uavflagprd_inv = 2; } + if (adreno_is_7c3(adreno_gpu)) { + lower_bit = 1; + amsbc = 1; + rgb565_predicator = 1; + uavflagprd_inv = 2; + } + gpu_write(gpu, REG_A6XX_RB_NC_MODE_CNTL, rgb565_predicator << 11 | amsbc << 4 | lower_bit << 1); gpu_write(gpu, REG_A6XX_TPL1_NC_MODE_CNTL, lower_bit << 1); @@ -740,6 +762,7 @@ static bool a6xx_ucode_check_version(struct a6xx_gpu *a6xx_gpu, { struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; struct msm_gpu *gpu = &adreno_gpu->base; + const char *sqe_name = adreno_gpu->info->fw[ADRENO_FW_SQE]; u32 *buf = msm_gem_get_vaddr(obj); bool ret = false; @@ -756,8 +779,7 @@ static bool a6xx_ucode_check_version(struct a6xx_gpu *a6xx_gpu, * * a660 targets have all the critical security fixes from the start */ - if (adreno_is_a618(adreno_gpu) || adreno_is_a630(adreno_gpu) || - adreno_is_a640(adreno_gpu)) { + if (!strcmp(sqe_name, "a630_sqe.fw")) { /* * If the lowest nibble is 0xa that is an indication that this * microcode has been patched. The actual version is in dword @@ -778,7 +800,7 @@ static bool a6xx_ucode_check_version(struct a6xx_gpu *a6xx_gpu, DRM_DEV_ERROR(&gpu->pdev->dev, "a630 SQE ucode is too old. Have version %x need at least %x\n", buf[0] & 0xfff, 0x190); - } else if (adreno_is_a650(adreno_gpu)) { + } else if (!strcmp(sqe_name, "a650_sqe.fw")) { if ((buf[0] & 0xfff) >= 0x095) { ret = true; goto out; @@ -787,7 +809,7 @@ static bool a6xx_ucode_check_version(struct a6xx_gpu *a6xx_gpu, DRM_DEV_ERROR(&gpu->pdev->dev, "a650 SQE ucode is too old. 
Have version %x need at least %x\n", buf[0] & 0xfff, 0x095); - } else if (adreno_is_a660(adreno_gpu)) { + } else if (!strcmp(sqe_name, "a660_sqe.fw")) { ret = true; } else { DRM_DEV_ERROR(&gpu->pdev->dev, @@ -897,7 +919,8 @@ static int a6xx_hw_init(struct msm_gpu *gpu) a6xx_set_hwcg(gpu, true); /* VBIF/GBIF start*/ - if (adreno_is_a640(adreno_gpu) || adreno_is_a650_family(adreno_gpu)) { + if (adreno_is_a640_family(adreno_gpu) || + adreno_is_a650_family(adreno_gpu)) { gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE0, 0x00071620); gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE1, 0x00071620); gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE2, 0x00071620); @@ -935,13 +958,14 @@ static int a6xx_hw_init(struct msm_gpu *gpu) gpu_write(gpu, REG_A6XX_UCHE_FILTER_CNTL, 0x804); gpu_write(gpu, REG_A6XX_UCHE_CACHE_WAYS, 0x4); - if (adreno_is_a640(adreno_gpu) || adreno_is_a650_family(adreno_gpu)) + if (adreno_is_a640_family(adreno_gpu) || + adreno_is_a650_family(adreno_gpu)) gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2, 0x02000140); else gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2, 0x010000c0); gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_1, 0x8040362c); - if (adreno_is_a660(adreno_gpu)) + if (adreno_is_a660_family(adreno_gpu)) gpu_write(gpu, REG_A6XX_CP_LPAC_PROG_FIFO_SIZE, 0x00000020); /* Setting the mem pool size */ @@ -952,8 +976,10 @@ static int a6xx_hw_init(struct msm_gpu *gpu) */ if (adreno_is_a650(adreno_gpu) || adreno_is_a660(adreno_gpu)) gpu_write(gpu, REG_A6XX_PC_DBG_ECO_CNTL, 0x00300200); - else if (adreno_is_a640(adreno_gpu)) + else if (adreno_is_a640_family(adreno_gpu) || adreno_is_7c3(adreno_gpu)) gpu_write(gpu, REG_A6XX_PC_DBG_ECO_CNTL, 0x00200200); + else if (adreno_is_a650(adreno_gpu) || adreno_is_a660(adreno_gpu)) + gpu_write(gpu, REG_A6XX_PC_DBG_ECO_CNTL, 0x00300200); else gpu_write(gpu, REG_A6XX_PC_DBG_ECO_CNTL, 0x00180000); @@ -990,13 +1016,15 @@ static int a6xx_hw_init(struct msm_gpu *gpu) /* Protect registers from the CP */ a6xx_set_cp_protect(gpu); - if (adreno_is_a660(adreno_gpu)) { + if (adreno_is_a660_family(adreno_gpu)) { gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, 0x1); gpu_write(gpu, REG_A6XX_RBBM_GBIF_CLIENT_QOS_CNTL, 0x0); - /* Set dualQ + disable afull for A660 GPU but not for A635 */ - gpu_write(gpu, REG_A6XX_UCHE_CMDQ_CONFIG, 0x66906); } + /* Set dualQ + disable afull for A660 GPU */ + if (adreno_is_a660(adreno_gpu)) + gpu_write(gpu, REG_A6XX_UCHE_CMDQ_CONFIG, 0x66906); + /* Enable expanded apriv for targets that support it */ if (gpu->hw_apriv) { gpu_write(gpu, REG_A6XX_CP_APRIV_CNTL, @@ -1035,7 +1063,7 @@ static int a6xx_hw_init(struct msm_gpu *gpu) if (adreno_gpu->base.hw_apriv || a6xx_gpu->has_whereami) { if (!a6xx_gpu->shadow_bo) { - a6xx_gpu->shadow = msm_gem_kernel_new_locked(gpu->dev, + a6xx_gpu->shadow = msm_gem_kernel_new(gpu->dev, sizeof(u32) * gpu->nr_rings, MSM_BO_WC | MSM_BO_MAP_PRIV, gpu->aspace, &a6xx_gpu->shadow_bo, @@ -1383,13 +1411,13 @@ static void a6xx_llc_activate(struct a6xx_gpu *a6xx_gpu) { struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; struct msm_gpu *gpu = &adreno_gpu->base; - u32 cntl1_regval = 0; + u32 gpu_scid, cntl1_regval = 0; if (IS_ERR(a6xx_gpu->llc_mmio)) return; if (!llcc_slice_activate(a6xx_gpu->llc_slice)) { - u32 gpu_scid = llcc_get_slice_id(a6xx_gpu->llc_slice); + gpu_scid = llcc_get_slice_id(a6xx_gpu->llc_slice); gpu_scid &= 0x1f; cntl1_regval = (gpu_scid << 0) | (gpu_scid << 5) | (gpu_scid << 10) | @@ -1409,26 +1437,34 @@ static void a6xx_llc_activate(struct a6xx_gpu *a6xx_gpu) } } - if (cntl1_regval) { + if (!cntl1_regval) + return; + + /* + * Program the slice 
IDs for the various GPU blocks and GPU MMU + * pagetables + */ + if (!a6xx_gpu->have_mmu500) { + a6xx_llc_write(a6xx_gpu, + REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_1, cntl1_regval); + /* - * Program the slice IDs for the various GPU blocks and GPU MMU - * pagetables + * Program cacheability overrides to not allocate cache + * lines on a write miss */ - if (a6xx_gpu->have_mmu500) - gpu_rmw(gpu, REG_A6XX_GBIF_SCACHE_CNTL1, GENMASK(24, 0), - cntl1_regval); - else { - a6xx_llc_write(a6xx_gpu, - REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_1, cntl1_regval); - - /* - * Program cacheability overrides to not allocate cache - * lines on a write miss - */ - a6xx_llc_rmw(a6xx_gpu, - REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_0, 0xF, 0x03); - } + a6xx_llc_rmw(a6xx_gpu, + REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_0, 0xF, 0x03); + return; } + + gpu_rmw(gpu, REG_A6XX_GBIF_SCACHE_CNTL1, GENMASK(24, 0), cntl1_regval); + + /* On A660, the SCID programming for UCHE traffic is done in + * A6XX_GBIF_SCACHE_CNTL0[14:10] + */ + if (adreno_is_a660_family(adreno_gpu)) + gpu_rmw(gpu, REG_A6XX_GBIF_SCACHE_CNTL0, (0x1f << 10) | + (1 << 8), (gpu_scid << 10) | (1 << 8)); } static void a6xx_llc_slices_destroy(struct a6xx_gpu *a6xx_gpu) @@ -1477,7 +1513,7 @@ static int a6xx_pm_resume(struct msm_gpu *gpu) if (ret) return ret; - msm_gpu_resume_devfreq(gpu); + msm_devfreq_resume(gpu); a6xx_llc_activate(a6xx_gpu); @@ -1494,7 +1530,7 @@ static int a6xx_pm_suspend(struct msm_gpu *gpu) a6xx_llc_deactivate(a6xx_gpu); - devfreq_suspend_device(gpu->devfreq.devfreq); + msm_devfreq_suspend(gpu); ret = a6xx_gmu_stop(a6xx_gpu); if (ret) @@ -1667,11 +1703,11 @@ static u32 a618_get_speed_bin(u32 fuse) return UINT_MAX; } -static u32 fuse_to_supp_hw(struct device *dev, u32 revn, u32 fuse) +static u32 fuse_to_supp_hw(struct device *dev, struct adreno_rev rev, u32 fuse) { u32 val = UINT_MAX; - if (revn == 618) + if (adreno_cmp_rev(ADRENO_REV(6, 1, 8, ANY_ID), rev)) val = a618_get_speed_bin(fuse); if (val == UINT_MAX) { @@ -1684,14 +1720,13 @@ static u32 fuse_to_supp_hw(struct device *dev, u32 revn, u32 fuse) return (1 << val); } -static int a6xx_set_supported_hw(struct device *dev, struct a6xx_gpu *a6xx_gpu, - u32 revn) +static int a6xx_set_supported_hw(struct device *dev, struct adreno_rev rev) { u32 supp_hw = UINT_MAX; - u16 speedbin; + u32 speedbin; int ret; - ret = nvmem_cell_read_u16(dev, "speed_bin", &speedbin); + ret = nvmem_cell_read_variable_le_u32(dev, "speed_bin", &speedbin); /* * -ENOENT means that the platform doesn't support speedbin which is * fine @@ -1704,9 +1739,8 @@ static int a6xx_set_supported_hw(struct device *dev, struct a6xx_gpu *a6xx_gpu, ret); goto done; } - speedbin = le16_to_cpu(speedbin); - supp_hw = fuse_to_supp_hw(dev, revn, speedbin); + supp_hw = fuse_to_supp_hw(dev, rev, speedbin); done: ret = devm_pm_opp_set_supported_hw(dev, &supp_hw, 1); @@ -1772,12 +1806,13 @@ struct msm_gpu *a6xx_gpu_init(struct drm_device *dev) */ info = adreno_info(config->rev); - if (info && (info->revn == 650 || info->revn == 660)) + if (info && (info->revn == 650 || info->revn == 660 || + adreno_cmp_rev(ADRENO_REV(6, 3, 5, ANY_ID), info->rev))) adreno_gpu->base.hw_apriv = true; a6xx_llc_slices_init(pdev, a6xx_gpu); - ret = a6xx_set_supported_hw(&pdev->dev, a6xx_gpu, info->revn); + ret = a6xx_set_supported_hw(&pdev->dev, config->rev); if (ret) { a6xx_destroy(&(a6xx_gpu->base.base)); return ERR_PTR(ret); diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c index ad4ea0ed5d99..e8f65cd8eca6 100644 --- 
a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c @@ -112,7 +112,7 @@ static void *state_kmemdup(struct a6xx_gpu_state *a6xx_state, void *src, static int a6xx_crashdumper_init(struct msm_gpu *gpu, struct a6xx_crashdumper *dumper) { - dumper->ptr = msm_gem_kernel_new_locked(gpu->dev, + dumper->ptr = msm_gem_kernel_new(gpu->dev, SZ_1M, MSM_BO_WC, gpu->aspace, &dumper->bo, &dumper->iova); @@ -961,7 +961,7 @@ struct msm_gpu_state *a6xx_gpu_state_get(struct msm_gpu *gpu) a6xx_get_clusters(gpu, a6xx_state, dumper); a6xx_get_dbgahb_clusters(gpu, a6xx_state, dumper); - msm_gem_kernel_put(dumper->bo, gpu->aspace, true); + msm_gem_kernel_put(dumper->bo, gpu->aspace); } if (snapshot_debugbus) diff --git a/drivers/gpu/drm/msm/adreno/a6xx_hfi.c b/drivers/gpu/drm/msm/adreno/a6xx_hfi.c index 919433732b43..d4c65bf0a1b7 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_hfi.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_hfi.c @@ -382,6 +382,36 @@ static void a660_build_bw_table(struct a6xx_hfi_msg_bw_table *msg) msg->cnoc_cmds_data[1][0] = 0x60000001; } +static void adreno_7c3_build_bw_table(struct a6xx_hfi_msg_bw_table *msg) +{ + /* + * Send a single "off" entry just to get things running + * TODO: bus scaling + */ + msg->bw_level_num = 1; + + msg->ddr_cmds_num = 3; + msg->ddr_wait_bitmask = 0x07; + + msg->ddr_cmds_addrs[0] = 0x50004; + msg->ddr_cmds_addrs[1] = 0x50000; + msg->ddr_cmds_addrs[2] = 0x50088; + + msg->ddr_cmds_data[0][0] = 0x40000000; + msg->ddr_cmds_data[0][1] = 0x40000000; + msg->ddr_cmds_data[0][2] = 0x40000000; + + /* + * These are the CX (CNOC) votes - these are used by the GMU but the + * votes are known and fixed for the target + */ + msg->cnoc_cmds_num = 1; + msg->cnoc_wait_bitmask = 0x01; + + msg->cnoc_cmds_addrs[0] = 0x5006c; + msg->cnoc_cmds_data[0][0] = 0x40000000; + msg->cnoc_cmds_data[1][0] = 0x60000001; +} static void a6xx_build_bw_table(struct a6xx_hfi_msg_bw_table *msg) { /* Send a single "off" entry since the 630 GMU doesn't do bus scaling */ @@ -428,10 +458,12 @@ static int a6xx_hfi_send_bw_table(struct a6xx_gmu *gmu) if (adreno_is_a618(adreno_gpu)) a618_build_bw_table(&msg); - else if (adreno_is_a640(adreno_gpu)) + else if (adreno_is_a640_family(adreno_gpu)) a640_build_bw_table(&msg); else if (adreno_is_a650(adreno_gpu)) a650_build_bw_table(&msg); + else if (adreno_is_7c3(adreno_gpu)) + adreno_7c3_build_bw_table(&msg); else if (adreno_is_a660(adreno_gpu)) a660_build_bw_table(&msg); else diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.c index 6dad8015c9a1..2a6ce76656aa 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_device.c +++ b/drivers/gpu/drm/msm/adreno/adreno_device.c @@ -8,8 +8,6 @@ #include "adreno_gpu.h" -#define ANY_ID 0xff - bool hang_debug = false; MODULE_PARM_DESC(hang_debug, "Dump registers when hang is detected (can be slow!)"); module_param_named(hang_debug, hang_debug, bool, 0600); @@ -300,6 +298,30 @@ static const struct adreno_info gpulist[] = { .init = a6xx_gpu_init, .zapfw = "a660_zap.mdt", .hwcg = a660_hwcg, + }, { + .rev = ADRENO_REV(6, 3, 5, ANY_ID), + .name = "Adreno 7c Gen 3", + .fw = { + [ADRENO_FW_SQE] = "a660_sqe.fw", + [ADRENO_FW_GMU] = "a660_gmu.bin", + }, + .gmem = SZ_512K, + .inactive_period = DRM_MSM_INACTIVE_PERIOD, + .init = a6xx_gpu_init, + .hwcg = a660_hwcg, + }, { + .rev = ADRENO_REV(6, 8, 0, ANY_ID), + .revn = 680, + .name = "A680", + .fw = { + [ADRENO_FW_SQE] = "a630_sqe.fw", + [ADRENO_FW_GMU] = "a640_gmu.bin", + }, + .gmem = SZ_2M, + .inactive_period = 
DRM_MSM_INACTIVE_PERIOD, + .init = a6xx_gpu_init, + .zapfw = "a640_zap.mdt", + .hwcg = a640_hwcg, }, }; @@ -325,6 +347,15 @@ static inline bool _rev_match(uint8_t entry, uint8_t id) return (entry == ANY_ID) || (entry == id); } +bool adreno_cmp_rev(struct adreno_rev rev1, struct adreno_rev rev2) +{ + + return _rev_match(rev1.core, rev2.core) && + _rev_match(rev1.major, rev2.major) && + _rev_match(rev1.minor, rev2.minor) && + _rev_match(rev1.patchid, rev2.patchid); +} + const struct adreno_info *adreno_info(struct adreno_rev rev) { int i; @@ -332,10 +363,7 @@ const struct adreno_info *adreno_info(struct adreno_rev rev) /* identify gpu: */ for (i = 0; i < ARRAY_SIZE(gpulist); i++) { const struct adreno_info *info = &gpulist[i]; - if (_rev_match(info->rev.core, rev.core) && - _rev_match(info->rev.major, rev.major) && - _rev_match(info->rev.minor, rev.minor) && - _rev_match(info->rev.patchid, rev.patchid)) + if (adreno_cmp_rev(info->rev, rev)) return info; } diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index 9f5a30234b33..748665232d29 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -261,8 +261,8 @@ int adreno_get_param(struct msm_gpu *gpu, uint32_t param, uint64_t *value) return ret; } return -EINVAL; - case MSM_PARAM_NR_RINGS: - *value = gpu->nr_rings; + case MSM_PARAM_PRIORITIES: + *value = gpu->nr_rings * NR_SCHED_PRIORITIES; return 0; case MSM_PARAM_PP_PGTABLE: *value = 0; @@ -390,7 +390,7 @@ struct drm_gem_object *adreno_fw_create_bo(struct msm_gpu *gpu, struct drm_gem_object *bo; void *ptr; - ptr = msm_gem_kernel_new_locked(gpu->dev, fw->size - 4, + ptr = msm_gem_kernel_new(gpu->dev, fw->size - 4, MSM_BO_WC | MSM_BO_GPU_READONLY, gpu->aspace, &bo, iova); if (IS_ERR(ptr)) diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h index 8dbe0d157520..225c277a6223 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h @@ -42,6 +42,8 @@ struct adreno_rev { uint8_t patchid; }; +#define ANY_ID 0xff + #define ADRENO_REV(core, major, minor, patchid) \ ((struct adreno_rev){ core, major, minor, patchid }) @@ -141,6 +143,8 @@ struct adreno_platform_config { __ret; \ }) +bool adreno_cmp_rev(struct adreno_rev rev1, struct adreno_rev rev2); + static inline bool adreno_is_a2xx(struct adreno_gpu *gpu) { return (gpu->revn < 300); @@ -237,9 +241,9 @@ static inline int adreno_is_a630(struct adreno_gpu *gpu) return gpu->revn == 630; } -static inline int adreno_is_a640(struct adreno_gpu *gpu) +static inline int adreno_is_a640_family(struct adreno_gpu *gpu) { - return gpu->revn == 640; + return (gpu->revn == 640) || (gpu->revn == 680); } static inline int adreno_is_a650(struct adreno_gpu *gpu) @@ -247,15 +251,27 @@ static inline int adreno_is_a650(struct adreno_gpu *gpu) return gpu->revn == 650; } +static inline int adreno_is_7c3(struct adreno_gpu *gpu) +{ + /* The order of args is important here to handle ANY_ID correctly */ + return adreno_cmp_rev(ADRENO_REV(6, 3, 5, ANY_ID), gpu->rev); +} + static inline int adreno_is_a660(struct adreno_gpu *gpu) { return gpu->revn == 660; } +static inline int adreno_is_a660_family(struct adreno_gpu *gpu) +{ + return adreno_is_a660(gpu) || adreno_is_7c3(gpu); +} + /* check for a650, a660, or any derivatives */ static inline int adreno_is_a650_family(struct adreno_gpu *gpu) { - return gpu->revn == 650 || gpu->revn == 620 || gpu->revn == 660; + return gpu->revn == 650 || gpu->revn == 620 || + 
adreno_is_a660_family(gpu); } int adreno_get_param(struct msm_gpu *gpu, uint32_t param, uint64_t *value); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c index 9a5c70c87cc8..768012243b44 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c @@ -30,12 +30,6 @@ #include "dpu_core_perf.h" #include "dpu_trace.h" -#define DPU_DRM_BLEND_OP_NOT_DEFINED 0 -#define DPU_DRM_BLEND_OP_OPAQUE 1 -#define DPU_DRM_BLEND_OP_PREMULTIPLIED 2 -#define DPU_DRM_BLEND_OP_COVERAGE 3 -#define DPU_DRM_BLEND_OP_MAX 4 - /* layer mixer index on dpu_crtc */ #define LEFT_MIXER 0 #define RIGHT_MIXER 1 @@ -146,20 +140,43 @@ static void _dpu_crtc_setup_blend_cfg(struct dpu_crtc_mixer *mixer, { struct dpu_hw_mixer *lm = mixer->hw_lm; uint32_t blend_op; + uint32_t fg_alpha, bg_alpha; - /* default to opaque blending */ - blend_op = DPU_BLEND_FG_ALPHA_FG_CONST | - DPU_BLEND_BG_ALPHA_BG_CONST; + fg_alpha = pstate->base.alpha >> 8; + bg_alpha = 0xff - fg_alpha; - if (format->alpha_enable) { + /* default to opaque blending */ + if (pstate->base.pixel_blend_mode == DRM_MODE_BLEND_PIXEL_NONE || + !format->alpha_enable) { + blend_op = DPU_BLEND_FG_ALPHA_FG_CONST | + DPU_BLEND_BG_ALPHA_BG_CONST; + } else if (pstate->base.pixel_blend_mode == DRM_MODE_BLEND_PREMULTI) { + blend_op = DPU_BLEND_FG_ALPHA_FG_CONST | + DPU_BLEND_BG_ALPHA_FG_PIXEL; + if (fg_alpha != 0xff) { + bg_alpha = fg_alpha; + blend_op |= DPU_BLEND_BG_MOD_ALPHA | + DPU_BLEND_BG_INV_MOD_ALPHA; + } else { + blend_op |= DPU_BLEND_BG_INV_ALPHA; + } + } else { /* coverage blending */ blend_op = DPU_BLEND_FG_ALPHA_FG_PIXEL | - DPU_BLEND_BG_ALPHA_FG_PIXEL | - DPU_BLEND_BG_INV_ALPHA; + DPU_BLEND_BG_ALPHA_FG_PIXEL; + if (fg_alpha != 0xff) { + bg_alpha = fg_alpha; + blend_op |= DPU_BLEND_FG_MOD_ALPHA | + DPU_BLEND_FG_INV_MOD_ALPHA | + DPU_BLEND_BG_MOD_ALPHA | + DPU_BLEND_BG_INV_MOD_ALPHA; + } else { + blend_op |= DPU_BLEND_BG_INV_ALPHA; + } } lm->ops.setup_blend_config(lm, pstate->stage, - 0xFF, 0, blend_op); + fg_alpha, bg_alpha, blend_op); DRM_DEBUG_ATOMIC("format:%p4cc, alpha_en:%u blend_op:0x%x\n", &format->base.pixel_format, format->alpha_enable, blend_op); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c index 1c04b7cce43e..0e9d3fa1544b 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c @@ -274,20 +274,20 @@ int dpu_encoder_helper_wait_for_irq(struct dpu_encoder_phys *phys_enc, /* return EWOULDBLOCK since we know the wait isn't necessary */ if (phys_enc->enable_state == DPU_ENC_DISABLED) { - DRM_ERROR("encoder is disabled id=%u, intr=%d, irq=%d", + DRM_ERROR("encoder is disabled id=%u, intr=%d, irq=%d\n", DRMID(phys_enc->parent), intr_idx, irq->irq_idx); return -EWOULDBLOCK; } if (irq->irq_idx < 0) { - DRM_DEBUG_KMS("skip irq wait id=%u, intr=%d, irq=%s", + DRM_DEBUG_KMS("skip irq wait id=%u, intr=%d, irq=%s\n", DRMID(phys_enc->parent), intr_idx, irq->name); return 0; } - DRM_DEBUG_KMS("id=%u, intr=%d, irq=%d, pp=%d, pending_cnt=%d", + DRM_DEBUG_KMS("id=%u, intr=%d, irq=%d, pp=%d, pending_cnt=%d\n", DRMID(phys_enc->parent), intr_idx, irq->irq_idx, phys_enc->hw_pp->idx - PINGPONG_0, atomic_read(wait_info->atomic_cnt)); @@ -303,8 +303,7 @@ int dpu_encoder_helper_wait_for_irq(struct dpu_encoder_phys *phys_enc, if (irq_status) { unsigned long flags; - DRM_DEBUG_KMS("irq not triggered id=%u, intr=%d, " - "irq=%d, pp=%d, atomic_cnt=%d", + DRM_DEBUG_KMS("irq not triggered id=%u, 
intr=%d, irq=%d, pp=%d, atomic_cnt=%d\n", DRMID(phys_enc->parent), intr_idx, irq->irq_idx, phys_enc->hw_pp->idx - PINGPONG_0, @@ -315,8 +314,7 @@ int dpu_encoder_helper_wait_for_irq(struct dpu_encoder_phys *phys_enc, ret = 0; } else { ret = -ETIMEDOUT; - DRM_DEBUG_KMS("irq timeout id=%u, intr=%d, " - "irq=%d, pp=%d, atomic_cnt=%d", + DRM_DEBUG_KMS("irq timeout id=%u, intr=%d, irq=%d, pp=%d, atomic_cnt=%d\n", DRMID(phys_enc->parent), intr_idx, irq->irq_idx, phys_enc->hw_pp->idx - PINGPONG_0, diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c index 704dace895cb..b131fd376192 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c @@ -974,6 +974,7 @@ static const struct dpu_perf_cfg sdm845_perf_data = { .amortizable_threshold = 25, .min_prefill_lines = 24, .danger_lut_tbl = {0xf, 0xffff, 0x0}, + .safe_lut_tbl = {0xfff0, 0xf000, 0xffff}, .qos_lut_tbl = { {.nentry = ARRAY_SIZE(sdm845_qos_linear), .entries = sdm845_qos_linear @@ -1001,6 +1002,7 @@ static const struct dpu_perf_cfg sc7180_perf_data = { .min_dram_ib = 1600000, .min_prefill_lines = 24, .danger_lut_tbl = {0xff, 0xffff, 0x0}, + .safe_lut_tbl = {0xfff0, 0xff00, 0xffff}, .qos_lut_tbl = { {.nentry = ARRAY_SIZE(sc7180_qos_linear), .entries = sc7180_qos_linear @@ -1028,6 +1030,7 @@ static const struct dpu_perf_cfg sm8150_perf_data = { .min_dram_ib = 800000, .min_prefill_lines = 24, .danger_lut_tbl = {0xf, 0xffff, 0x0}, + .safe_lut_tbl = {0xfff8, 0xf000, 0xffff}, .qos_lut_tbl = { {.nentry = ARRAY_SIZE(sm8150_qos_linear), .entries = sm8150_qos_linear @@ -1056,6 +1059,7 @@ static const struct dpu_perf_cfg sm8250_perf_data = { .min_dram_ib = 800000, .min_prefill_lines = 35, .danger_lut_tbl = {0xf, 0xffff, 0x0}, + .safe_lut_tbl = {0xfff0, 0xff00, 0xffff}, .qos_lut_tbl = { {.nentry = ARRAY_SIZE(sc7180_qos_linear), .entries = sc7180_qos_linear @@ -1084,6 +1088,7 @@ static const struct dpu_perf_cfg sc7280_perf_data = { .min_dram_ib = 1600000, .min_prefill_lines = 24, .danger_lut_tbl = {0xffff, 0xffff, 0x0}, + .safe_lut_tbl = {0xff00, 0xff00, 0xffff}, .qos_lut_tbl = { {.nentry = ARRAY_SIZE(sc7180_qos_macrotile), .entries = sc7180_qos_macrotile diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.c index f8a74f6cdc4c..64740ddb983e 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.c @@ -345,10 +345,12 @@ static void dpu_hw_ctl_clear_all_blendstages(struct dpu_hw_ctl *ctx) int i; for (i = 0; i < ctx->mixer_count; i++) { - DPU_REG_WRITE(c, CTL_LAYER(LM_0 + i), 0); - DPU_REG_WRITE(c, CTL_LAYER_EXT(LM_0 + i), 0); - DPU_REG_WRITE(c, CTL_LAYER_EXT2(LM_0 + i), 0); - DPU_REG_WRITE(c, CTL_LAYER_EXT3(LM_0 + i), 0); + enum dpu_lm mixer_id = ctx->mixer_hw_caps[i].id; + + DPU_REG_WRITE(c, CTL_LAYER(mixer_id), 0); + DPU_REG_WRITE(c, CTL_LAYER_EXT(mixer_id), 0); + DPU_REG_WRITE(c, CTL_LAYER_EXT2(mixer_id), 0); + DPU_REG_WRITE(c, CTL_LAYER_EXT3(mixer_id), 0); } DPU_REG_WRITE(c, CTL_FETCH_PIPE_ACTIVE, 0); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c index 4fd913522931..ae48f41821cf 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c @@ -471,30 +471,68 @@ static int _dpu_kms_initialize_dsi(struct drm_device *dev, struct dpu_kms *dpu_kms) { struct drm_encoder *encoder = NULL; + struct msm_display_info info; int i, rc = 0; if (!(priv->dsi[0] || priv->dsi[1])) 
return rc; - /*TODO: Support two independent DSI connectors */ - encoder = dpu_encoder_init(dev, DRM_MODE_ENCODER_DSI); - if (IS_ERR(encoder)) { - DPU_ERROR("encoder init failed for dsi display\n"); - return PTR_ERR(encoder); - } - - priv->encoders[priv->num_encoders++] = encoder; - + /* + * We support following confiurations: + * - Single DSI host (dsi0 or dsi1) + * - Two independent DSI hosts + * - Bonded DSI0 and DSI1 hosts + * + * TODO: Support swapping DSI0 and DSI1 in the bonded setup. + */ for (i = 0; i < ARRAY_SIZE(priv->dsi); i++) { + int other = (i + 1) % 2; + if (!priv->dsi[i]) continue; + if (msm_dsi_is_bonded_dsi(priv->dsi[i]) && + !msm_dsi_is_master_dsi(priv->dsi[i])) + continue; + + encoder = dpu_encoder_init(dev, DRM_MODE_ENCODER_DSI); + if (IS_ERR(encoder)) { + DPU_ERROR("encoder init failed for dsi display\n"); + return PTR_ERR(encoder); + } + + priv->encoders[priv->num_encoders++] = encoder; + + memset(&info, 0, sizeof(info)); + info.intf_type = encoder->encoder_type; + rc = msm_dsi_modeset_init(priv->dsi[i], dev, encoder); if (rc) { DPU_ERROR("modeset_init failed for dsi[%d], rc = %d\n", i, rc); break; } + + info.h_tile_instance[info.num_of_h_tiles++] = i; + info.capabilities = msm_dsi_is_cmd_mode(priv->dsi[i]) ? + MSM_DISPLAY_CAP_CMD_MODE : + MSM_DISPLAY_CAP_VID_MODE; + + if (msm_dsi_is_bonded_dsi(priv->dsi[i]) && priv->dsi[other]) { + rc = msm_dsi_modeset_init(priv->dsi[other], dev, encoder); + if (rc) { + DPU_ERROR("modeset_init failed for dsi[%d], rc = %d\n", + other, rc); + break; + } + + info.h_tile_instance[info.num_of_h_tiles++] = other; + } + + rc = dpu_encoder_setup(dev, encoder, &info); + if (rc) + DPU_ERROR("failed to setup DPU encoder %d: rc:%d\n", + encoder->base.id, rc); } return rc; @@ -505,6 +543,7 @@ static int _dpu_kms_initialize_displayport(struct drm_device *dev, struct dpu_kms *dpu_kms) { struct drm_encoder *encoder = NULL; + struct msm_display_info info; int rc = 0; if (!priv->dp) @@ -516,6 +555,7 @@ static int _dpu_kms_initialize_displayport(struct drm_device *dev, return PTR_ERR(encoder); } + memset(&info, 0, sizeof(info)); rc = msm_dp_modeset_init(priv->dp, dev, encoder); if (rc) { DPU_ERROR("modeset_init failed for DP, rc = %d\n", rc); @@ -524,6 +564,14 @@ static int _dpu_kms_initialize_displayport(struct drm_device *dev, } priv->encoders[priv->num_encoders++] = encoder; + + info.num_of_h_tiles = 1; + info.capabilities = MSM_DISPLAY_CAP_VID_MODE; + info.intf_type = encoder->encoder_type; + rc = dpu_encoder_setup(dev, encoder, &info); + if (rc) + DPU_ERROR("failed to setup DPU encoder %d: rc:%d\n", + encoder->base.id, rc); return rc; } @@ -726,41 +774,6 @@ static void dpu_kms_destroy(struct msm_kms *kms) msm_kms_destroy(&dpu_kms->base); } -static void _dpu_kms_set_encoder_mode(struct msm_kms *kms, - struct drm_encoder *encoder, - bool cmd_mode) -{ - struct msm_display_info info; - struct msm_drm_private *priv = encoder->dev->dev_private; - int i, rc = 0; - - memset(&info, 0, sizeof(info)); - - info.intf_type = encoder->encoder_type; - info.capabilities = cmd_mode ? 
MSM_DISPLAY_CAP_CMD_MODE : - MSM_DISPLAY_CAP_VID_MODE; - - switch (info.intf_type) { - case DRM_MODE_ENCODER_DSI: - /* TODO: No support for DSI swap */ - for (i = 0; i < ARRAY_SIZE(priv->dsi); i++) { - if (priv->dsi[i]) { - info.h_tile_instance[info.num_of_h_tiles] = i; - info.num_of_h_tiles++; - } - } - break; - case DRM_MODE_ENCODER_TMDS: - info.num_of_h_tiles = 1; - break; - } - - rc = dpu_encoder_setup(encoder->dev, encoder, &info); - if (rc) - DPU_ERROR("failed to setup DPU encoder %d: rc:%d\n", - encoder->base.id, rc); -} - static irqreturn_t dpu_irq(struct msm_kms *kms) { struct dpu_kms *dpu_kms = to_dpu_kms(kms); @@ -863,7 +876,6 @@ static const struct msm_kms_funcs kms_funcs = { .get_format = dpu_get_msm_format, .round_pixclk = dpu_kms_round_pixclk, .destroy = dpu_kms_destroy, - .set_encoder_mode = _dpu_kms_set_encoder_mode, .snapshot = dpu_kms_mdp_snapshot, #ifdef CONFIG_DEBUG_FS .debugfs_init = dpu_kms_debugfs_init, diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c index ec4a6f04394a..c989621209aa 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c @@ -1339,9 +1339,7 @@ static void dpu_plane_reset(struct drm_plane *plane) return; } - pstate->base.plane = plane; - - plane->state = &pstate->base; + __drm_atomic_helper_plane_reset(plane, &pstate->base); } #ifdef CONFIG_DEBUG_FS @@ -1647,6 +1645,12 @@ struct drm_plane *dpu_plane_init(struct drm_device *dev, if (ret) DPU_ERROR("failed to install zpos property, rc = %d\n", ret); + drm_plane_create_alpha_property(plane); + drm_plane_create_blend_mode_property(plane, + BIT(DRM_MODE_BLEND_PIXEL_NONE) | + BIT(DRM_MODE_BLEND_PREMULTI) | + BIT(DRM_MODE_BLEND_COVERAGE)); + drm_plane_create_rotation_property(plane, DRM_MODE_ROTATE_0, DRM_MODE_ROTATE_0 | diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c b/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c index 4a5b518288b0..cdcaf470f148 100644 --- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c +++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c @@ -19,30 +19,12 @@ static int mdp4_hw_init(struct msm_kms *kms) { struct mdp4_kms *mdp4_kms = to_mdp4_kms(to_mdp_kms(kms)); struct drm_device *dev = mdp4_kms->dev; - uint32_t version, major, minor, dmap_cfg, vg_cfg; + u32 dmap_cfg, vg_cfg; unsigned long clk; int ret = 0; pm_runtime_get_sync(dev->dev); - mdp4_enable(mdp4_kms); - version = mdp4_read(mdp4_kms, REG_MDP4_VERSION); - mdp4_disable(mdp4_kms); - - major = FIELD(version, MDP4_VERSION_MAJOR); - minor = FIELD(version, MDP4_VERSION_MINOR); - - DBG("found MDP4 version v%d.%d", major, minor); - - if (major != 4) { - DRM_DEV_ERROR(dev->dev, "unexpected MDP version: v%d.%d\n", - major, minor); - ret = -ENXIO; - goto out; - } - - mdp4_kms->rev = minor; - if (mdp4_kms->rev > 1) { mdp4_write(mdp4_kms, REG_MDP4_CS_CONTROLLER0, 0x0707ffff); mdp4_write(mdp4_kms, REG_MDP4_CS_CONTROLLER1, 0x03073f3f); @@ -88,7 +70,6 @@ static int mdp4_hw_init(struct msm_kms *kms) if (mdp4_kms->rev > 1) mdp4_write(mdp4_kms, REG_MDP4_RESET_STATUS, 1); -out: pm_runtime_put_sync(dev->dev); return ret; @@ -108,13 +89,6 @@ static void mdp4_disable_commit(struct msm_kms *kms) static void mdp4_prepare_commit(struct msm_kms *kms, struct drm_atomic_state *state) { - int i; - struct drm_crtc *crtc; - struct drm_crtc_state *crtc_state; - - /* see 119ecb7fd */ - for_each_new_crtc_in_state(state, crtc, crtc_state, i) - drm_crtc_vblank_get(crtc); } static void mdp4_flush_commit(struct msm_kms *kms, unsigned crtc_mask) @@ -133,12 +107,6 @@ static void 
mdp4_wait_flush(struct msm_kms *kms, unsigned crtc_mask) static void mdp4_complete_commit(struct msm_kms *kms, unsigned crtc_mask) { - struct mdp4_kms *mdp4_kms = to_mdp4_kms(to_mdp_kms(kms)); - struct drm_crtc *crtc; - - /* see 119ecb7fd */ - for_each_crtc_mask(mdp4_kms->dev, crtc, crtc_mask) - drm_crtc_vblank_put(crtc); } static long mdp4_round_pixclk(struct msm_kms *kms, unsigned long rate, @@ -411,14 +379,32 @@ fail: return ret; } +static void read_mdp_hw_revision(struct mdp4_kms *mdp4_kms, + u32 *major, u32 *minor) +{ + struct drm_device *dev = mdp4_kms->dev; + u32 version; + + mdp4_enable(mdp4_kms); + version = mdp4_read(mdp4_kms, REG_MDP4_VERSION); + mdp4_disable(mdp4_kms); + + *major = FIELD(version, MDP4_VERSION_MAJOR); + *minor = FIELD(version, MDP4_VERSION_MINOR); + + DRM_DEV_INFO(dev->dev, "MDP4 version v%d.%d", *major, *minor); +} + struct msm_kms *mdp4_kms_init(struct drm_device *dev) { struct platform_device *pdev = to_platform_device(dev->dev); struct mdp4_platform_config *config = mdp4_get_config(pdev); + struct msm_drm_private *priv = dev->dev_private; struct mdp4_kms *mdp4_kms; struct msm_kms *kms = NULL; struct msm_gem_address_space *aspace; int irq, ret; + u32 major, minor; mdp4_kms = kzalloc(sizeof(*mdp4_kms), GFP_KERNEL); if (!mdp4_kms) { @@ -433,7 +419,8 @@ struct msm_kms *mdp4_kms_init(struct drm_device *dev) goto fail; } - kms = &mdp4_kms->base.base; + priv->kms = &mdp4_kms->base.base; + kms = priv->kms; mdp4_kms->dev = dev; @@ -479,15 +466,6 @@ struct msm_kms *mdp4_kms_init(struct drm_device *dev) if (IS_ERR(mdp4_kms->pclk)) mdp4_kms->pclk = NULL; - if (mdp4_kms->rev >= 2) { - mdp4_kms->lut_clk = devm_clk_get(&pdev->dev, "lut_clk"); - if (IS_ERR(mdp4_kms->lut_clk)) { - DRM_DEV_ERROR(dev->dev, "failed to get lut_clk\n"); - ret = PTR_ERR(mdp4_kms->lut_clk); - goto fail; - } - } - mdp4_kms->axi_clk = devm_clk_get(&pdev->dev, "bus_clk"); if (IS_ERR(mdp4_kms->axi_clk)) { DRM_DEV_ERROR(dev->dev, "failed to get axi_clk\n"); @@ -496,8 +474,27 @@ struct msm_kms *mdp4_kms_init(struct drm_device *dev) } clk_set_rate(mdp4_kms->clk, config->max_clk); - if (mdp4_kms->lut_clk) + + read_mdp_hw_revision(mdp4_kms, &major, &minor); + + if (major != 4) { + DRM_DEV_ERROR(dev->dev, "unexpected MDP version: v%d.%d\n", + major, minor); + ret = -ENXIO; + goto fail; + } + + mdp4_kms->rev = minor; + + if (mdp4_kms->rev >= 2) { + mdp4_kms->lut_clk = devm_clk_get(&pdev->dev, "lut_clk"); + if (IS_ERR(mdp4_kms->lut_clk)) { + DRM_DEV_ERROR(dev->dev, "failed to get lut_clk\n"); + ret = PTR_ERR(mdp4_kms->lut_clk); + goto fail; + } clk_set_rate(mdp4_kms->lut_clk, config->max_clk); + } pm_runtime_enable(dev->dev); mdp4_kms->rpm_enabled = true; diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_ctl.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_ctl.c index 81b0c7cf954e..1220f2b20e05 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_ctl.c +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_ctl.c @@ -737,7 +737,7 @@ struct mdp5_ctl_manager *mdp5_ctlm_init(struct drm_device *dev, } /* - * In Dual DSI case, CTL0 and CTL1 are always assigned to two DSI + * In bonded DSI case, CTL0 and CTL1 are always assigned to two DSI * interfaces to support single FLUSH feature (Flush CTL0 and CTL1 when * only write into CTL0's FLUSH register) to keep two DSI pipes in sync. * Single FLUSH is supported from hw rev v3.0. 
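/*
 * A minimal, self-contained sketch (not the driver code itself) of the
 * encoder/tile assignment rule that the _dpu_kms_initialize_dsi() hunk above
 * introduces: every standalone DSI host gets its own encoder, while in the
 * bonded case only the master host gets an encoder and both host indices are
 * recorded as horizontal tiles.  All types and helpers below are simplified
 * stand-ins, not the msm structures.
 */
#include <stdbool.h>
#include <stdio.h>

#define MAX_DSI 2

struct dsi_host_stub {
	bool present;
	bool bonded;
	bool master;
};

struct encoder_info_stub {
	int num_of_h_tiles;
	int h_tile_instance[MAX_DSI];
};

static int assign_encoders(const struct dsi_host_stub hosts[MAX_DSI],
			   struct encoder_info_stub infos[MAX_DSI])
{
	int num_encoders = 0;

	for (int i = 0; i < MAX_DSI; i++) {
		int other = (i + 1) % 2;

		if (!hosts[i].present)
			continue;
		/* the bonded slave is driven through the master's encoder */
		if (hosts[i].bonded && !hosts[i].master)
			continue;

		struct encoder_info_stub *info = &infos[num_encoders++];

		info->h_tile_instance[info->num_of_h_tiles++] = i;
		if (hosts[i].bonded && hosts[other].present)
			info->h_tile_instance[info->num_of_h_tiles++] = other;
	}
	return num_encoders;
}

int main(void)
{
	struct dsi_host_stub bonded[MAX_DSI] = {
		{ .present = true, .bonded = true, .master = true },
		{ .present = true, .bonded = true, .master = false },
	};
	struct encoder_info_stub infos[MAX_DSI] = { { 0 } };

	/* bonded: one encoder driving two horizontal tiles (0 and 1) */
	printf("encoders=%d tiles=%d\n", assign_encoders(bonded, infos),
	       infos[0].num_of_h_tiles);
	return 0;
}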
diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c index 15aed45022bc..b3b42672b2d4 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c @@ -209,13 +209,6 @@ static int mdp5_set_split_display(struct msm_kms *kms, slave_encoder); } -static void mdp5_set_encoder_mode(struct msm_kms *kms, - struct drm_encoder *encoder, - bool cmd_mode) -{ - mdp5_encoder_set_intf_mode(encoder, cmd_mode); -} - static void mdp5_kms_destroy(struct msm_kms *kms) { struct mdp5_kms *mdp5_kms = to_mdp5_kms(to_mdp_kms(kms)); @@ -287,7 +280,6 @@ static const struct mdp_kms_funcs kms_funcs = { .get_format = mdp_get_format, .round_pixclk = mdp5_round_pixclk, .set_split_display = mdp5_set_split_display, - .set_encoder_mode = mdp5_set_encoder_mode, .destroy = mdp5_kms_destroy, #ifdef CONFIG_DEBUG_FS .debugfs_init = mdp5_kms_debugfs_init, @@ -448,6 +440,9 @@ static int modeset_init_intf(struct mdp5_kms *mdp5_kms, } ret = msm_dsi_modeset_init(priv->dsi[dsi_id], dev, encoder); + if (!ret) + mdp5_encoder_set_intf_mode(encoder, msm_dsi_is_cmd_mode(priv->dsi[dsi_id])); + break; } default: diff --git a/drivers/gpu/drm/msm/disp/msm_disp_snapshot.h b/drivers/gpu/drm/msm/disp/msm_disp_snapshot.h index c92a9508c8d3..c22b07f68670 100644 --- a/drivers/gpu/drm/msm/disp/msm_disp_snapshot.h +++ b/drivers/gpu/drm/msm/disp/msm_disp_snapshot.h @@ -16,7 +16,6 @@ #include <linux/delay.h> #include <linux/spinlock.h> #include <linux/ktime.h> -#include <linux/debugfs.h> #include <linux/uaccess.h> #include <linux/dma-buf.h> #include <linux/slab.h> diff --git a/drivers/gpu/drm/msm/dp/dp_aux.c b/drivers/gpu/drm/msm/dp/dp_aux.c index 4a3293b590b0..eb40d8413bca 100644 --- a/drivers/gpu/drm/msm/dp/dp_aux.c +++ b/drivers/gpu/drm/msm/dp/dp_aux.c @@ -353,6 +353,9 @@ static ssize_t dp_aux_transfer(struct drm_dp_aux *dp_aux, if (!(aux->retry_cnt % MAX_AUX_RETRIES)) dp_catalog_aux_update_cfg(aux->catalog); } + /* reset aux if link is in connected state */ + if (dp_catalog_link_is_connected(aux->catalog)) + dp_catalog_aux_reset(aux->catalog); } else { aux->retry_cnt = 0; switch (aux->aux_error_num) { diff --git a/drivers/gpu/drm/msm/dp/dp_catalog.c b/drivers/gpu/drm/msm/dp/dp_catalog.c index c0423e76eed7..cc2bb8295329 100644 --- a/drivers/gpu/drm/msm/dp/dp_catalog.c +++ b/drivers/gpu/drm/msm/dp/dp_catalog.c @@ -372,6 +372,7 @@ void dp_catalog_ctrl_mainlink_ctrl(struct dp_catalog *dp_catalog, struct dp_catalog_private *catalog = container_of(dp_catalog, struct dp_catalog_private, dp_catalog); + DRM_DEBUG_DP("enable=%d\n", enable); if (enable) { /* * To make sure link reg writes happens before other operation, @@ -580,6 +581,7 @@ void dp_catalog_hpd_config_intr(struct dp_catalog *dp_catalog, config = (en ? 
config | intr_mask : config & ~intr_mask); + DRM_DEBUG_DP("intr_mask=%#x config=%#x\n", intr_mask, config); dp_write_aux(catalog, REG_DP_DP_HPD_INT_MASK, config & DP_DP_HPD_INT_MASK); } @@ -610,6 +612,7 @@ u32 dp_catalog_link_is_connected(struct dp_catalog *dp_catalog) u32 status; status = dp_read_aux(catalog, REG_DP_DP_HPD_INT_STATUS); + DRM_DEBUG_DP("aux status: %#x\n", status); status >>= DP_DP_HPD_STATE_STATUS_BITS_SHIFT; status &= DP_DP_HPD_STATE_STATUS_BITS_MASK; @@ -685,6 +688,7 @@ void dp_catalog_ctrl_send_phy_pattern(struct dp_catalog *dp_catalog, /* Make sure to clear the current pattern before starting a new one */ dp_write_link(catalog, REG_DP_STATE_CTRL, 0x0); + DRM_DEBUG_DP("pattern: %#x\n", pattern); switch (pattern) { case DP_PHY_TEST_PATTERN_D10_2: dp_write_link(catalog, REG_DP_STATE_CTRL, @@ -745,7 +749,7 @@ void dp_catalog_ctrl_send_phy_pattern(struct dp_catalog *dp_catalog, DP_STATE_CTRL_LINK_TRAINING_PATTERN4); break; default: - DRM_DEBUG_DP("No valid test pattern requested:0x%x\n", pattern); + DRM_DEBUG_DP("No valid test pattern requested: %#x\n", pattern); break; } } @@ -929,7 +933,7 @@ void dp_catalog_audio_config_acr(struct dp_catalog *dp_catalog) select = dp_catalog->audio_data; acr_ctrl = select << 4 | BIT(31) | BIT(8) | BIT(14); - DRM_DEBUG_DP("select = 0x%x, acr_ctrl = 0x%x\n", select, acr_ctrl); + DRM_DEBUG_DP("select: %#x, acr_ctrl: %#x\n", select, acr_ctrl); dp_write_link(catalog, MMSS_DP_AUDIO_ACR_CTRL, acr_ctrl); } diff --git a/drivers/gpu/drm/msm/dp/dp_ctrl.c b/drivers/gpu/drm/msm/dp/dp_ctrl.c index eaddfd739885..62e75dc8afc6 100644 --- a/drivers/gpu/drm/msm/dp/dp_ctrl.c +++ b/drivers/gpu/drm/msm/dp/dp_ctrl.c @@ -81,13 +81,6 @@ struct dp_ctrl_private { struct completion video_comp; }; -struct dp_cr_status { - u8 lane_0_1; - u8 lane_2_3; -}; - -#define DP_LANE0_1_CR_DONE 0x11 - static int dp_aux_link_configure(struct drm_dp_aux *aux, struct dp_link_info *link) { @@ -120,7 +113,7 @@ void dp_ctrl_push_idle(struct dp_ctrl *dp_ctrl) IDLE_PATTERN_COMPLETION_TIMEOUT_JIFFIES)) pr_warn("PUSH_IDLE pattern timedout\n"); - pr_debug("mainlink off done\n"); + DRM_DEBUG_DP("mainlink off done\n"); } static void dp_ctrl_config_ctrl(struct dp_ctrl_private *ctrl) @@ -1011,6 +1004,8 @@ static int dp_ctrl_update_vx_px(struct dp_ctrl_private *ctrl) u32 voltage_swing_level = link->phy_params.v_level; u32 pre_emphasis_level = link->phy_params.p_level; + DRM_DEBUG_DP("voltage level: %d emphasis level: %d\n", voltage_swing_level, + pre_emphasis_level); ret = dp_catalog_ctrl_update_vx_px(ctrl->catalog, voltage_swing_level, pre_emphasis_level); @@ -1078,7 +1073,7 @@ static int dp_ctrl_read_link_status(struct dp_ctrl_private *ctrl, } static int dp_ctrl_link_train_1(struct dp_ctrl_private *ctrl, - struct dp_cr_status *cr, int *training_step) + int *training_step) { int tries, old_v_level, ret = 0; u8 link_status[DP_LINK_STATUS_SIZE]; @@ -1107,9 +1102,6 @@ static int dp_ctrl_link_train_1(struct dp_ctrl_private *ctrl, if (ret) return ret; - cr->lane_0_1 = link_status[0]; - cr->lane_2_3 = link_status[1]; - if (drm_dp_clock_recovery_ok(link_status, ctrl->link->link_params.num_lanes)) { return 0; @@ -1186,7 +1178,7 @@ static void dp_ctrl_clear_training_pattern(struct dp_ctrl_private *ctrl) } static int dp_ctrl_link_train_2(struct dp_ctrl_private *ctrl, - struct dp_cr_status *cr, int *training_step) + int *training_step) { int tries = 0, ret = 0; char pattern; @@ -1202,10 +1194,6 @@ static int dp_ctrl_link_train_2(struct dp_ctrl_private *ctrl, else pattern = DP_TRAINING_PATTERN_2; - ret = 
dp_ctrl_update_vx_px(ctrl); - if (ret) - return ret; - ret = dp_catalog_ctrl_set_pattern(ctrl->catalog, pattern); if (ret) return ret; @@ -1218,8 +1206,6 @@ static int dp_ctrl_link_train_2(struct dp_ctrl_private *ctrl, ret = dp_ctrl_read_link_status(ctrl, link_status); if (ret) return ret; - cr->lane_0_1 = link_status[0]; - cr->lane_2_3 = link_status[1]; if (drm_dp_channel_eq_ok(link_status, ctrl->link->link_params.num_lanes)) { @@ -1239,7 +1225,7 @@ static int dp_ctrl_link_train_2(struct dp_ctrl_private *ctrl, static int dp_ctrl_reinitialize_mainlink(struct dp_ctrl_private *ctrl); static int dp_ctrl_link_train(struct dp_ctrl_private *ctrl, - struct dp_cr_status *cr, int *training_step) + int *training_step) { int ret = 0; u8 encoding = DP_SET_ANSI_8B10B; @@ -1255,7 +1241,7 @@ static int dp_ctrl_link_train(struct dp_ctrl_private *ctrl, drm_dp_dpcd_write(ctrl->aux, DP_MAIN_LINK_CHANNEL_CODING_SET, &encoding, 1); - ret = dp_ctrl_link_train_1(ctrl, cr, training_step); + ret = dp_ctrl_link_train_1(ctrl, training_step); if (ret) { DRM_ERROR("link training #1 failed. ret=%d\n", ret); goto end; @@ -1264,7 +1250,7 @@ static int dp_ctrl_link_train(struct dp_ctrl_private *ctrl, /* print success info as this is a result of user initiated action */ DRM_DEBUG_DP("link training #1 successful\n"); - ret = dp_ctrl_link_train_2(ctrl, cr, training_step); + ret = dp_ctrl_link_train_2(ctrl, training_step); if (ret) { DRM_ERROR("link training #2 failed. ret=%d\n", ret); goto end; @@ -1280,7 +1266,7 @@ end: } static int dp_ctrl_setup_main_link(struct dp_ctrl_private *ctrl, - struct dp_cr_status *cr, int *training_step) + int *training_step) { int ret = 0; @@ -1295,7 +1281,7 @@ static int dp_ctrl_setup_main_link(struct dp_ctrl_private *ctrl, * a link training pattern, we have to first do soft reset. 
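/*
 * A condensed, standalone sketch (stand-in names and policies, not the msm DP
 * code) of the two-phase flow that dp_ctrl_link_train() wires together above:
 * phase 1 runs clock recovery, phase 2 runs channel equalization, and the
 * caller retries with a lower link rate or lane count when a phase fails.
 * The rate/lane thresholds here are arbitrary placeholders.
 */
#include <stdbool.h>
#include <stdio.h>

enum train_result { TRAIN_OK, TRAIN_CR_FAILED, TRAIN_EQ_FAILED };

/* stand-ins for the per-phase helpers; a real driver negotiates with the sink */
static bool clock_recovery(int rate_khz, int lanes)       { return rate_khz <= 270000; }
static bool channel_equalization(int rate_khz, int lanes) { return lanes <= 2; }

static enum train_result link_train_once(int rate_khz, int lanes)
{
	if (!clock_recovery(rate_khz, lanes))
		return TRAIN_CR_FAILED;	/* phase 1 (training pattern 1) failed */
	if (!channel_equalization(rate_khz, lanes))
		return TRAIN_EQ_FAILED;	/* phase 2 (TPS2/3/4) failed */
	return TRAIN_OK;
}

int main(void)
{
	int rate_khz = 540000, lanes = 4;

	for (int attempt = 0; attempt < 5; attempt++) {
		enum train_result res = link_train_once(rate_khz, lanes);

		if (res == TRAIN_OK)
			break;
		/* crude fallback policy: lower the rate first, then the lane count */
		if (res == TRAIN_CR_FAILED && rate_khz > 162000)
			rate_khz = rate_khz == 540000 ? 270000 : 162000;
		else if (lanes > 1)
			lanes >>= 1;
		else
			break;
	}
	printf("settled at %d kHz x%d lanes\n", rate_khz, lanes);
	return 0;
}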
*/ - ret = dp_ctrl_link_train(ctrl, cr, training_step); + ret = dp_ctrl_link_train(ctrl, training_step); return ret; } @@ -1382,6 +1368,7 @@ int dp_ctrl_host_init(struct dp_ctrl *dp_ctrl, bool flip, bool reset) if (reset) dp_catalog_ctrl_reset(ctrl->catalog); + DRM_DEBUG_DP("flip=%d\n", flip); dp_catalog_ctrl_phy_reset(ctrl->catalog); phy_init(phy); dp_catalog_ctrl_enable_irq(ctrl->catalog, true); @@ -1492,14 +1479,16 @@ static int dp_ctrl_deinitialize_mainlink(struct dp_ctrl_private *ctrl) static int dp_ctrl_link_maintenance(struct dp_ctrl_private *ctrl) { int ret = 0; - struct dp_cr_status cr; int training_step = DP_TRAINING_NONE; dp_ctrl_push_idle(&ctrl->dp_ctrl); + ctrl->link->phy_params.p_level = 0; + ctrl->link->phy_params.v_level = 0; + ctrl->dp_ctrl.pixel_rate = ctrl->panel->dp_mode.drm_mode.clock; - ret = dp_ctrl_setup_main_link(ctrl, &cr, &training_step); + ret = dp_ctrl_setup_main_link(ctrl, &training_step); if (ret) goto end; @@ -1630,6 +1619,35 @@ void dp_ctrl_handle_sink_request(struct dp_ctrl *dp_ctrl) } } +static bool dp_ctrl_clock_recovery_any_ok( + const u8 link_status[DP_LINK_STATUS_SIZE], + int lane_count) +{ + int reduced_cnt; + + if (lane_count <= 1) + return false; + + /* + * only interested in the lane number after reduced + * lane_count = 4, then only interested in 2 lanes + * lane_count = 2, then only interested in 1 lane + */ + reduced_cnt = lane_count >> 1; + + return drm_dp_clock_recovery_ok(link_status, reduced_cnt); +} + +static bool dp_ctrl_channel_eq_ok(struct dp_ctrl_private *ctrl) +{ + u8 link_status[DP_LINK_STATUS_SIZE]; + int num_lanes = ctrl->link->link_params.num_lanes; + + dp_ctrl_read_link_status(ctrl, link_status); + + return drm_dp_channel_eq_ok(link_status, num_lanes); +} + int dp_ctrl_on_link(struct dp_ctrl *dp_ctrl) { int rc = 0; @@ -1637,7 +1655,7 @@ int dp_ctrl_on_link(struct dp_ctrl *dp_ctrl) u32 rate = 0; int link_train_max_retries = 5; u32 const phy_cts_pixel_clk_khz = 148500; - struct dp_cr_status cr; + u8 link_status[DP_LINK_STATUS_SIZE]; unsigned int training_step; if (!dp_ctrl) @@ -1664,6 +1682,9 @@ int dp_ctrl_on_link(struct dp_ctrl *dp_ctrl) ctrl->link->link_params.rate, ctrl->link->link_params.num_lanes, ctrl->dp_ctrl.pixel_rate); + ctrl->link->phy_params.p_level = 0; + ctrl->link->phy_params.v_level = 0; + rc = dp_ctrl_enable_mainlink_clocks(ctrl); if (rc) return rc; @@ -1677,19 +1698,21 @@ int dp_ctrl_on_link(struct dp_ctrl *dp_ctrl) } training_step = DP_TRAINING_NONE; - rc = dp_ctrl_setup_main_link(ctrl, &cr, &training_step); + rc = dp_ctrl_setup_main_link(ctrl, &training_step); if (rc == 0) { /* training completed successfully */ break; } else if (training_step == DP_TRAINING_1) { /* link train_1 failed */ - if (!dp_catalog_link_is_connected(ctrl->catalog)) { + if (!dp_catalog_link_is_connected(ctrl->catalog)) break; - } + + dp_ctrl_read_link_status(ctrl, link_status); rc = dp_ctrl_link_rate_down_shift(ctrl); if (rc < 0) { /* already in RBR = 1.6G */ - if (cr.lane_0_1 & DP_LANE0_1_CR_DONE) { + if (dp_ctrl_clock_recovery_any_ok(link_status, + ctrl->link->link_params.num_lanes)) { /* * some lanes are ready, * reduce lane number @@ -1705,12 +1728,18 @@ int dp_ctrl_on_link(struct dp_ctrl *dp_ctrl) } } } else if (training_step == DP_TRAINING_2) { - /* link train_2 failed, lower lane rate */ - if (!dp_catalog_link_is_connected(ctrl->catalog)) { + /* link train_2 failed */ + if (!dp_catalog_link_is_connected(ctrl->catalog)) break; - } - rc = dp_ctrl_link_lane_down_shift(ctrl); + dp_ctrl_read_link_status(ctrl, link_status); + + if 
(!drm_dp_clock_recovery_ok(link_status, + ctrl->link->link_params.num_lanes)) + rc = dp_ctrl_link_rate_down_shift(ctrl); + else + rc = dp_ctrl_link_lane_down_shift(ctrl); + if (rc < 0) { /* end with failure */ break; /* lane == 1 already */ @@ -1721,17 +1750,19 @@ int dp_ctrl_on_link(struct dp_ctrl *dp_ctrl) if (ctrl->link->sink_request & DP_TEST_LINK_PHY_TEST_PATTERN) return rc; - /* stop txing train pattern */ - dp_ctrl_clear_training_pattern(ctrl); + if (rc == 0) { /* link train successfully */ + /* + * do not stop train pattern here + * stop link training at on_stream + * to pass compliance test + */ + } else { + /* + * link training failed + * end txing train pattern here + */ + dp_ctrl_clear_training_pattern(ctrl); - /* - * keep transmitting idle pattern until video ready - * to avoid main link from loss of sync - */ - if (rc == 0) /* link train successfully */ - dp_ctrl_push_idle(dp_ctrl); - else { - /* link training failed */ dp_ctrl_deinitialize_mainlink(ctrl); rc = -ECONNRESET; } @@ -1739,9 +1770,15 @@ int dp_ctrl_on_link(struct dp_ctrl *dp_ctrl) return rc; } +static int dp_ctrl_link_retrain(struct dp_ctrl_private *ctrl) +{ + int training_step = DP_TRAINING_NONE; + + return dp_ctrl_setup_main_link(ctrl, &training_step); +} + int dp_ctrl_on_stream(struct dp_ctrl *dp_ctrl) { - u32 rate = 0; int ret = 0; bool mainlink_ready = false; struct dp_ctrl_private *ctrl; @@ -1751,10 +1788,6 @@ int dp_ctrl_on_stream(struct dp_ctrl *dp_ctrl) ctrl = container_of(dp_ctrl, struct dp_ctrl_private, dp_ctrl); - rate = ctrl->panel->link_info.rate; - - ctrl->link->link_params.rate = rate; - ctrl->link->link_params.num_lanes = ctrl->panel->link_info.num_lanes; ctrl->dp_ctrl.pixel_rate = ctrl->panel->dp_mode.drm_mode.clock; DRM_DEBUG_DP("rate=%d, num_lanes=%d, pixel_rate=%d\n", @@ -1769,6 +1802,12 @@ int dp_ctrl_on_stream(struct dp_ctrl *dp_ctrl) } } + if (!dp_ctrl_channel_eq_ok(ctrl)) + dp_ctrl_link_retrain(ctrl); + + /* stop txing train pattern to end link training */ + dp_ctrl_clear_training_pattern(ctrl); + ret = dp_ctrl_enable_stream_clocks(ctrl); if (ret) { DRM_ERROR("Failed to start pixel clocks. 
ret=%d\n", ret); diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c index 867388a399ad..fbe4c2cd52a3 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.c +++ b/drivers/gpu/drm/msm/dp/dp_display.c @@ -55,7 +55,6 @@ enum { EV_HPD_INIT_SETUP, EV_HPD_PLUG_INT, EV_IRQ_HPD_INT, - EV_HPD_REPLUG_INT, EV_HPD_UNPLUG_INT, EV_USER_NOTIFICATION, EV_CONNECT_PENDING_TIMEOUT, @@ -102,8 +101,6 @@ struct dp_display_private { struct dp_display_mode dp_mode; struct msm_dp dp_display; - bool encoder_mode_set; - /* wait for audio signaling */ struct completion audio_comp; @@ -268,6 +265,8 @@ static bool dp_display_is_ds_bridge(struct dp_panel *panel) static bool dp_display_is_sink_count_zero(struct dp_display_private *dp) { + DRM_DEBUG_DP("present=%#x sink_count=%d\n", dp->panel->dpcd[DP_DOWNSTREAMPORT_PRESENT], + dp->link->sink_count); return dp_display_is_ds_bridge(dp->panel) && (dp->link->sink_count == 0); } @@ -284,20 +283,6 @@ static void dp_display_send_hpd_event(struct msm_dp *dp_display) } -static void dp_display_set_encoder_mode(struct dp_display_private *dp) -{ - struct msm_drm_private *priv = dp->dp_display.drm_dev->dev_private; - struct msm_kms *kms = priv->kms; - - if (!dp->encoder_mode_set && dp->dp_display.encoder && - kms->funcs->set_encoder_mode) { - kms->funcs->set_encoder_mode(kms, - dp->dp_display.encoder, false); - - dp->encoder_mode_set = true; - } -} - static int dp_display_send_hpd_notification(struct dp_display_private *dp, bool hpd) { @@ -313,6 +298,7 @@ static int dp_display_send_hpd_notification(struct dp_display_private *dp, dp->dp_display.is_connected = hpd; + DRM_DEBUG_DP("hpd=%d\n", hpd); dp_display_send_hpd_event(&dp->dp_display); return 0; @@ -362,6 +348,7 @@ static void dp_display_host_init(struct dp_display_private *dp, int reset) { bool flip = false; + DRM_DEBUG_DP("core_initialized=%d\n", dp->core_initialized); if (dp->core_initialized) { DRM_DEBUG_DP("DP core already initialized\n"); return; @@ -370,8 +357,6 @@ static void dp_display_host_init(struct dp_display_private *dp, int reset) if (dp->usbpd->orientation == ORIENTATION_CC2) flip = true; - dp_display_set_encoder_mode(dp); - dp_power_init(dp->power, flip); dp_ctrl_host_init(dp->ctrl, flip, reset); dp_aux_init(dp->aux); @@ -466,8 +451,10 @@ static int dp_display_handle_irq_hpd(struct dp_display_private *dp) { u32 sink_request = dp->link->sink_request; + DRM_DEBUG_DP("%d\n", sink_request); if (dp->hpd_state == ST_DISCONNECTED) { if (sink_request & DP_LINK_STATUS_UPDATED) { + DRM_DEBUG_DP("Disconnected sink_request: %d\n", sink_request); DRM_ERROR("Disconnected, no DP_LINK_STATUS_UPDATED\n"); return -EINVAL; } @@ -499,6 +486,7 @@ static int dp_display_usbpd_attention_cb(struct device *dev) rc = dp_link_process_request(dp->link); if (!rc) { sink_request = dp->link->sink_request; + DRM_DEBUG_DP("hpd_state=%d sink_request=%d\n", dp->hpd_state, sink_request); if (sink_request & DS_PORT_STATUS_CHANGED) rc = dp_display_handle_port_ststus_changed(dp); else @@ -521,6 +509,7 @@ static int dp_hpd_plug_handle(struct dp_display_private *dp, u32 data) mutex_lock(&dp->event_mutex); state = dp->hpd_state; + DRM_DEBUG_DP("hpd_state=%d\n", state); if (state == ST_DISPLAY_OFF || state == ST_SUSPENDED) { mutex_unlock(&dp->event_mutex); return 0; @@ -656,6 +645,7 @@ static int dp_hpd_unplug_handle(struct dp_display_private *dp, u32 data) /* start sentinel checking in case of missing uevent */ dp_add_event(dp, EV_DISCONNECT_PENDING_TIMEOUT, 0, DP_TIMEOUT_5_SECOND); + DRM_DEBUG_DP("hpd_state=%d\n", state); /* 
signal the disconnect event early to ensure proper teardown */ dp_display_handle_plugged_change(g_dp_display, false); @@ -714,6 +704,7 @@ static int dp_irq_hpd_handle(struct dp_display_private *dp, u32 data) if (ret == -ECONNRESET) { /* cable unplugged */ dp->core_initialized = false; } + DRM_DEBUG_DP("hpd_state=%d\n", state); mutex_unlock(&dp->event_mutex); @@ -855,6 +846,7 @@ static int dp_display_enable(struct dp_display_private *dp, u32 data) dp_display = g_dp_display; + DRM_DEBUG_DP("sink_count=%d\n", dp->link->sink_count); if (dp_display->power_on) { DRM_DEBUG_DP("Link already setup, return\n"); return 0; @@ -916,6 +908,7 @@ static int dp_display_disable(struct dp_display_private *dp, u32 data) dp_display->power_on = false; + DRM_DEBUG_DP("sink count: %d\n", dp->link->sink_count); return 0; } @@ -1015,10 +1008,8 @@ int dp_display_get_test_bpp(struct msm_dp *dp) void msm_dp_snapshot(struct msm_disp_state *disp_state, struct msm_dp *dp) { struct dp_display_private *dp_display; - struct drm_device *drm; dp_display = container_of(dp, struct dp_display_private, dp_display); - drm = dp->drm_dev; /* * if we are reading registers we need the link clocks to be on @@ -1119,9 +1110,6 @@ static int hpd_event_thread(void *data) case EV_IRQ_HPD_INT: dp_irq_hpd_handle(dp_priv, todo->data); break; - case EV_HPD_REPLUG_INT: - /* do nothing */ - break; case EV_USER_NOTIFICATION: dp_display_send_hpd_notification(dp_priv, todo->data); @@ -1163,12 +1151,11 @@ static irqreturn_t dp_display_irq_handler(int irq, void *dev_id) hpd_isr_status = dp_catalog_hpd_get_intr_status(dp->catalog); + DRM_DEBUG_DP("hpd isr status=%#x\n", hpd_isr_status); if (hpd_isr_status & 0x0F) { /* hpd related interrupts */ - if (hpd_isr_status & DP_DP_HPD_PLUG_INT_MASK || - hpd_isr_status & DP_DP_HPD_REPLUG_INT_MASK) { + if (hpd_isr_status & DP_DP_HPD_PLUG_INT_MASK) dp_add_event(dp, EV_HPD_PLUG_INT, 0, 0); - } if (hpd_isr_status & DP_DP_IRQ_HPD_INT_MASK) { /* stop sentinel connect pending checking */ @@ -1176,8 +1163,10 @@ static irqreturn_t dp_display_irq_handler(int irq, void *dev_id) dp_add_event(dp, EV_IRQ_HPD_INT, 0, 0); } - if (hpd_isr_status & DP_DP_HPD_REPLUG_INT_MASK) - dp_add_event(dp, EV_HPD_REPLUG_INT, 0, 0); + if (hpd_isr_status & DP_DP_HPD_REPLUG_INT_MASK) { + dp_add_event(dp, EV_HPD_UNPLUG_INT, 0, 0); + dp_add_event(dp, EV_HPD_PLUG_INT, 0, 3); + } if (hpd_isr_status & DP_DP_HPD_UNPLUG_INT_MASK) dp_add_event(dp, EV_HPD_UNPLUG_INT, 0, 0); @@ -1286,12 +1275,15 @@ static int dp_pm_resume(struct device *dev) struct platform_device *pdev = to_platform_device(dev); struct msm_dp *dp_display = platform_get_drvdata(pdev); struct dp_display_private *dp; - u32 status; + int sink_count = 0; dp = container_of(dp_display, struct dp_display_private, dp_display); mutex_lock(&dp->event_mutex); + DRM_DEBUG_DP("Before, core_inited=%d power_on=%d\n", + dp->core_initialized, dp_display->power_on); + /* start from disconnected state */ dp->hpd_state = ST_DISCONNECTED; @@ -1300,14 +1292,25 @@ static int dp_pm_resume(struct device *dev) dp_catalog_ctrl_hpd_config(dp->catalog); - status = dp_catalog_link_is_connected(dp->catalog); + /* + * set sink to normal operation mode -- D0 + * before dpcd read + */ + dp_link_psm_config(dp->link, &dp->panel->link_info, false); + + if (dp_catalog_link_is_connected(dp->catalog)) { + sink_count = drm_dp_read_sink_count(dp->aux); + if (sink_count < 0) + sink_count = 0; + } + dp->link->sink_count = sink_count; /* * can not declared display is connected unless * HDMI cable is plugged in and sink_count of * 
dongle become 1 */ - if (status && dp->link->sink_count) + if (dp->link->sink_count) dp->dp_display.is_connected = true; else dp->dp_display.is_connected = false; @@ -1315,6 +1318,9 @@ static int dp_pm_resume(struct device *dev) dp_display_handle_plugged_change(g_dp_display, dp->dp_display.is_connected); + DRM_DEBUG_DP("After, sink_count=%d is_connected=%d core_inited=%d power_on=%d\n", + dp->link->sink_count, dp->dp_display.is_connected, + dp->core_initialized, dp_display->power_on); mutex_unlock(&dp->event_mutex); @@ -1331,6 +1337,9 @@ static int dp_pm_suspend(struct device *dev) mutex_lock(&dp->event_mutex); + DRM_DEBUG_DP("Before, core_inited=%d power_on=%d\n", + dp->core_initialized, dp_display->power_on); + if (dp->core_initialized == true) { /* mainlink enabled */ if (dp_power_clk_status(dp->power, DP_CTRL_PM)) @@ -1344,6 +1353,9 @@ static int dp_pm_suspend(struct device *dev) /* host_init will be called at pm_resume */ dp->core_initialized = false; + DRM_DEBUG_DP("After, core_inited=%d power_on=%d\n", + dp->core_initialized, dp_display->power_on); + mutex_unlock(&dp->event_mutex); return 0; diff --git a/drivers/gpu/drm/msm/dp/dp_link.c b/drivers/gpu/drm/msm/dp/dp_link.c index 1195044a7a3b..a5bdfc5029de 100644 --- a/drivers/gpu/drm/msm/dp/dp_link.c +++ b/drivers/gpu/drm/msm/dp/dp_link.c @@ -1027,43 +1027,29 @@ int dp_link_process_request(struct dp_link *dp_link) if (link->request.test_requested == DP_TEST_LINK_EDID_READ) { dp_link->sink_request |= DP_TEST_LINK_EDID_READ; - return ret; - } - - ret = dp_link_process_ds_port_status_change(link); - if (!ret) { + } else if (!dp_link_process_ds_port_status_change(link)) { dp_link->sink_request |= DS_PORT_STATUS_CHANGED; - return ret; - } - - ret = dp_link_process_link_training_request(link); - if (!ret) { + } else if (!dp_link_process_link_training_request(link)) { dp_link->sink_request |= DP_TEST_LINK_TRAINING; - return ret; - } - - ret = dp_link_process_phy_test_pattern_request(link); - if (!ret) { + } else if (!dp_link_process_phy_test_pattern_request(link)) { dp_link->sink_request |= DP_TEST_LINK_PHY_TEST_PATTERN; - return ret; - } - - ret = dp_link_process_link_status_update(link); - if (!ret) { - dp_link->sink_request |= DP_LINK_STATUS_UPDATED; - return ret; - } - - if (dp_link_is_video_pattern_requested(link)) { - ret = 0; - dp_link->sink_request |= DP_TEST_LINK_VIDEO_PATTERN; - } - - if (dp_link_is_audio_pattern_requested(link)) { - dp_link->sink_request |= DP_TEST_LINK_AUDIO_PATTERN; - return -EINVAL; + } else { + ret = dp_link_process_link_status_update(link); + if (!ret) { + dp_link->sink_request |= DP_LINK_STATUS_UPDATED; + } else { + if (dp_link_is_video_pattern_requested(link)) { + ret = 0; + dp_link->sink_request |= DP_TEST_LINK_VIDEO_PATTERN; + } + if (dp_link_is_audio_pattern_requested(link)) { + dp_link->sink_request |= DP_TEST_LINK_AUDIO_PATTERN; + ret = -EINVAL; + } + } } + DRM_DEBUG_DP("sink request=%#x", dp_link->sink_request); return ret; } diff --git a/drivers/gpu/drm/msm/dp/dp_panel.c b/drivers/gpu/drm/msm/dp/dp_panel.c index 440b32753430..2181b60e1d1d 100644 --- a/drivers/gpu/drm/msm/dp/dp_panel.c +++ b/drivers/gpu/drm/msm/dp/dp_panel.c @@ -271,7 +271,7 @@ static u8 dp_panel_get_edid_checksum(struct edid *edid) { struct edid *last_block; u8 *raw_edid; - bool is_edid_corrupt; + bool is_edid_corrupt = false; if (!edid) { DRM_ERROR("invalid edid input\n"); @@ -303,7 +303,12 @@ void dp_panel_handle_sink_request(struct dp_panel *dp_panel) panel = container_of(dp_panel, struct dp_panel_private, dp_panel); if 
(panel->link->sink_request & DP_TEST_LINK_EDID_READ) { - u8 checksum = dp_panel_get_edid_checksum(dp_panel->edid); + u8 checksum; + + if (dp_panel->edid) + checksum = dp_panel_get_edid_checksum(dp_panel->edid); + else + checksum = dp_panel->connector->real_edid_checksum; dp_link_send_edid_checksum(panel->link, checksum); dp_link_send_test_response(panel->link); diff --git a/drivers/gpu/drm/msm/dp/dp_power.c b/drivers/gpu/drm/msm/dp/dp_power.c index 3961ba4efc3c..b48b45e92bfa 100644 --- a/drivers/gpu/drm/msm/dp/dp_power.c +++ b/drivers/gpu/drm/msm/dp/dp_power.c @@ -208,6 +208,9 @@ static int dp_power_clk_set_rate(struct dp_power_private *power, int dp_power_clk_status(struct dp_power *dp_power, enum dp_pm_type pm_type) { + DRM_DEBUG_DP("core_clk_on=%d link_clk_on=%d stream_clk_on=%d\n", + dp_power->core_clks_on, dp_power->link_clks_on, dp_power->stream_clks_on); + if (pm_type == DP_CORE_PM) return dp_power->core_clks_on; diff --git a/drivers/gpu/drm/msm/dsi/dsi.c b/drivers/gpu/drm/msm/dsi/dsi.c index 75afc12a7b25..614dc7f26f2c 100644 --- a/drivers/gpu/drm/msm/dsi/dsi.c +++ b/drivers/gpu/drm/msm/dsi/dsi.c @@ -13,6 +13,13 @@ struct drm_encoder *msm_dsi_get_encoder(struct msm_dsi *msm_dsi) return msm_dsi->encoder; } +bool msm_dsi_is_cmd_mode(struct msm_dsi *msm_dsi) +{ + unsigned long host_flags = msm_dsi_host_get_mode_flags(msm_dsi->host); + + return !(host_flags & MIPI_DSI_MODE_VIDEO); +} + static int dsi_get_phy(struct msm_dsi *msm_dsi) { struct platform_device *pdev = msm_dsi->pdev; @@ -26,8 +33,10 @@ static int dsi_get_phy(struct msm_dsi *msm_dsi) } phy_pdev = of_find_device_by_node(phy_node); - if (phy_pdev) + if (phy_pdev) { msm_dsi->phy = platform_get_drvdata(phy_pdev); + msm_dsi->phy_dev = &phy_pdev->dev; + } of_node_put(phy_node); @@ -36,8 +45,6 @@ static int dsi_get_phy(struct msm_dsi *msm_dsi) return -EPROBE_DEFER; } - msm_dsi->phy_dev = get_device(&phy_pdev->dev); - return 0; } @@ -244,8 +251,6 @@ int msm_dsi_modeset_init(struct msm_dsi *msm_dsi, struct drm_device *dev, goto fail; } - msm_dsi_manager_setup_encoder(msm_dsi->id); - priv->bridges[priv->num_bridges++] = msm_dsi->bridge; priv->connectors[priv->num_connectors++] = msm_dsi->connector; diff --git a/drivers/gpu/drm/msm/dsi/dsi.h b/drivers/gpu/drm/msm/dsi/dsi.h index 9b8e9b07eced..b50db91cb8a7 100644 --- a/drivers/gpu/drm/msm/dsi/dsi.h +++ b/drivers/gpu/drm/msm/dsi/dsi.h @@ -80,10 +80,10 @@ struct drm_connector *msm_dsi_manager_connector_init(u8 id); struct drm_connector *msm_dsi_manager_ext_bridge_init(u8 id); int msm_dsi_manager_cmd_xfer(int id, const struct mipi_dsi_msg *msg); bool msm_dsi_manager_cmd_xfer_trigger(int id, u32 dma_base, u32 len); -void msm_dsi_manager_setup_encoder(int id); int msm_dsi_manager_register(struct msm_dsi *msm_dsi); void msm_dsi_manager_unregister(struct msm_dsi *msm_dsi); bool msm_dsi_manager_validate_current_config(u8 id); +void msm_dsi_manager_tpg_enable(void); /* msm dsi */ static inline bool msm_dsi_device_connected(struct msm_dsi *msm_dsi) @@ -109,7 +109,7 @@ int msm_dsi_host_enable(struct mipi_dsi_host *host); int msm_dsi_host_disable(struct mipi_dsi_host *host); int msm_dsi_host_power_on(struct mipi_dsi_host *host, struct msm_dsi_phy_shared_timings *phy_shared_timings, - bool is_dual_dsi); + bool is_bonded_dsi, struct msm_dsi_phy *phy); int msm_dsi_host_power_off(struct mipi_dsi_host *host); int msm_dsi_host_set_display_mode(struct mipi_dsi_host *host, const struct drm_display_mode *mode); @@ -123,7 +123,7 @@ int msm_dsi_host_set_src_pll(struct mipi_dsi_host *host, void 
msm_dsi_host_reset_phy(struct mipi_dsi_host *host); void msm_dsi_host_get_phy_clk_req(struct mipi_dsi_host *host, struct msm_dsi_phy_clk_request *clk_req, - bool is_dual_dsi); + bool is_bonded_dsi); void msm_dsi_host_destroy(struct mipi_dsi_host *host); int msm_dsi_host_modeset_init(struct mipi_dsi_host *host, struct drm_device *dev); @@ -145,9 +145,11 @@ int dsi_dma_base_get_6g(struct msm_dsi_host *msm_host, uint64_t *iova); int dsi_dma_base_get_v2(struct msm_dsi_host *msm_host, uint64_t *iova); int dsi_clk_init_v2(struct msm_dsi_host *msm_host); int dsi_clk_init_6g_v2(struct msm_dsi_host *msm_host); -int dsi_calc_clk_rate_v2(struct msm_dsi_host *msm_host, bool is_dual_dsi); -int dsi_calc_clk_rate_6g(struct msm_dsi_host *msm_host, bool is_dual_dsi); +int dsi_calc_clk_rate_v2(struct msm_dsi_host *msm_host, bool is_bonded_dsi); +int dsi_calc_clk_rate_6g(struct msm_dsi_host *msm_host, bool is_bonded_dsi); void msm_dsi_host_snapshot(struct msm_disp_state *disp_state, struct mipi_dsi_host *host); +void msm_dsi_host_test_pattern_en(struct mipi_dsi_host *host); + /* dsi phy */ struct msm_dsi_phy; struct msm_dsi_phy_shared_timings { @@ -164,10 +166,9 @@ struct msm_dsi_phy_clk_request { void msm_dsi_phy_driver_register(void); void msm_dsi_phy_driver_unregister(void); int msm_dsi_phy_enable(struct msm_dsi_phy *phy, - struct msm_dsi_phy_clk_request *clk_req); + struct msm_dsi_phy_clk_request *clk_req, + struct msm_dsi_phy_shared_timings *shared_timings); void msm_dsi_phy_disable(struct msm_dsi_phy *phy); -void msm_dsi_phy_get_shared_timings(struct msm_dsi_phy *phy, - struct msm_dsi_phy_shared_timings *shared_timing); void msm_dsi_phy_set_usecase(struct msm_dsi_phy *phy, enum msm_dsi_phy_usecase uc); int msm_dsi_phy_get_clk_provider(struct msm_dsi_phy *phy, @@ -175,6 +176,7 @@ int msm_dsi_phy_get_clk_provider(struct msm_dsi_phy *phy, void msm_dsi_phy_pll_save_state(struct msm_dsi_phy *phy); int msm_dsi_phy_pll_restore_state(struct msm_dsi_phy *phy); void msm_dsi_phy_snapshot(struct msm_disp_state *disp_state, struct msm_dsi_phy *phy); +bool msm_dsi_phy_set_continuous_clock(struct msm_dsi_phy *phy, bool enable); #endif /* __DSI_CONNECTOR_H__ */ diff --git a/drivers/gpu/drm/msm/dsi/dsi.xml.h b/drivers/gpu/drm/msm/dsi/dsi.xml.h index eadbcc78fd72..49b551ad1bff 100644 --- a/drivers/gpu/drm/msm/dsi/dsi.xml.h +++ b/drivers/gpu/drm/msm/dsi/dsi.xml.h @@ -105,6 +105,32 @@ enum dsi_lane_swap { LANE_SWAP_3210 = 7, }; +enum video_config_bpp { + VIDEO_CONFIG_18BPP = 0, + VIDEO_CONFIG_24BPP = 1, +}; + +enum video_pattern_sel { + VID_PRBS = 0, + VID_INCREMENTAL = 1, + VID_FIXED = 2, + VID_MDSS_GENERAL_PATTERN = 3, +}; + +enum cmd_mdp_stream0_pattern_sel { + CMD_MDP_PRBS = 0, + CMD_MDP_INCREMENTAL = 1, + CMD_MDP_FIXED = 2, + CMD_MDP_MDSS_GENERAL_PATTERN = 3, +}; + +enum cmd_dma_pattern_sel { + CMD_DMA_PRBS = 0, + CMD_DMA_INCREMENTAL = 1, + CMD_DMA_FIXED = 2, + CMD_DMA_CUSTOM_PATTERN_DMA_FIFO = 3, +}; + #define DSI_IRQ_CMD_DMA_DONE 0x00000001 #define DSI_IRQ_MASK_CMD_DMA_DONE 0x00000002 #define DSI_IRQ_CMD_MDP_DONE 0x00000100 @@ -518,6 +544,7 @@ static inline uint32_t DSI_CLKOUT_TIMING_CTRL_T_CLK_POST(uint32_t val) #define DSI_LANE_STATUS_DLN0_DIRECTION 0x00010000 #define REG_DSI_LANE_CTRL 0x000000a8 +#define DSI_LANE_CTRL_HS_REQ_SEL_PHY 0x01000000 #define DSI_LANE_CTRL_CLKLN_HS_FORCE_REQUEST 0x10000000 #define REG_DSI_LANE_SWAP_CTRL 0x000000ac @@ -564,6 +591,53 @@ static inline uint32_t DSI_LANE_SWAP_CTRL_DLN_SWAP_SEL(enum dsi_lane_swap val) #define REG_DSI_PHY_RESET 0x00000128 #define DSI_PHY_RESET_RESET 0x00000001 
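/*
 * A small standalone illustration (hypothetical register and field names) of
 * the mask/shift convention the generated dsi.xml.h helpers follow, including
 * the new TPG defines added below: each multi-bit field gets a __MASK/__SHIFT
 * pair plus an inline packer, and a register value is built by OR-ing packed
 * fields with single-bit flags.
 */
#include <stdint.h>
#include <stdio.h>

#define EXAMPLE_FIELD__MASK	0x00000030
#define EXAMPLE_FIELD__SHIFT	4
static inline uint32_t EXAMPLE_FIELD(uint32_t val)
{
	return (val << EXAMPLE_FIELD__SHIFT) & EXAMPLE_FIELD__MASK;
}
#define EXAMPLE_ENABLE		0x00000001

int main(void)
{
	/* select field value 3 and set the enable bit -> 0x31 */
	uint32_t reg = EXAMPLE_FIELD(3) | EXAMPLE_ENABLE;

	printf("reg = %#x\n", reg);
	return 0;
}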
+#define REG_DSI_TEST_PATTERN_GEN_VIDEO_INIT_VAL 0x00000160 + +#define REG_DSI_TPG_MAIN_CONTROL 0x00000198 +#define DSI_TPG_MAIN_CONTROL_CHECKERED_RECTANGLE_PATTERN 0x00000100 + +#define REG_DSI_TPG_VIDEO_CONFIG 0x000001a0 +#define DSI_TPG_VIDEO_CONFIG_BPP__MASK 0x00000003 +#define DSI_TPG_VIDEO_CONFIG_BPP__SHIFT 0 +static inline uint32_t DSI_TPG_VIDEO_CONFIG_BPP(enum video_config_bpp val) +{ + return ((val) << DSI_TPG_VIDEO_CONFIG_BPP__SHIFT) & DSI_TPG_VIDEO_CONFIG_BPP__MASK; +} +#define DSI_TPG_VIDEO_CONFIG_RGB 0x00000004 + +#define REG_DSI_TEST_PATTERN_GEN_CTRL 0x00000158 +#define DSI_TEST_PATTERN_GEN_CTRL_CMD_DMA_PATTERN_SEL__MASK 0x00030000 +#define DSI_TEST_PATTERN_GEN_CTRL_CMD_DMA_PATTERN_SEL__SHIFT 16 +static inline uint32_t DSI_TEST_PATTERN_GEN_CTRL_CMD_DMA_PATTERN_SEL(enum cmd_dma_pattern_sel val) +{ + return ((val) << DSI_TEST_PATTERN_GEN_CTRL_CMD_DMA_PATTERN_SEL__SHIFT) & DSI_TEST_PATTERN_GEN_CTRL_CMD_DMA_PATTERN_SEL__MASK; +} +#define DSI_TEST_PATTERN_GEN_CTRL_CMD_MDP_STREAM0_PATTERN_SEL__MASK 0x00000300 +#define DSI_TEST_PATTERN_GEN_CTRL_CMD_MDP_STREAM0_PATTERN_SEL__SHIFT 8 +static inline uint32_t DSI_TEST_PATTERN_GEN_CTRL_CMD_MDP_STREAM0_PATTERN_SEL(enum cmd_mdp_stream0_pattern_sel val) +{ + return ((val) << DSI_TEST_PATTERN_GEN_CTRL_CMD_MDP_STREAM0_PATTERN_SEL__SHIFT) & DSI_TEST_PATTERN_GEN_CTRL_CMD_MDP_STREAM0_PATTERN_SEL__MASK; +} +#define DSI_TEST_PATTERN_GEN_CTRL_VIDEO_PATTERN_SEL__MASK 0x00000030 +#define DSI_TEST_PATTERN_GEN_CTRL_VIDEO_PATTERN_SEL__SHIFT 4 +static inline uint32_t DSI_TEST_PATTERN_GEN_CTRL_VIDEO_PATTERN_SEL(enum video_pattern_sel val) +{ + return ((val) << DSI_TEST_PATTERN_GEN_CTRL_VIDEO_PATTERN_SEL__SHIFT) & DSI_TEST_PATTERN_GEN_CTRL_VIDEO_PATTERN_SEL__MASK; +} +#define DSI_TEST_PATTERN_GEN_CTRL_TPG_DMA_FIFO_MODE 0x00000004 +#define DSI_TEST_PATTERN_GEN_CTRL_CMD_DMA_TPG_EN 0x00000002 +#define DSI_TEST_PATTERN_GEN_CTRL_EN 0x00000001 + +#define REG_DSI_TEST_PATTERN_GEN_CMD_MDP_INIT_VAL0 0x00000168 + +#define REG_DSI_TEST_PATTERN_GEN_CMD_STREAM0_TRIGGER 0x00000180 +#define DSI_TEST_PATTERN_GEN_CMD_STREAM0_TRIGGER_SW_TRIGGER 0x00000001 + +#define REG_DSI_TPG_MAIN_CONTROL2 0x0000019c +#define DSI_TPG_MAIN_CONTROL2_CMD_MDP0_CHECKERED_RECTANGLE_PATTERN 0x00000080 +#define DSI_TPG_MAIN_CONTROL2_CMD_MDP1_CHECKERED_RECTANGLE_PATTERN 0x00010000 +#define DSI_TPG_MAIN_CONTROL2_CMD_MDP2_CHECKERED_RECTANGLE_PATTERN 0x02000000 + #define REG_DSI_T_CLK_PRE_EXTEND 0x0000017c #define DSI_T_CLK_PRE_EXTEND_INC_BY_2_BYTECLK 0x00000001 diff --git a/drivers/gpu/drm/msm/dsi/dsi_cfg.c b/drivers/gpu/drm/msm/dsi/dsi_cfg.c index f3f1c03c7db9..96bbc8b6d009 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_cfg.c +++ b/drivers/gpu/drm/msm/dsi/dsi_cfg.c @@ -32,9 +32,8 @@ static const char * const dsi_6g_bus_clk_names[] = { static const struct msm_dsi_config msm8974_apq8084_dsi_cfg = { .io_offset = DSI_6G_REG_SHIFT, .reg_cfg = { - .num = 4, + .num = 3, .regs = { - {"gdsc", -1, -1}, {"vdd", 150000, 100}, /* 3.0 V */ {"vdda", 100000, 100}, /* 1.2 V */ {"vddio", 100000, 100}, /* 1.8 V */ @@ -53,9 +52,8 @@ static const char * const dsi_8916_bus_clk_names[] = { static const struct msm_dsi_config msm8916_dsi_cfg = { .io_offset = DSI_6G_REG_SHIFT, .reg_cfg = { - .num = 3, + .num = 2, .regs = { - {"gdsc", -1, -1}, {"vdda", 100000, 100}, /* 1.2 V */ {"vddio", 100000, 100}, /* 1.8 V */ }, @@ -73,9 +71,8 @@ static const char * const dsi_8976_bus_clk_names[] = { static const struct msm_dsi_config msm8976_dsi_cfg = { .io_offset = DSI_6G_REG_SHIFT, .reg_cfg = { - .num = 3, + .num = 2, .regs = { - {"gdsc", 
-1, -1}, {"vdda", 100000, 100}, /* 1.2 V */ {"vddio", 100000, 100}, /* 1.8 V */ }, @@ -89,9 +86,8 @@ static const struct msm_dsi_config msm8976_dsi_cfg = { static const struct msm_dsi_config msm8994_dsi_cfg = { .io_offset = DSI_6G_REG_SHIFT, .reg_cfg = { - .num = 7, + .num = 6, .regs = { - {"gdsc", -1, -1}, {"vdda", 100000, 100}, /* 1.25 V */ {"vddio", 100000, 100}, /* 1.8 V */ {"vcca", 10000, 100}, /* 1.0 V */ @@ -154,7 +150,6 @@ static const struct msm_dsi_config sdm660_dsi_cfg = { .reg_cfg = { .num = 2, .regs = { - {"vdd", 73400, 32 }, /* 0.9 V */ {"vdda", 12560, 4 }, /* 1.2 V */ }, }, @@ -200,6 +195,24 @@ static const struct msm_dsi_config sc7180_dsi_cfg = { .num_dsi = 1, }; +static const char * const dsi_sc7280_bus_clk_names[] = { + "iface", "bus", +}; + +static const struct msm_dsi_config sc7280_dsi_cfg = { + .io_offset = DSI_6G_REG_SHIFT, + .reg_cfg = { + .num = 1, + .regs = { + {"vdda", 8350, 0 }, /* 1.2 V */ + }, + }, + .bus_clk_names = dsi_sc7280_bus_clk_names, + .num_bus_clks = ARRAY_SIZE(dsi_sc7280_bus_clk_names), + .io_start = { 0xae94000 }, + .num_dsi = 1, +}; + static const struct msm_dsi_host_cfg_ops msm_dsi_v2_host_ops = { .link_clk_set_rate = dsi_link_clk_set_rate_v2, .link_clk_enable = dsi_link_clk_enable_v2, @@ -267,6 +280,8 @@ static const struct msm_dsi_cfg_handler dsi_cfg_handlers[] = { &sdm845_dsi_cfg, &msm_dsi_6g_v2_host_ops}, {MSM_DSI_VER_MAJOR_6G, MSM_DSI_6G_VER_MINOR_V2_4_1, &sc7180_dsi_cfg, &msm_dsi_6g_v2_host_ops}, + {MSM_DSI_VER_MAJOR_6G, MSM_DSI_6G_VER_MINOR_V2_5_0, + &sc7280_dsi_cfg, &msm_dsi_6g_v2_host_ops}, }; const struct msm_dsi_cfg_handler *msm_dsi_cfg_get(u32 major, u32 minor) diff --git a/drivers/gpu/drm/msm/dsi/dsi_cfg.h b/drivers/gpu/drm/msm/dsi/dsi_cfg.h index ade9b609c7d9..41e99a9fb5de 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_cfg.h +++ b/drivers/gpu/drm/msm/dsi/dsi_cfg.h @@ -24,6 +24,7 @@ #define MSM_DSI_6G_VER_MINOR_V2_3_0 0x20030000 #define MSM_DSI_6G_VER_MINOR_V2_4_0 0x20040000 #define MSM_DSI_6G_VER_MINOR_V2_4_1 0x20040001 +#define MSM_DSI_6G_VER_MINOR_V2_5_0 0x20050000 #define MSM_DSI_V2_VER_MINOR_8064 0x0 @@ -47,7 +48,7 @@ struct msm_dsi_host_cfg_ops { void* (*tx_buf_get)(struct msm_dsi_host *msm_host); void (*tx_buf_put)(struct msm_dsi_host *msm_host); int (*dma_base_get)(struct msm_dsi_host *msm_host, uint64_t *iova); - int (*calc_clk_rate)(struct msm_dsi_host *msm_host, bool is_dual_dsi); + int (*calc_clk_rate)(struct msm_dsi_host *msm_host, bool is_bonded_dsi); }; struct msm_dsi_cfg_handler { diff --git a/drivers/gpu/drm/msm/dsi/dsi_host.c b/drivers/gpu/drm/msm/dsi/dsi_host.c index ed504fe5074f..e269df285136 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_host.c +++ b/drivers/gpu/drm/msm/dsi/dsi_host.c @@ -27,6 +27,7 @@ #include "dsi_cfg.h" #include "msm_kms.h" #include "msm_gem.h" +#include "phy/dsi_phy.h" #define DSI_RESET_TOGGLE_DELAY_MS 20 @@ -167,6 +168,9 @@ struct msm_dsi_host { int dlane_swap; int num_data_lanes; + /* from phy DT */ + bool cphy_mode; + u32 dma_cmd_ctrl_restore; bool registered; @@ -203,35 +207,22 @@ static const struct msm_dsi_cfg_handler *dsi_get_config( { const struct msm_dsi_cfg_handler *cfg_hnd = NULL; struct device *dev = &msm_host->pdev->dev; - struct regulator *gdsc_reg; struct clk *ahb_clk; int ret; u32 major = 0, minor = 0; - gdsc_reg = regulator_get(dev, "gdsc"); - if (IS_ERR(gdsc_reg)) { - pr_err("%s: cannot get gdsc\n", __func__); - goto exit; - } - ahb_clk = msm_clk_get(msm_host->pdev, "iface"); if (IS_ERR(ahb_clk)) { pr_err("%s: cannot get interface clock\n", __func__); - goto put_gdsc; + goto exit; } 
pm_runtime_get_sync(dev); - ret = regulator_enable(gdsc_reg); - if (ret) { - pr_err("%s: unable to enable gdsc\n", __func__); - goto put_gdsc; - } - ret = clk_prepare_enable(ahb_clk); if (ret) { pr_err("%s: unable to enable ahb_clk\n", __func__); - goto disable_gdsc; + goto runtime_put; } ret = dsi_get_version(msm_host->ctrl_base, &major, &minor); @@ -246,11 +237,8 @@ static const struct msm_dsi_cfg_handler *dsi_get_config( disable_clks: clk_disable_unprepare(ahb_clk); -disable_gdsc: - regulator_disable(gdsc_reg); +runtime_put: pm_runtime_put_sync(dev); -put_gdsc: - regulator_put(gdsc_reg); exit: return cfg_hnd; } @@ -510,6 +498,7 @@ int msm_dsi_runtime_resume(struct device *dev) int dsi_link_clk_set_rate_6g(struct msm_dsi_host *msm_host) { + u32 byte_intf_rate; int ret; DBG("Set clk rates: pclk=%d, byteclk=%d", @@ -529,8 +518,13 @@ int dsi_link_clk_set_rate_6g(struct msm_dsi_host *msm_host) } if (msm_host->byte_intf_clk) { - ret = clk_set_rate(msm_host->byte_intf_clk, - msm_host->byte_clk_rate / 2); + /* For CPHY, byte_intf_clk is same as byte_clk */ + if (msm_host->cphy_mode) + byte_intf_rate = msm_host->byte_clk_rate; + else + byte_intf_rate = msm_host->byte_clk_rate / 2; + + ret = clk_set_rate(msm_host->byte_intf_clk, byte_intf_rate); if (ret) { pr_err("%s: Failed to set rate byte intf clk, %d\n", __func__, ret); @@ -679,7 +673,7 @@ void dsi_link_clk_disable_v2(struct msm_dsi_host *msm_host) clk_disable_unprepare(msm_host->byte_clk); } -static u32 dsi_get_pclk_rate(struct msm_dsi_host *msm_host, bool is_dual_dsi) +static u32 dsi_get_pclk_rate(struct msm_dsi_host *msm_host, bool is_bonded_dsi) { struct drm_display_mode *mode = msm_host->mode; u32 pclk_rate; @@ -687,22 +681,22 @@ static u32 dsi_get_pclk_rate(struct msm_dsi_host *msm_host, bool is_dual_dsi) pclk_rate = mode->clock * 1000; /* - * For dual DSI mode, the current DRM mode has the complete width of the + * For bonded DSI mode, the current DRM mode has the complete width of the * panel. Since, the complete panel is driven by two DSI controllers, * the clock rates have to be split between the two dsi controllers. * Adjust the byte and pixel clock rates for each dsi host accordingly. 
*/ - if (is_dual_dsi) + if (is_bonded_dsi) pclk_rate /= 2; return pclk_rate; } -static void dsi_calc_pclk(struct msm_dsi_host *msm_host, bool is_dual_dsi) +static void dsi_calc_pclk(struct msm_dsi_host *msm_host, bool is_bonded_dsi) { u8 lanes = msm_host->lanes; u32 bpp = dsi_get_bpp(msm_host->format); - u32 pclk_rate = dsi_get_pclk_rate(msm_host, is_dual_dsi); + u32 pclk_rate = dsi_get_pclk_rate(msm_host, is_bonded_dsi); u64 pclk_bpp = (u64)pclk_rate * bpp; if (lanes == 0) { @@ -710,7 +704,11 @@ static void dsi_calc_pclk(struct msm_dsi_host *msm_host, bool is_dual_dsi) lanes = 1; } - do_div(pclk_bpp, (8 * lanes)); + /* CPHY "byte_clk" is in units of 16 bits */ + if (msm_host->cphy_mode) + do_div(pclk_bpp, (16 * lanes)); + else + do_div(pclk_bpp, (8 * lanes)); msm_host->pixel_clk_rate = pclk_rate; msm_host->byte_clk_rate = pclk_bpp; @@ -720,28 +718,28 @@ static void dsi_calc_pclk(struct msm_dsi_host *msm_host, bool is_dual_dsi) } -int dsi_calc_clk_rate_6g(struct msm_dsi_host *msm_host, bool is_dual_dsi) +int dsi_calc_clk_rate_6g(struct msm_dsi_host *msm_host, bool is_bonded_dsi) { if (!msm_host->mode) { pr_err("%s: mode not set\n", __func__); return -EINVAL; } - dsi_calc_pclk(msm_host, is_dual_dsi); + dsi_calc_pclk(msm_host, is_bonded_dsi); msm_host->esc_clk_rate = clk_get_rate(msm_host->esc_clk); return 0; } -int dsi_calc_clk_rate_v2(struct msm_dsi_host *msm_host, bool is_dual_dsi) +int dsi_calc_clk_rate_v2(struct msm_dsi_host *msm_host, bool is_bonded_dsi) { u32 bpp = dsi_get_bpp(msm_host->format); u64 pclk_bpp; unsigned int esc_mhz, esc_div; unsigned long byte_mhz; - dsi_calc_pclk(msm_host, is_dual_dsi); + dsi_calc_pclk(msm_host, is_bonded_dsi); - pclk_bpp = (u64)dsi_get_pclk_rate(msm_host, is_dual_dsi) * bpp; + pclk_bpp = (u64)dsi_get_pclk_rate(msm_host, is_bonded_dsi) * bpp; do_div(pclk_bpp, 8); msm_host->src_clk_rate = pclk_bpp; @@ -834,7 +832,7 @@ static inline enum dsi_cmd_dst_format dsi_get_cmd_fmt( } static void dsi_ctrl_config(struct msm_dsi_host *msm_host, bool enable, - struct msm_dsi_phy_shared_timings *phy_shared_timings) + struct msm_dsi_phy_shared_timings *phy_shared_timings, struct msm_dsi_phy *phy) { u32 flags = msm_host->mode_flags; enum mipi_dsi_pixel_format mipi_fmt = msm_host->format; @@ -849,11 +847,11 @@ static void dsi_ctrl_config(struct msm_dsi_host *msm_host, bool enable, if (flags & MIPI_DSI_MODE_VIDEO) { if (flags & MIPI_DSI_MODE_VIDEO_HSE) data |= DSI_VID_CFG0_PULSE_MODE_HSA_HE; - if (flags & MIPI_DSI_MODE_VIDEO_HFP) + if (flags & MIPI_DSI_MODE_VIDEO_NO_HFP) data |= DSI_VID_CFG0_HFP_POWER_STOP; - if (flags & MIPI_DSI_MODE_VIDEO_HBP) + if (flags & MIPI_DSI_MODE_VIDEO_NO_HBP) data |= DSI_VID_CFG0_HBP_POWER_STOP; - if (flags & MIPI_DSI_MODE_VIDEO_HSA) + if (flags & MIPI_DSI_MODE_VIDEO_NO_HSA) data |= DSI_VID_CFG0_HSA_POWER_STOP; /* Always set low power stop mode for BLLP * to let command engine send packets @@ -908,7 +906,7 @@ static void dsi_ctrl_config(struct msm_dsi_host *msm_host, bool enable, DSI_T_CLK_PRE_EXTEND_INC_BY_2_BYTECLK); data = 0; - if (!(flags & MIPI_DSI_MODE_EOT_PACKET)) + if (!(flags & MIPI_DSI_MODE_NO_EOT_PACKET)) data |= DSI_EOT_PACKET_CTRL_TX_EOT_APPEND; dsi_write(msm_host, REG_DSI_EOT_PACKET_CTRL, data); @@ -929,6 +927,10 @@ static void dsi_ctrl_config(struct msm_dsi_host *msm_host, bool enable, if (!(flags & MIPI_DSI_CLOCK_NON_CONTINUOUS)) { lane_ctrl = dsi_read(msm_host, REG_DSI_LANE_CTRL); + + if (msm_dsi_phy_set_continuous_clock(phy, enable)) + lane_ctrl &= ~DSI_LANE_CTRL_HS_REQ_SEL_PHY; + dsi_write(msm_host, REG_DSI_LANE_CTRL, 
lane_ctrl | DSI_LANE_CTRL_CLKLN_HS_FORCE_REQUEST); } @@ -936,9 +938,12 @@ static void dsi_ctrl_config(struct msm_dsi_host *msm_host, bool enable, data |= DSI_CTRL_ENABLE; dsi_write(msm_host, REG_DSI_CTRL, data); + + if (msm_host->cphy_mode) + dsi_write(msm_host, REG_DSI_CPHY_MODE_CTRL, BIT(0)); } -static void dsi_timing_setup(struct msm_dsi_host *msm_host, bool is_dual_dsi) +static void dsi_timing_setup(struct msm_dsi_host *msm_host, bool is_bonded_dsi) { struct drm_display_mode *mode = msm_host->mode; u32 hs_start = 0, vs_start = 0; /* take sync start as 0 */ @@ -956,13 +961,13 @@ static void dsi_timing_setup(struct msm_dsi_host *msm_host, bool is_dual_dsi) DBG(""); /* - * For dual DSI mode, the current DRM mode has + * For bonded DSI mode, the current DRM mode has * the complete width of the panel. Since, the complete * panel is driven by two DSI controllers, the horizontal * timings have to be split between the two dsi controllers. * Adjust the DSI host timing values accordingly. */ - if (is_dual_dsi) { + if (is_bonded_dsi) { h_total /= 2; hs_end /= 2; ha_start /= 2; @@ -2226,6 +2231,8 @@ int msm_dsi_host_set_src_pll(struct mipi_dsi_host *host, struct clk *byte_clk_provider, *pixel_clk_provider; int ret; + msm_host->cphy_mode = src_phy->cphy_mode; + ret = msm_dsi_phy_get_clk_provider(src_phy, &byte_clk_provider, &pixel_clk_provider); if (ret) { @@ -2285,19 +2292,26 @@ void msm_dsi_host_reset_phy(struct mipi_dsi_host *host) void msm_dsi_host_get_phy_clk_req(struct mipi_dsi_host *host, struct msm_dsi_phy_clk_request *clk_req, - bool is_dual_dsi) + bool is_bonded_dsi) { struct msm_dsi_host *msm_host = to_msm_dsi_host(host); const struct msm_dsi_cfg_handler *cfg_hnd = msm_host->cfg_hnd; int ret; - ret = cfg_hnd->ops->calc_clk_rate(msm_host, is_dual_dsi); + ret = cfg_hnd->ops->calc_clk_rate(msm_host, is_bonded_dsi); if (ret) { pr_err("%s: unable to calc clk rate, %d\n", __func__, ret); return; } - clk_req->bitclk_rate = msm_host->byte_clk_rate * 8; + /* CPHY transmits 16 bits over 7 clock cycles + * "byte_clk" is in units of 16-bits (see dsi_calc_pclk), + * so multiply by 7 to get the "bitclk rate" + */ + if (msm_host->cphy_mode) + clk_req->bitclk_rate = msm_host->byte_clk_rate * 7; + else + clk_req->bitclk_rate = msm_host->byte_clk_rate * 8; clk_req->escclk_rate = msm_host->esc_clk_rate; } @@ -2354,7 +2368,7 @@ static void msm_dsi_sfpb_config(struct msm_dsi_host *msm_host, bool enable) int msm_dsi_host_power_on(struct mipi_dsi_host *host, struct msm_dsi_phy_shared_timings *phy_shared_timings, - bool is_dual_dsi) + bool is_bonded_dsi, struct msm_dsi_phy *phy) { struct msm_dsi_host *msm_host = to_msm_dsi_host(host); const struct msm_dsi_cfg_handler *cfg_hnd = msm_host->cfg_hnd; @@ -2392,9 +2406,9 @@ int msm_dsi_host_power_on(struct mipi_dsi_host *host, goto fail_disable_clk; } - dsi_timing_setup(msm_host, is_dual_dsi); + dsi_timing_setup(msm_host, is_bonded_dsi); dsi_sw_reset(msm_host); - dsi_ctrl_config(msm_host, true, phy_shared_timings); + dsi_ctrl_config(msm_host, true, phy_shared_timings, phy); if (msm_host->disp_en_gpio) gpiod_set_value(msm_host->disp_en_gpio, 1); @@ -2425,7 +2439,7 @@ int msm_dsi_host_power_off(struct mipi_dsi_host *host) goto unlock_ret; } - dsi_ctrl_config(msm_host, false, NULL); + dsi_ctrl_config(msm_host, false, NULL, NULL); if (msm_host->disp_en_gpio) gpiod_set_value(msm_host->disp_en_gpio, 0); @@ -2495,3 +2509,64 @@ void msm_dsi_host_snapshot(struct msm_disp_state *disp_state, struct mipi_dsi_ho pm_runtime_put_sync(&msm_host->pdev->dev); } + +static void 
msm_dsi_host_video_test_pattern_setup(struct msm_dsi_host *msm_host) +{ + u32 reg; + + reg = dsi_read(msm_host, REG_DSI_TEST_PATTERN_GEN_CTRL); + + dsi_write(msm_host, REG_DSI_TEST_PATTERN_GEN_VIDEO_INIT_VAL, 0xff); + /* draw checkered rectangle pattern */ + dsi_write(msm_host, REG_DSI_TPG_MAIN_CONTROL, + DSI_TPG_MAIN_CONTROL_CHECKERED_RECTANGLE_PATTERN); + /* use 24-bit RGB test pattern */ + dsi_write(msm_host, REG_DSI_TPG_VIDEO_CONFIG, + DSI_TPG_VIDEO_CONFIG_BPP(VIDEO_CONFIG_24BPP) | + DSI_TPG_VIDEO_CONFIG_RGB); + + reg |= DSI_TEST_PATTERN_GEN_CTRL_VIDEO_PATTERN_SEL(VID_MDSS_GENERAL_PATTERN); + dsi_write(msm_host, REG_DSI_TEST_PATTERN_GEN_CTRL, reg); + + DBG("Video test pattern setup done\n"); +} + +static void msm_dsi_host_cmd_test_pattern_setup(struct msm_dsi_host *msm_host) +{ + u32 reg; + + reg = dsi_read(msm_host, REG_DSI_TEST_PATTERN_GEN_CTRL); + + /* initial value for test pattern */ + dsi_write(msm_host, REG_DSI_TEST_PATTERN_GEN_CMD_MDP_INIT_VAL0, 0xff); + + reg |= DSI_TEST_PATTERN_GEN_CTRL_CMD_MDP_STREAM0_PATTERN_SEL(CMD_MDP_MDSS_GENERAL_PATTERN); + + dsi_write(msm_host, REG_DSI_TEST_PATTERN_GEN_CTRL, reg); + /* draw checkered rectangle pattern */ + dsi_write(msm_host, REG_DSI_TPG_MAIN_CONTROL2, + DSI_TPG_MAIN_CONTROL2_CMD_MDP0_CHECKERED_RECTANGLE_PATTERN); + + DBG("Cmd test pattern setup done\n"); +} + +void msm_dsi_host_test_pattern_en(struct mipi_dsi_host *host) +{ + struct msm_dsi_host *msm_host = to_msm_dsi_host(host); + bool is_video_mode = !!(msm_host->mode_flags & MIPI_DSI_MODE_VIDEO); + u32 reg; + + if (is_video_mode) + msm_dsi_host_video_test_pattern_setup(msm_host); + else + msm_dsi_host_cmd_test_pattern_setup(msm_host); + + reg = dsi_read(msm_host, REG_DSI_TEST_PATTERN_GEN_CTRL); + /* enable the test pattern generator */ + dsi_write(msm_host, REG_DSI_TEST_PATTERN_GEN_CTRL, (reg | DSI_TEST_PATTERN_GEN_CTRL_EN)); + + /* for command mode need to trigger one frame from tpg */ + if (!is_video_mode) + dsi_write(msm_host, REG_DSI_TEST_PATTERN_GEN_CMD_STREAM0_TRIGGER, + DSI_TEST_PATTERN_GEN_CMD_STREAM0_TRIGGER_SW_TRIGGER); +} diff --git a/drivers/gpu/drm/msm/dsi/dsi_manager.c b/drivers/gpu/drm/msm/dsi/dsi_manager.c index 4ebfedc4a9ac..c41d39f5b7cf 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_manager.c +++ b/drivers/gpu/drm/msm/dsi/dsi_manager.c @@ -21,14 +21,14 @@ struct msm_dsi_manager { struct msm_dsi *dsi[DSI_MAX]; - bool is_dual_dsi; + bool is_bonded_dsi; bool is_sync_needed; int master_dsi_link_id; }; static struct msm_dsi_manager msm_dsim_glb; -#define IS_DUAL_DSI() (msm_dsim_glb.is_dual_dsi) +#define IS_BONDED_DSI() (msm_dsim_glb.is_bonded_dsi) #define IS_SYNC_NEEDED() (msm_dsim_glb.is_sync_needed) #define IS_MASTER_DSI_LINK(id) (msm_dsim_glb.master_dsi_link_id == id) @@ -42,18 +42,17 @@ static inline struct msm_dsi *dsi_mgr_get_other_dsi(int id) return msm_dsim_glb.dsi[(id + 1) % DSI_MAX]; } -static int dsi_mgr_parse_dual_dsi(struct device_node *np, int id) +static int dsi_mgr_parse_of(struct device_node *np, int id) { struct msm_dsi_manager *msm_dsim = &msm_dsim_glb; - /* We assume 2 dsi nodes have the same information of dual-dsi and - * sync-mode, and only one node specifies master in case of dual mode. + /* We assume 2 dsi nodes have the same information of bonded dsi and + * sync-mode, and only one node specifies master in case of bonded mode.
*/ - if (!msm_dsim->is_dual_dsi) - msm_dsim->is_dual_dsi = of_property_read_bool( - np, "qcom,dual-dsi-mode"); + if (!msm_dsim->is_bonded_dsi) + msm_dsim->is_bonded_dsi = of_property_read_bool(np, "qcom,dual-dsi-mode"); - if (msm_dsim->is_dual_dsi) { + if (msm_dsim->is_bonded_dsi) { if (of_property_read_bool(np, "qcom,master-dsi")) msm_dsim->master_dsi_link_id = id; if (!msm_dsim->is_sync_needed) @@ -72,7 +71,7 @@ static int dsi_mgr_setup_components(int id) struct msm_dsi *clk_slave_dsi = dsi_mgr_get_dsi(DSI_CLOCK_SLAVE); int ret; - if (!IS_DUAL_DSI()) { + if (!IS_BONDED_DSI()) { ret = msm_dsi_host_register(msm_dsi->host, true); if (ret) return ret; @@ -100,7 +99,7 @@ static int dsi_mgr_setup_components(int id) if (ret) return ret; - /* PLL0 is to drive both 2 DSI link clocks in Dual DSI mode. */ + /* PLL0 is to drive both 2 DSI link clocks in bonded DSI mode. */ msm_dsi_phy_set_usecase(clk_master_dsi->phy, MSM_DSI_PHY_MASTER); msm_dsi_phy_set_usecase(clk_slave_dsi->phy, @@ -119,12 +118,11 @@ static int enable_phy(struct msm_dsi *msm_dsi, { struct msm_dsi_phy_clk_request clk_req; int ret; - bool is_dual_dsi = IS_DUAL_DSI(); + bool is_bonded_dsi = IS_BONDED_DSI(); - msm_dsi_host_get_phy_clk_req(msm_dsi->host, &clk_req, is_dual_dsi); + msm_dsi_host_get_phy_clk_req(msm_dsi->host, &clk_req, is_bonded_dsi); - ret = msm_dsi_phy_enable(msm_dsi->phy, &clk_req); - msm_dsi_phy_get_shared_timings(msm_dsi->phy, shared_timings); + ret = msm_dsi_phy_enable(msm_dsi->phy, &clk_req, shared_timings); return ret; } @@ -138,12 +136,12 @@ dsi_mgr_phy_enable(int id, struct msm_dsi *sdsi = dsi_mgr_get_dsi(DSI_CLOCK_SLAVE); int ret; - /* In case of dual DSI, some registers in PHY1 have been programmed + /* In case of bonded DSI, some registers in PHY1 have been programmed * during PLL0 clock's set_rate. The PHY1 reset called by host1 here * will silently reset those PHY1 registers. Therefore we need to reset * and enable both PHYs before any PLL clock operation. */ - if (IS_DUAL_DSI() && mdsi && sdsi) { + if (IS_BONDED_DSI() && mdsi && sdsi) { if (!mdsi->phy_enabled && !sdsi->phy_enabled) { msm_dsi_host_reset_phy(mdsi->host); msm_dsi_host_reset_phy(sdsi->host); @@ -178,11 +176,11 @@ static void dsi_mgr_phy_disable(int id) struct msm_dsi *sdsi = dsi_mgr_get_dsi(DSI_CLOCK_SLAVE); /* disable DSI phy - * In dual-dsi configuration, the phy should be disabled for the + * In bonded dsi configuration, the phy should be disabled for the * first controller only when the second controller is disabled. 
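/*
 * A standalone sketch (simplified stand-in state, not dsi_manager.c itself) of
 * the ordering rule the dsi_mgr_phy_disable() comment above describes for
 * bonded DSI: each link marks itself disabled first, and the shared PHY pair
 * is torn down only once neither link still needs it, slave PHY before master
 * PHY.
 */
#include <stdbool.h>
#include <stdio.h>

struct dsi_link_stub {
	const char *name;
	bool phy_enabled;
};

static void phy_disable(const char *name) { printf("disable %s PHY\n", name); }

static void bonded_phy_disable(struct dsi_link_stub *self,
			       struct dsi_link_stub *master,
			       struct dsi_link_stub *slave)
{
	self->phy_enabled = false;

	/* only the last link to go down actually turns the PHYs off */
	if (!master->phy_enabled && !slave->phy_enabled) {
		phy_disable(slave->name);
		phy_disable(master->name);
	}
}

int main(void)
{
	struct dsi_link_stub dsi0 = { "dsi0 (master)", true };
	struct dsi_link_stub dsi1 = { "dsi1 (slave)", true };

	bonded_phy_disable(&dsi0, &dsi0, &dsi1);	/* nothing happens yet */
	bonded_phy_disable(&dsi1, &dsi0, &dsi1);	/* both PHYs disabled now */
	return 0;
}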
*/ msm_dsi->phy_enabled = false; - if (IS_DUAL_DSI() && mdsi && sdsi) { + if (IS_BONDED_DSI() && mdsi && sdsi) { if (!mdsi->phy_enabled && !sdsi->phy_enabled) { msm_dsi_phy_disable(sdsi->phy); msm_dsi_phy_disable(mdsi->phy); @@ -217,24 +215,6 @@ static int dsi_mgr_bridge_get_id(struct drm_bridge *bridge) return dsi_bridge->id; } -static bool dsi_mgr_is_cmd_mode(struct msm_dsi *msm_dsi) -{ - unsigned long host_flags = msm_dsi_host_get_mode_flags(msm_dsi->host); - return !(host_flags & MIPI_DSI_MODE_VIDEO); -} - -void msm_dsi_manager_setup_encoder(int id) -{ - struct msm_dsi *msm_dsi = dsi_mgr_get_dsi(id); - struct msm_drm_private *priv = msm_dsi->dev->dev_private; - struct msm_kms *kms = priv->kms; - struct drm_encoder *encoder = msm_dsi_get_encoder(msm_dsi); - - if (encoder && kms->funcs->set_encoder_mode) - kms->funcs->set_encoder_mode(kms, encoder, - dsi_mgr_is_cmd_mode(msm_dsi)); -} - static int msm_dsi_manager_panel_init(struct drm_connector *conn, u8 id) { struct msm_drm_private *priv = conn->dev->dev_private; @@ -244,7 +224,7 @@ static int msm_dsi_manager_panel_init(struct drm_connector *conn, u8 id) struct msm_dsi *master_dsi, *slave_dsi; struct drm_panel *panel; - if (IS_DUAL_DSI() && !IS_MASTER_DSI_LINK(id)) { + if (IS_BONDED_DSI() && !IS_MASTER_DSI_LINK(id)) { master_dsi = other_dsi; slave_dsi = msm_dsi; } else { @@ -253,7 +233,7 @@ static int msm_dsi_manager_panel_init(struct drm_connector *conn, u8 id) } /* - * There is only 1 panel in the global panel list for dual DSI mode. + * There is only 1 panel in the global panel list for bonded DSI mode. * Therefore slave dsi should get the drm_panel instance from master * dsi. */ @@ -264,20 +244,20 @@ static int msm_dsi_manager_panel_init(struct drm_connector *conn, u8 id) return PTR_ERR(panel); } - if (!panel || !IS_DUAL_DSI()) + if (!panel || !IS_BONDED_DSI()) goto out; drm_object_attach_property(&conn->base, conn->dev->mode_config.tile_property, 0); /* - * Set split display info to kms once dual DSI panel is connected to + * Set split display info to kms once bonded DSI panel is connected to * both hosts. */ if (other_dsi && other_dsi->panel && kms->funcs->set_split_display) { kms->funcs->set_split_display(kms, master_dsi->encoder, slave_dsi->encoder, - dsi_mgr_is_cmd_mode(msm_dsi)); + msm_dsi_is_cmd_mode(msm_dsi)); } out: @@ -317,7 +297,7 @@ static int dsi_mgr_connector_get_modes(struct drm_connector *connector) return 0; /* - * In dual DSI mode, we have one connector that can be + * In bonded DSI mode, we have one connector that can be * attached to the drm_panel. 
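Dropping msm_dsi_manager_setup_encoder() and dsi_mgr_is_cmd_mode() above inverts the flow between the DSI manager and the KMS layer: instead of pushing the panel mode through kms->funcs->set_encoder_mode(), the encoder code is now expected to query the helpers exported later in this diff (msm_dsi_is_cmd_mode(), msm_dsi_is_bonded_dsi(), msm_dsi_is_master_dsi(), declared in msm_drv.h below). A rough sketch of a caller, where configure_encoder() is only a stand-in for whatever the mdp/dpu side actually does:

static void example_bind_dsi_encoder(struct msm_drm_private *priv, int i)
{
	struct msm_dsi *msm_dsi = priv->dsi[i];

	/* only the master link of a bonded pair owns the bridge/connector */
	if (msm_dsi_is_bonded_dsi(msm_dsi) && !msm_dsi_is_master_dsi(msm_dsi))
		return;

	/*
	 * Pull the operating mode instead of having it pushed via the
	 * removed set_encoder_mode() callback; command-mode panels need a
	 * different encoder configuration than video-mode ones.
	 */
	configure_encoder(msm_dsi_is_cmd_mode(msm_dsi)); /* hypothetical consumer */
}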
*/ num = drm_panel_get_modes(panel, connector); @@ -366,30 +346,30 @@ static void dsi_mgr_bridge_pre_enable(struct drm_bridge *bridge) struct mipi_dsi_host *host = msm_dsi->host; struct drm_panel *panel = msm_dsi->panel; struct msm_dsi_phy_shared_timings phy_shared_timings[DSI_MAX]; - bool is_dual_dsi = IS_DUAL_DSI(); + bool is_bonded_dsi = IS_BONDED_DSI(); int ret; DBG("id=%d", id); if (!msm_dsi_device_connected(msm_dsi)) return; - /* Do nothing with the host if it is slave-DSI in case of dual DSI */ - if (is_dual_dsi && !IS_MASTER_DSI_LINK(id)) + /* Do nothing with the host if it is slave-DSI in case of bonded DSI */ + if (is_bonded_dsi && !IS_MASTER_DSI_LINK(id)) return; ret = dsi_mgr_phy_enable(id, phy_shared_timings); if (ret) goto phy_en_fail; - ret = msm_dsi_host_power_on(host, &phy_shared_timings[id], is_dual_dsi); + ret = msm_dsi_host_power_on(host, &phy_shared_timings[id], is_bonded_dsi, msm_dsi->phy); if (ret) { pr_err("%s: power on host %d failed, %d\n", __func__, id, ret); goto host_on_fail; } - if (is_dual_dsi && msm_dsi1) { + if (is_bonded_dsi && msm_dsi1) { ret = msm_dsi_host_power_on(msm_dsi1->host, - &phy_shared_timings[DSI_1], is_dual_dsi); + &phy_shared_timings[DSI_1], is_bonded_dsi, msm_dsi1->phy); if (ret) { pr_err("%s: power on host1 failed, %d\n", __func__, ret); @@ -415,7 +395,7 @@ static void dsi_mgr_bridge_pre_enable(struct drm_bridge *bridge) goto host_en_fail; } - if (is_dual_dsi && msm_dsi1) { + if (is_bonded_dsi && msm_dsi1) { ret = msm_dsi_host_enable(msm_dsi1->host); if (ret) { pr_err("%s: enable host1 failed, %d\n", __func__, ret); @@ -431,7 +411,7 @@ host_en_fail: if (panel) drm_panel_unprepare(panel); panel_prep_fail: - if (is_dual_dsi && msm_dsi1) + if (is_bonded_dsi && msm_dsi1) msm_dsi_host_power_off(msm_dsi1->host); host1_on_fail: msm_dsi_host_power_off(host); @@ -441,20 +421,33 @@ phy_en_fail: return; } +void msm_dsi_manager_tpg_enable(void) +{ + struct msm_dsi *m_dsi = dsi_mgr_get_dsi(DSI_0); + struct msm_dsi *s_dsi = dsi_mgr_get_dsi(DSI_1); + + /* if dual dsi, trigger tpg on master first then slave */ + if (m_dsi) { + msm_dsi_host_test_pattern_en(m_dsi->host); + if (IS_BONDED_DSI() && s_dsi) + msm_dsi_host_test_pattern_en(s_dsi->host); + } +} + static void dsi_mgr_bridge_enable(struct drm_bridge *bridge) { int id = dsi_mgr_bridge_get_id(bridge); struct msm_dsi *msm_dsi = dsi_mgr_get_dsi(id); struct drm_panel *panel = msm_dsi->panel; - bool is_dual_dsi = IS_DUAL_DSI(); + bool is_bonded_dsi = IS_BONDED_DSI(); int ret; DBG("id=%d", id); if (!msm_dsi_device_connected(msm_dsi)) return; - /* Do nothing with the host if it is slave-DSI in case of dual DSI */ - if (is_dual_dsi && !IS_MASTER_DSI_LINK(id)) + /* Do nothing with the host if it is slave-DSI in case of bonded DSI */ + if (is_bonded_dsi && !IS_MASTER_DSI_LINK(id)) return; if (panel) { @@ -471,15 +464,15 @@ static void dsi_mgr_bridge_disable(struct drm_bridge *bridge) int id = dsi_mgr_bridge_get_id(bridge); struct msm_dsi *msm_dsi = dsi_mgr_get_dsi(id); struct drm_panel *panel = msm_dsi->panel; - bool is_dual_dsi = IS_DUAL_DSI(); + bool is_bonded_dsi = IS_BONDED_DSI(); int ret; DBG("id=%d", id); if (!msm_dsi_device_connected(msm_dsi)) return; - /* Do nothing with the host if it is slave-DSI in case of dual DSI */ - if (is_dual_dsi && !IS_MASTER_DSI_LINK(id)) + /* Do nothing with the host if it is slave-DSI in case of bonded DSI */ + if (is_bonded_dsi && !IS_MASTER_DSI_LINK(id)) return; if (panel) { @@ -497,7 +490,7 @@ static void dsi_mgr_bridge_post_disable(struct drm_bridge *bridge) struct 
msm_dsi *msm_dsi1 = dsi_mgr_get_dsi(DSI_1); struct mipi_dsi_host *host = msm_dsi->host; struct drm_panel *panel = msm_dsi->panel; - bool is_dual_dsi = IS_DUAL_DSI(); + bool is_bonded_dsi = IS_BONDED_DSI(); int ret; DBG("id=%d", id); @@ -506,18 +499,18 @@ static void dsi_mgr_bridge_post_disable(struct drm_bridge *bridge) return; /* - * Do nothing with the host if it is slave-DSI in case of dual DSI. + * Do nothing with the host if it is slave-DSI in case of bonded DSI. * It is safe to call dsi_mgr_phy_disable() here because a single PHY * won't be diabled until both PHYs request disable. */ - if (is_dual_dsi && !IS_MASTER_DSI_LINK(id)) + if (is_bonded_dsi && !IS_MASTER_DSI_LINK(id)) goto disable_phy; ret = msm_dsi_host_disable(host); if (ret) pr_err("%s: host %d disable failed, %d\n", __func__, id, ret); - if (is_dual_dsi && msm_dsi1) { + if (is_bonded_dsi && msm_dsi1) { ret = msm_dsi_host_disable(msm_dsi1->host); if (ret) pr_err("%s: host1 disable failed, %d\n", __func__, ret); @@ -537,7 +530,7 @@ static void dsi_mgr_bridge_post_disable(struct drm_bridge *bridge) if (ret) pr_err("%s: host %d power off failed,%d\n", __func__, id, ret); - if (is_dual_dsi && msm_dsi1) { + if (is_bonded_dsi && msm_dsi1) { ret = msm_dsi_host_power_off(msm_dsi1->host); if (ret) pr_err("%s: host1 power off failed, %d\n", @@ -556,15 +549,15 @@ static void dsi_mgr_bridge_mode_set(struct drm_bridge *bridge, struct msm_dsi *msm_dsi = dsi_mgr_get_dsi(id); struct msm_dsi *other_dsi = dsi_mgr_get_other_dsi(id); struct mipi_dsi_host *host = msm_dsi->host; - bool is_dual_dsi = IS_DUAL_DSI(); + bool is_bonded_dsi = IS_BONDED_DSI(); DBG("set mode: " DRM_MODE_FMT, DRM_MODE_ARG(mode)); - if (is_dual_dsi && !IS_MASTER_DSI_LINK(id)) + if (is_bonded_dsi && !IS_MASTER_DSI_LINK(id)) return; msm_dsi_host_set_display_mode(host, adjusted_mode); - if (is_dual_dsi && other_dsi) + if (is_bonded_dsi && other_dsi) msm_dsi_host_set_display_mode(other_dsi->host, adjusted_mode); } @@ -640,15 +633,15 @@ fail: bool msm_dsi_manager_validate_current_config(u8 id) { - bool is_dual_dsi = IS_DUAL_DSI(); + bool is_bonded_dsi = IS_BONDED_DSI(); /* - * For dual DSI, we only have one drm panel. For this + * For bonded DSI, we only have one drm panel. For this * use case, we register only one bridge/connector. * Skip bridge/connector initialisation if it is - * slave-DSI for dual DSI configuration. + * slave-DSI for bonded DSI configuration. */ - if (is_dual_dsi && !IS_MASTER_DSI_LINK(id)) { + if (is_bonded_dsi && !IS_MASTER_DSI_LINK(id)) { DBG("Skip bridge registration for slave DSI->id: %d\n", id); return false; } @@ -740,7 +733,7 @@ int msm_dsi_manager_cmd_xfer(int id, const struct mipi_dsi_msg *msg) if (!msg->tx_buf || !msg->tx_len) return 0; - /* In dual master case, panel requires the same commands sent to + /* In bonded master case, panel requires the same commands sent to * both DSI links. Host issues the command trigger to both links * when DSI_1 calls the cmd transfer function, no matter it happens * before or after DSI_0 cmd transfer. 
@@ -809,9 +802,9 @@ int msm_dsi_manager_register(struct msm_dsi *msm_dsi) msm_dsim->dsi[id] = msm_dsi; - ret = dsi_mgr_parse_dual_dsi(msm_dsi->pdev->dev.of_node, id); + ret = dsi_mgr_parse_of(msm_dsi->pdev->dev.of_node, id); if (ret) { - pr_err("%s: failed to parse dual DSI info\n", __func__); + pr_err("%s: failed to parse OF DSI info\n", __func__); goto fail; } @@ -840,3 +833,12 @@ void msm_dsi_manager_unregister(struct msm_dsi *msm_dsi) msm_dsim->dsi[msm_dsi->id] = NULL; } +bool msm_dsi_is_bonded_dsi(struct msm_dsi *msm_dsi) +{ + return IS_BONDED_DSI(); +} + +bool msm_dsi_is_master_dsi(struct msm_dsi *msm_dsi) +{ + return IS_MASTER_DSI_LINK(msm_dsi->id); +} diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c index 6ca6bfd4809b..8c65ef6968ca 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c @@ -5,6 +5,7 @@ #include <linux/clk-provider.h> #include <linux/platform_device.h> +#include <dt-bindings/phy/phy.h> #include "dsi_phy.h" @@ -461,6 +462,51 @@ int msm_dsi_dphy_timing_calc_v4(struct msm_dsi_dphy_timing *timing, return 0; } +int msm_dsi_cphy_timing_calc_v4(struct msm_dsi_dphy_timing *timing, + struct msm_dsi_phy_clk_request *clk_req) +{ + const unsigned long bit_rate = clk_req->bitclk_rate; + const unsigned long esc_rate = clk_req->escclk_rate; + s32 ui, ui_x7; + s32 tmax, tmin; + s32 coeff = 1000; /* Precision, should avoid overflow */ + s32 temp; + + if (!bit_rate || !esc_rate) + return -EINVAL; + + ui = mult_frac(NSEC_PER_MSEC, coeff, bit_rate / 1000); + ui_x7 = ui * 7; + + temp = S_DIV_ROUND_UP(38 * coeff, ui_x7); + tmin = max_t(s32, temp, 0); + temp = (95 * coeff) / ui_x7; + tmax = max_t(s32, temp, 0); + timing->clk_prepare = linear_inter(tmax, tmin, 50, 0, false); + + tmin = DIV_ROUND_UP(50 * coeff, ui_x7); + tmax = 255; + timing->hs_rqst = linear_inter(tmax, tmin, 1, 0, false); + + tmin = DIV_ROUND_UP(100 * coeff, ui_x7) - 1; + tmax = 255; + timing->hs_exit = linear_inter(tmax, tmin, 10, 0, false); + + tmin = 1; + tmax = 32; + timing->shared_timings.clk_post = linear_inter(tmax, tmin, 80, 0, false); + + tmin = min_t(s32, 64, S_DIV_ROUND_UP(262 * coeff, ui_x7) - 1); + tmax = 64; + timing->shared_timings.clk_pre = linear_inter(tmax, tmin, 20, 0, false); + + DBG("%d, %d, %d, %d, %d", + timing->shared_timings.clk_pre, timing->shared_timings.clk_post, + timing->clk_prepare, timing->hs_exit, timing->hs_rqst); + + return 0; +} + static int dsi_phy_regulator_init(struct msm_dsi_phy *phy) { struct regulator_bulk_data *s = phy->supplies; @@ -593,6 +639,8 @@ static const struct of_device_id dsi_phy_dt_match[] = { .data = &dsi_phy_7nm_cfgs }, { .compatible = "qcom,dsi-phy-7nm-8150", .data = &dsi_phy_7nm_8150_cfgs }, + { .compatible = "qcom,sc7280-dsi-phy-7nm", + .data = &dsi_phy_7nm_7280_cfgs }, #endif {} }; @@ -625,17 +673,13 @@ static int dsi_phy_driver_probe(struct platform_device *pdev) { struct msm_dsi_phy *phy; struct device *dev = &pdev->dev; - const struct of_device_id *match; + u32 phy_type; int ret; phy = devm_kzalloc(dev, sizeof(*phy), GFP_KERNEL); if (!phy) return -ENOMEM; - match = of_match_node(dsi_phy_dt_match, dev->of_node); - if (!match) - return -ENODEV; - phy->provided_clocks = devm_kzalloc(dev, struct_size(phy->provided_clocks, hws, NUM_PROVIDED_CLKS), GFP_KERNEL); @@ -644,7 +688,10 @@ static int dsi_phy_driver_probe(struct platform_device *pdev) phy->provided_clocks->num = NUM_PROVIDED_CLKS; - phy->cfg = match->data; + phy->cfg = of_device_get_match_data(&pdev->dev); + if (!phy->cfg) + 
return -ENODEV; + phy->pdev = pdev; phy->id = dsi_phy_get_id(phy); @@ -657,6 +704,8 @@ static int dsi_phy_driver_probe(struct platform_device *pdev) phy->regulator_ldo_mode = of_property_read_bool(dev->of_node, "qcom,dsi-phy-regulator-ldo-mode"); + if (!of_property_read_u32(dev->of_node, "phy-type", &phy_type)) + phy->cphy_mode = (phy_type == PHY_TYPE_CPHY); phy->base = msm_ioremap_size(pdev, "dsi_phy", "DSI_PHY", &phy->base_size); if (IS_ERR(phy->base)) { @@ -754,7 +803,8 @@ void __exit msm_dsi_phy_driver_unregister(void) } int msm_dsi_phy_enable(struct msm_dsi_phy *phy, - struct msm_dsi_phy_clk_request *clk_req) + struct msm_dsi_phy_clk_request *clk_req, + struct msm_dsi_phy_shared_timings *shared_timings) { struct device *dev = &phy->pdev->dev; int ret; @@ -782,6 +832,9 @@ int msm_dsi_phy_enable(struct msm_dsi_phy *phy, goto phy_en_fail; } + memcpy(shared_timings, &phy->timing.shared_timings, + sizeof(*shared_timings)); + /* * Resetting DSI PHY silently changes its PLL registers to reset status, * which will confuse clock driver and result in wrong output rate of @@ -821,13 +874,6 @@ void msm_dsi_phy_disable(struct msm_dsi_phy *phy) dsi_phy_disable_resource(phy); } -void msm_dsi_phy_get_shared_timings(struct msm_dsi_phy *phy, - struct msm_dsi_phy_shared_timings *shared_timings) -{ - memcpy(shared_timings, &phy->timing.shared_timings, - sizeof(*shared_timings)); -} - void msm_dsi_phy_set_usecase(struct msm_dsi_phy *phy, enum msm_dsi_phy_usecase uc) { @@ -835,6 +881,15 @@ void msm_dsi_phy_set_usecase(struct msm_dsi_phy *phy, phy->usecase = uc; } +/* Returns true if we have to clear DSI_LANE_CTRL.HS_REQ_SEL_PHY */ +bool msm_dsi_phy_set_continuous_clock(struct msm_dsi_phy *phy, bool enable) +{ + if (!phy || !phy->cfg->ops.set_continuous_clock) + return false; + + return phy->cfg->ops.set_continuous_clock(phy, enable); +} + int msm_dsi_phy_get_clk_provider(struct msm_dsi_phy *phy, struct clk **byte_clk_provider, struct clk **pixel_clk_provider) { diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h index 5b0feef87127..b91303ada74f 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h @@ -24,6 +24,7 @@ struct msm_dsi_phy_ops { void (*disable)(struct msm_dsi_phy *phy); void (*save_pll_state)(struct msm_dsi_phy *phy); int (*restore_pll_state)(struct msm_dsi_phy *phy); + bool (*set_continuous_clock)(struct msm_dsi_phy *phy, bool enable); }; struct msm_dsi_phy_cfg { @@ -51,6 +52,7 @@ extern const struct msm_dsi_phy_cfg dsi_phy_10nm_cfgs; extern const struct msm_dsi_phy_cfg dsi_phy_10nm_8998_cfgs; extern const struct msm_dsi_phy_cfg dsi_phy_7nm_cfgs; extern const struct msm_dsi_phy_cfg dsi_phy_7nm_8150_cfgs; +extern const struct msm_dsi_phy_cfg dsi_phy_7nm_7280_cfgs; struct msm_dsi_dphy_timing { u32 clk_zero; @@ -99,6 +101,7 @@ struct msm_dsi_phy { enum msm_dsi_phy_usecase usecase; bool regulator_ldo_mode; + bool cphy_mode; struct clk_hw *vco_hw; bool pll_on; @@ -119,5 +122,7 @@ int msm_dsi_dphy_timing_calc_v3(struct msm_dsi_dphy_timing *timing, struct msm_dsi_phy_clk_request *clk_req); int msm_dsi_dphy_timing_calc_v4(struct msm_dsi_dphy_timing *timing, struct msm_dsi_phy_clk_request *clk_req); +int msm_dsi_cphy_timing_calc_v4(struct msm_dsi_dphy_timing *timing, + struct msm_dsi_phy_clk_request *clk_req); #endif /* __DSI_PHY_H__ */ diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_10nm.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_10nm.c index e46b10fc793a..d8128f50b0dd 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_10nm.c 
+++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_10nm.c @@ -84,7 +84,7 @@ struct dsi_pll_10nm { #define to_pll_10nm(x) container_of(x, struct dsi_pll_10nm, clk_hw) /* - * Global list of private DSI PLL struct pointers. We need this for Dual DSI + * Global list of private DSI PLL struct pointers. We need this for bonded DSI * mode, where the master PLL's clk_ops needs access the slave's private data */ static struct dsi_pll_10nm *pll_10nm_list[DSI_MAX]; diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_14nm.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_14nm.c index a34cf151c517..d13552b2213b 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_14nm.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_14nm.c @@ -86,7 +86,7 @@ struct dsi_pll_14nm { /* * Private struct for N1/N2 post-divider clocks. These clocks are similar to * the generic clk_divider class of clocks. The only difference is that it - * also sets the slave DSI PLL's post-dividers if in Dual DSI mode + * also sets the slave DSI PLL's post-dividers if in bonded DSI mode */ struct dsi_pll_14nm_postdiv { struct clk_hw hw; @@ -102,7 +102,7 @@ struct dsi_pll_14nm_postdiv { #define to_pll_14nm_postdiv(_hw) container_of(_hw, struct dsi_pll_14nm_postdiv, hw) /* - * Global list of private DSI PLL struct pointers. We need this for Dual DSI + * Global list of private DSI PLL struct pointers. We need this for bonded DSI * mode, where the master PLL's clk_ops needs access the slave's private data */ static struct dsi_pll_14nm *pll_14nm_list[DSI_MAX]; @@ -658,7 +658,7 @@ static int dsi_pll_14nm_postdiv_set_rate(struct clk_hw *hw, unsigned long rate, val |= value << shift; dsi_phy_write(base + REG_DSI_14nm_PHY_CMN_CLK_CFG0, val); - /* If we're master in dual DSI mode, then the slave PLL's post-dividers + /* If we're master in bonded DSI mode, then the slave PLL's post-dividers * follow the master's post dividers */ if (pll_14nm->phy->usecase == MSM_DSI_PHY_MASTER) { @@ -1050,7 +1050,7 @@ const struct msm_dsi_phy_cfg dsi_phy_14nm_660_cfgs = { .reg_cfg = { .num = 1, .regs = { - {"vcca", 17000, 32}, + {"vcca", 73400, 32}, }, }, .ops = { diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c index 7c23d4c47338..cb297b08458e 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c @@ -83,7 +83,7 @@ struct dsi_pll_7nm { #define to_pll_7nm(x) container_of(x, struct dsi_pll_7nm, clk_hw) /* - * Global list of private DSI PLL struct pointers. We need this for Dual DSI + * Global list of private DSI PLL struct pointers. We need this for bonded DSI * mode, where the master PLL's clk_ops needs access the slave's private data */ static struct dsi_pll_7nm *pll_7nm_list[DSI_MAX]; @@ -256,7 +256,7 @@ static void dsi_pll_commit(struct dsi_pll_7nm *pll, struct dsi_pll_config *confi (config->frac_div_start & 0x30000) >> 16); dsi_phy_write(base + REG_DSI_7nm_PHY_PLL_PLL_LOCKDET_RATE_1, 0x40); dsi_phy_write(base + REG_DSI_7nm_PHY_PLL_PLL_LOCK_DELAY, 0x06); - dsi_phy_write(base + REG_DSI_7nm_PHY_PLL_CMODE_1, 0x10); /* TODO: 0x00 for CPHY */ + dsi_phy_write(base + REG_DSI_7nm_PHY_PLL_CMODE_1, pll->phy->cphy_mode ? 
0x00 : 0x10); dsi_phy_write(base + REG_DSI_7nm_PHY_PLL_CLOCK_INVERTERS, config->pll_clock_inverters); } @@ -642,7 +642,8 @@ static int pll_7nm_register(struct dsi_pll_7nm *pll_7nm, struct clk_hw **provide /* DSI Byte clock = VCO_CLK / OUT_DIV / BIT_DIV / 8 */ hw = devm_clk_hw_register_fixed_factor(dev, clk_name, parent, - CLK_SET_RATE_PARENT, 1, 8); + CLK_SET_RATE_PARENT, 1, + pll_7nm->phy->cphy_mode ? 7 : 8); if (IS_ERR(hw)) { ret = PTR_ERR(hw); goto fail; @@ -663,32 +664,47 @@ static int pll_7nm_register(struct dsi_pll_7nm *pll_7nm, struct clk_hw **provide snprintf(clk_name, 32, "dsi%d_pll_post_out_div_clk", pll_7nm->phy->id); snprintf(parent, 32, "dsi%d_pll_out_div_clk", pll_7nm->phy->id); - hw = devm_clk_hw_register_fixed_factor(dev, clk_name, parent, - 0, 1, 4); + if (pll_7nm->phy->cphy_mode) + hw = devm_clk_hw_register_fixed_factor(dev, clk_name, parent, 0, 2, 7); + else + hw = devm_clk_hw_register_fixed_factor(dev, clk_name, parent, 0, 1, 4); if (IS_ERR(hw)) { ret = PTR_ERR(hw); goto fail; } - snprintf(clk_name, 32, "dsi%d_pclk_mux", pll_7nm->phy->id); - snprintf(parent, 32, "dsi%d_pll_bit_clk", pll_7nm->phy->id); - snprintf(parent2, 32, "dsi%d_pll_by_2_bit_clk", pll_7nm->phy->id); - snprintf(parent3, 32, "dsi%d_pll_out_div_clk", pll_7nm->phy->id); - snprintf(parent4, 32, "dsi%d_pll_post_out_div_clk", pll_7nm->phy->id); - - hw = devm_clk_hw_register_mux(dev, clk_name, - ((const char *[]){ - parent, parent2, parent3, parent4 - }), 4, 0, pll_7nm->phy->base + - REG_DSI_7nm_PHY_CMN_CLK_CFG1, - 0, 2, 0, NULL); - if (IS_ERR(hw)) { - ret = PTR_ERR(hw); - goto fail; + /* in CPHY mode, pclk_mux will always have post_out_div as parent + * don't register a pclk_mux clock and just use post_out_div instead + */ + if (pll_7nm->phy->cphy_mode) { + u32 data; + + data = dsi_phy_read(pll_7nm->phy->base + REG_DSI_7nm_PHY_CMN_CLK_CFG1); + dsi_phy_write(pll_7nm->phy->base + REG_DSI_7nm_PHY_CMN_CLK_CFG1, data | 3); + + snprintf(parent, 32, "dsi%d_pll_post_out_div_clk", pll_7nm->phy->id); + } else { + snprintf(clk_name, 32, "dsi%d_pclk_mux", pll_7nm->phy->id); + snprintf(parent, 32, "dsi%d_pll_bit_clk", pll_7nm->phy->id); + snprintf(parent2, 32, "dsi%d_pll_by_2_bit_clk", pll_7nm->phy->id); + snprintf(parent3, 32, "dsi%d_pll_out_div_clk", pll_7nm->phy->id); + snprintf(parent4, 32, "dsi%d_pll_post_out_div_clk", pll_7nm->phy->id); + + hw = devm_clk_hw_register_mux(dev, clk_name, + ((const char *[]){ + parent, parent2, parent3, parent4 + }), 4, 0, pll_7nm->phy->base + + REG_DSI_7nm_PHY_CMN_CLK_CFG1, + 0, 2, 0, NULL); + if (IS_ERR(hw)) { + ret = PTR_ERR(hw); + goto fail; + } + + snprintf(parent, 32, "dsi%d_pclk_mux", pll_7nm->phy->id); } snprintf(clk_name, 32, "dsi%d_phy_pll_out_dsiclk", pll_7nm->phy->id); - snprintf(parent, 32, "dsi%d_pclk_mux", pll_7nm->phy->id); /* PIX CLK DIV : DIV_CTRL_7_4*/ hw = devm_clk_hw_register_divider(dev, clk_name, parent, @@ -813,15 +829,21 @@ static int dsi_7nm_phy_enable(struct msm_dsi_phy *phy, struct msm_dsi_dphy_timing *timing = &phy->timing; void __iomem *base = phy->base; bool less_than_1500_mhz; - u32 vreg_ctrl_0, glbl_str_swi_cal_sel_ctrl, glbl_hstx_str_ctrl_0; + u32 vreg_ctrl_0, vreg_ctrl_1, lane_ctrl0; + u32 glbl_pemph_ctrl_0; + u32 glbl_str_swi_cal_sel_ctrl, glbl_hstx_str_ctrl_0; u32 glbl_rescode_top_ctrl, glbl_rescode_bot_ctrl; u32 data; DBG(""); - if (msm_dsi_dphy_timing_calc_v4(timing, clk_req)) { + if (phy->cphy_mode) + ret = msm_dsi_cphy_timing_calc_v4(timing, clk_req); + else + ret = msm_dsi_dphy_timing_calc_v4(timing, clk_req); + if (ret) { 
DRM_DEV_ERROR(&phy->pdev->dev, - "%s: D-PHY timing calculation failed\n", __func__); + "%s: PHY timing calculation failed\n", __func__); return -EINVAL; } @@ -842,6 +864,10 @@ static int dsi_7nm_phy_enable(struct msm_dsi_phy *phy, /* Alter PHY configurations if data rate less than 1.5GHZ*/ less_than_1500_mhz = (clk_req->bitclk_rate <= 1500000000); + /* For C-PHY, no low power settings for lower clk rate */ + if (phy->cphy_mode) + less_than_1500_mhz = false; + if (phy->cfg->quirks & DSI_PHY_7NM_QUIRK_V4_1) { vreg_ctrl_0 = less_than_1500_mhz ? 0x53 : 0x52; glbl_rescode_top_ctrl = less_than_1500_mhz ? 0x3d : 0x00; @@ -856,6 +882,17 @@ static int dsi_7nm_phy_enable(struct msm_dsi_phy *phy, glbl_rescode_bot_ctrl = 0x3c; } + if (phy->cphy_mode) { + vreg_ctrl_0 = 0x51; + vreg_ctrl_1 = 0x55; + glbl_pemph_ctrl_0 = 0x11; + lane_ctrl0 = 0x17; + } else { + vreg_ctrl_1 = 0x5c; + glbl_pemph_ctrl_0 = 0x00; + lane_ctrl0 = 0x1f; + } + /* de-assert digital and pll power down */ data = BIT(6) | BIT(5); dsi_phy_write(base + REG_DSI_7nm_PHY_CMN_CTRL_0, data); @@ -876,15 +913,22 @@ static int dsi_7nm_phy_enable(struct msm_dsi_phy *phy, dsi_phy_write(base + REG_DSI_7nm_PHY_CMN_LANE_CFG0, 0x21); dsi_phy_write(base + REG_DSI_7nm_PHY_CMN_LANE_CFG1, 0x84); + if (phy->cphy_mode) + dsi_phy_write(base + REG_DSI_7nm_PHY_CMN_GLBL_CTRL, BIT(6)); + /* Enable LDO */ dsi_phy_write(base + REG_DSI_7nm_PHY_CMN_VREG_CTRL_0, vreg_ctrl_0); - dsi_phy_write(base + REG_DSI_7nm_PHY_CMN_VREG_CTRL_1, 0x5c); + dsi_phy_write(base + REG_DSI_7nm_PHY_CMN_VREG_CTRL_1, vreg_ctrl_1); + dsi_phy_write(base + REG_DSI_7nm_PHY_CMN_CTRL_3, 0x00); dsi_phy_write(base + REG_DSI_7nm_PHY_CMN_GLBL_STR_SWI_CAL_SEL_CTRL, glbl_str_swi_cal_sel_ctrl); dsi_phy_write(base + REG_DSI_7nm_PHY_CMN_GLBL_HSTX_STR_CTRL_0, glbl_hstx_str_ctrl_0); - dsi_phy_write(base + REG_DSI_7nm_PHY_CMN_GLBL_PEMPH_CTRL_0, 0x00); + dsi_phy_write(base + REG_DSI_7nm_PHY_CMN_GLBL_PEMPH_CTRL_0, + glbl_pemph_ctrl_0); + if (phy->cphy_mode) + dsi_phy_write(base + REG_DSI_7nm_PHY_CMN_GLBL_PEMPH_CTRL_1, 0x01); dsi_phy_write(base + REG_DSI_7nm_PHY_CMN_GLBL_RESCODE_OFFSET_TOP_CTRL, glbl_rescode_top_ctrl); dsi_phy_write(base + REG_DSI_7nm_PHY_CMN_GLBL_RESCODE_OFFSET_BOT_CTRL, @@ -894,10 +938,11 @@ static int dsi_7nm_phy_enable(struct msm_dsi_phy *phy, /* Remove power down from all blocks */ dsi_phy_write(base + REG_DSI_7nm_PHY_CMN_CTRL_0, 0x7f); - dsi_phy_write(base + REG_DSI_7nm_PHY_CMN_LANE_CTRL0, 0x1f); + dsi_phy_write(base + REG_DSI_7nm_PHY_CMN_LANE_CTRL0, lane_ctrl0); /* Select full-rate mode */ - dsi_phy_write(base + REG_DSI_7nm_PHY_CMN_CTRL_2, 0x40); + if (!phy->cphy_mode) + dsi_phy_write(base + REG_DSI_7nm_PHY_CMN_CTRL_2, 0x40); ret = dsi_7nm_set_usecase(phy); if (ret) { @@ -907,22 +952,36 @@ static int dsi_7nm_phy_enable(struct msm_dsi_phy *phy, } /* DSI PHY timings */ - dsi_phy_write(base + REG_DSI_7nm_PHY_CMN_TIMING_CTRL_0, 0x00); - dsi_phy_write(base + REG_DSI_7nm_PHY_CMN_TIMING_CTRL_1, timing->clk_zero); - dsi_phy_write(base + REG_DSI_7nm_PHY_CMN_TIMING_CTRL_2, timing->clk_prepare); - dsi_phy_write(base + REG_DSI_7nm_PHY_CMN_TIMING_CTRL_3, timing->clk_trail); - dsi_phy_write(base + REG_DSI_7nm_PHY_CMN_TIMING_CTRL_4, timing->hs_exit); - dsi_phy_write(base + REG_DSI_7nm_PHY_CMN_TIMING_CTRL_5, timing->hs_zero); - dsi_phy_write(base + REG_DSI_7nm_PHY_CMN_TIMING_CTRL_6, timing->hs_prepare); - dsi_phy_write(base + REG_DSI_7nm_PHY_CMN_TIMING_CTRL_7, timing->hs_trail); - dsi_phy_write(base + REG_DSI_7nm_PHY_CMN_TIMING_CTRL_8, timing->hs_rqst); - dsi_phy_write(base + 
REG_DSI_7nm_PHY_CMN_TIMING_CTRL_9, 0x02); - dsi_phy_write(base + REG_DSI_7nm_PHY_CMN_TIMING_CTRL_10, 0x04); - dsi_phy_write(base + REG_DSI_7nm_PHY_CMN_TIMING_CTRL_11, 0x00); - dsi_phy_write(base + REG_DSI_7nm_PHY_CMN_TIMING_CTRL_12, - timing->shared_timings.clk_pre); - dsi_phy_write(base + REG_DSI_7nm_PHY_CMN_TIMING_CTRL_13, - timing->shared_timings.clk_post); + if (phy->cphy_mode) { + dsi_phy_write(base + REG_DSI_7nm_PHY_CMN_TIMING_CTRL_0, 0x00); + dsi_phy_write(base + REG_DSI_7nm_PHY_CMN_TIMING_CTRL_4, timing->hs_exit); + dsi_phy_write(base + REG_DSI_7nm_PHY_CMN_TIMING_CTRL_5, + timing->shared_timings.clk_pre); + dsi_phy_write(base + REG_DSI_7nm_PHY_CMN_TIMING_CTRL_6, timing->clk_prepare); + dsi_phy_write(base + REG_DSI_7nm_PHY_CMN_TIMING_CTRL_7, + timing->shared_timings.clk_post); + dsi_phy_write(base + REG_DSI_7nm_PHY_CMN_TIMING_CTRL_8, timing->hs_rqst); + dsi_phy_write(base + REG_DSI_7nm_PHY_CMN_TIMING_CTRL_9, 0x02); + dsi_phy_write(base + REG_DSI_7nm_PHY_CMN_TIMING_CTRL_10, 0x04); + dsi_phy_write(base + REG_DSI_7nm_PHY_CMN_TIMING_CTRL_11, 0x00); + } else { + dsi_phy_write(base + REG_DSI_7nm_PHY_CMN_TIMING_CTRL_0, 0x00); + dsi_phy_write(base + REG_DSI_7nm_PHY_CMN_TIMING_CTRL_1, timing->clk_zero); + dsi_phy_write(base + REG_DSI_7nm_PHY_CMN_TIMING_CTRL_2, timing->clk_prepare); + dsi_phy_write(base + REG_DSI_7nm_PHY_CMN_TIMING_CTRL_3, timing->clk_trail); + dsi_phy_write(base + REG_DSI_7nm_PHY_CMN_TIMING_CTRL_4, timing->hs_exit); + dsi_phy_write(base + REG_DSI_7nm_PHY_CMN_TIMING_CTRL_5, timing->hs_zero); + dsi_phy_write(base + REG_DSI_7nm_PHY_CMN_TIMING_CTRL_6, timing->hs_prepare); + dsi_phy_write(base + REG_DSI_7nm_PHY_CMN_TIMING_CTRL_7, timing->hs_trail); + dsi_phy_write(base + REG_DSI_7nm_PHY_CMN_TIMING_CTRL_8, timing->hs_rqst); + dsi_phy_write(base + REG_DSI_7nm_PHY_CMN_TIMING_CTRL_9, 0x02); + dsi_phy_write(base + REG_DSI_7nm_PHY_CMN_TIMING_CTRL_10, 0x04); + dsi_phy_write(base + REG_DSI_7nm_PHY_CMN_TIMING_CTRL_11, 0x00); + dsi_phy_write(base + REG_DSI_7nm_PHY_CMN_TIMING_CTRL_12, + timing->shared_timings.clk_pre); + dsi_phy_write(base + REG_DSI_7nm_PHY_CMN_TIMING_CTRL_13, + timing->shared_timings.clk_post); + } /* DSI lane settings */ dsi_phy_hw_v4_0_lane_settings(phy); @@ -932,6 +991,21 @@ static int dsi_7nm_phy_enable(struct msm_dsi_phy *phy, return 0; } +static bool dsi_7nm_set_continuous_clock(struct msm_dsi_phy *phy, bool enable) +{ + void __iomem *base = phy->base; + u32 data; + + data = dsi_phy_read(base + REG_DSI_7nm_PHY_CMN_LANE_CTRL1); + if (enable) + data |= BIT(5) | BIT(6); + else + data &= ~(BIT(5) | BIT(6)); + dsi_phy_write(base + REG_DSI_7nm_PHY_CMN_LANE_CTRL1, data); + + return enable; +} + static void dsi_7nm_phy_disable(struct msm_dsi_phy *phy) { void __iomem *base = phy->base; @@ -972,6 +1046,7 @@ const struct msm_dsi_phy_cfg dsi_phy_7nm_cfgs = { .pll_init = dsi_pll_7nm_init, .save_pll_state = dsi_7nm_pll_save_state, .restore_pll_state = dsi_7nm_pll_restore_state, + .set_continuous_clock = dsi_7nm_set_continuous_clock, }, .min_pll_rate = 600000000UL, #ifdef CONFIG_64BIT @@ -998,9 +1073,36 @@ const struct msm_dsi_phy_cfg dsi_phy_7nm_8150_cfgs = { .pll_init = dsi_pll_7nm_init, .save_pll_state = dsi_7nm_pll_save_state, .restore_pll_state = dsi_7nm_pll_restore_state, + .set_continuous_clock = dsi_7nm_set_continuous_clock, }, .min_pll_rate = 1000000000UL, .max_pll_rate = 3500000000UL, .io_start = { 0xae94400, 0xae96400 }, .num_dsi_phy = 2, }; + +const struct msm_dsi_phy_cfg dsi_phy_7nm_7280_cfgs = { + .has_phy_lane = true, + .reg_cfg = { + .num = 1, + .regs = { + {"vdds", 
37550, 0}, + }, + }, + .ops = { + .enable = dsi_7nm_phy_enable, + .disable = dsi_7nm_phy_disable, + .pll_init = dsi_pll_7nm_init, + .save_pll_state = dsi_7nm_pll_save_state, + .restore_pll_state = dsi_7nm_pll_restore_state, + }, + .min_pll_rate = 600000000UL, +#ifdef CONFIG_64BIT + .max_pll_rate = 5000000000ULL, +#else + .max_pll_rate = ULONG_MAX, +#endif + .io_start = { 0xae94400 }, + .num_dsi_phy = 1, + .quirks = DSI_PHY_7NM_QUIRK_V4_1, +}; diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index 9b8fa2ad0d84..2e6fc185e54d 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -14,7 +14,6 @@ #include <drm/drm_drv.h> #include <drm/drm_file.h> #include <drm/drm_ioctl.h> -#include <drm/drm_irq.h> #include <drm/drm_prime.h> #include <drm/drm_of.h> #include <drm/drm_vblank.h> @@ -201,6 +200,71 @@ void msm_rmw(void __iomem *addr, u32 mask, u32 or) msm_writel(val | or, addr); } +static irqreturn_t msm_irq(int irq, void *arg) +{ + struct drm_device *dev = arg; + struct msm_drm_private *priv = dev->dev_private; + struct msm_kms *kms = priv->kms; + + BUG_ON(!kms); + + return kms->funcs->irq(kms); +} + +static void msm_irq_preinstall(struct drm_device *dev) +{ + struct msm_drm_private *priv = dev->dev_private; + struct msm_kms *kms = priv->kms; + + BUG_ON(!kms); + + kms->funcs->irq_preinstall(kms); +} + +static int msm_irq_postinstall(struct drm_device *dev) +{ + struct msm_drm_private *priv = dev->dev_private; + struct msm_kms *kms = priv->kms; + + BUG_ON(!kms); + + if (kms->funcs->irq_postinstall) + return kms->funcs->irq_postinstall(kms); + + return 0; +} + +static int msm_irq_install(struct drm_device *dev, unsigned int irq) +{ + int ret; + + if (irq == IRQ_NOTCONNECTED) + return -ENOTCONN; + + msm_irq_preinstall(dev); + + ret = request_irq(irq, msm_irq, 0, dev->driver->name, dev); + if (ret) + return ret; + + ret = msm_irq_postinstall(dev); + if (ret) { + free_irq(irq, dev); + return ret; + } + + return 0; +} + +static void msm_irq_uninstall(struct drm_device *dev) +{ + struct msm_drm_private *priv = dev->dev_private; + struct msm_kms *kms = priv->kms; + + kms->funcs->irq_uninstall(kms); + free_irq(kms->irq, dev); +} + struct msm_vblank_work { struct work_struct work; int crtc_id; @@ -265,7 +329,7 @@ static int msm_drm_uninit(struct device *dev) } /* We must cancel and cleanup any pending vblank enable/disable - * work before drm_irq_uninstall() to avoid work re-enabling an + * work before msm_irq_uninstall() to avoid work re-enabling an * irq after uninstall has disabled it. 
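With the <drm/drm_irq.h> helpers gone, interrupt setup is handled by the driver itself: msm_irq_install() wraps a plain request_irq() with the KMS irq_preinstall/irq_postinstall hooks, and msm_irq_uninstall() reverses both steps. Condensed from the msm_drm_init()/msm_drm_uninit() hunks below, the resulting lifecycle looks roughly like this, assuming kms->irq was already resolved from the platform device:

static int example_kms_irq_init(struct drm_device *ddev, struct msm_kms *kms)
{
	int ret;

	pm_runtime_get_sync(ddev->dev);
	/* preinstall + request_irq() + postinstall; -ENOTCONN if no irq */
	ret = msm_irq_install(ddev, kms->irq);
	pm_runtime_put_sync(ddev->dev);

	return ret;
}

static void example_kms_irq_fini(struct drm_device *ddev)
{
	pm_runtime_get_sync(ddev->dev);
	/* kms->funcs->irq_uninstall() followed by free_irq() */
	msm_irq_uninstall(ddev);
	pm_runtime_put_sync(ddev->dev);
}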
*/ @@ -294,7 +358,7 @@ static int msm_drm_uninit(struct device *dev) drm_mode_config_cleanup(ddev); pm_runtime_get_sync(dev); - drm_irq_uninstall(ddev); + msm_irq_uninstall(ddev); pm_runtime_put_sync(dev); if (kms && kms->funcs) @@ -539,6 +603,7 @@ static int msm_drm_init(struct device *dev, const struct drm_driver *drv) if (IS_ERR(priv->event_thread[i].worker)) { ret = PTR_ERR(priv->event_thread[i].worker); DRM_DEV_ERROR(dev, "failed to create crtc_event kthread\n"); + ret = PTR_ERR(priv->event_thread[i].worker); goto err_msm_uninit; } @@ -553,7 +618,7 @@ static int msm_drm_init(struct device *dev, const struct drm_driver *drv) if (kms) { pm_runtime_get_sync(dev); - ret = drm_irq_install(ddev, kms->irq); + ret = msm_irq_install(ddev, kms->irq); pm_runtime_put_sync(dev); if (ret < 0) { DRM_DEV_ERROR(dev, "failed to install IRQ handler\n"); @@ -662,43 +727,6 @@ static void msm_postclose(struct drm_device *dev, struct drm_file *file) context_close(ctx); } -static irqreturn_t msm_irq(int irq, void *arg) -{ - struct drm_device *dev = arg; - struct msm_drm_private *priv = dev->dev_private; - struct msm_kms *kms = priv->kms; - BUG_ON(!kms); - return kms->funcs->irq(kms); -} - -static void msm_irq_preinstall(struct drm_device *dev) -{ - struct msm_drm_private *priv = dev->dev_private; - struct msm_kms *kms = priv->kms; - BUG_ON(!kms); - kms->funcs->irq_preinstall(kms); -} - -static int msm_irq_postinstall(struct drm_device *dev) -{ - struct msm_drm_private *priv = dev->dev_private; - struct msm_kms *kms = priv->kms; - BUG_ON(!kms); - - if (kms->funcs->irq_postinstall) - return kms->funcs->irq_postinstall(kms); - - return 0; -} - -static void msm_irq_uninstall(struct drm_device *dev) -{ - struct msm_drm_private *priv = dev->dev_private; - struct msm_kms *kms = priv->kms; - BUG_ON(!kms); - kms->funcs->irq_uninstall(kms); -} - int msm_crtc_enable_vblank(struct drm_crtc *crtc) { struct drm_device *dev = crtc->dev; @@ -911,6 +939,7 @@ static int msm_ioctl_wait_fence(struct drm_device *dev, void *data, ktime_t timeout = to_ktime(args->timeout); struct msm_gpu_submitqueue *queue; struct msm_gpu *gpu = priv->gpu; + struct dma_fence *fence; int ret; if (args->pad) { @@ -925,10 +954,35 @@ static int msm_ioctl_wait_fence(struct drm_device *dev, void *data, if (!queue) return -ENOENT; - ret = msm_wait_fence(gpu->rb[queue->prio]->fctx, args->fence, &timeout, - true); + /* + * Map submitqueue scoped "seqno" (which is actually an idr key) + * back to underlying dma-fence + * + * The fence is removed from the fence_idr when the submit is + * retired, so if the fence is not found it means there is nothing + * to wait for + */ + ret = mutex_lock_interruptible(&queue->lock); + if (ret) + return ret; + fence = idr_find(&queue->fence_idr, args->fence); + if (fence) + fence = dma_fence_get_rcu(fence); + mutex_unlock(&queue->lock); + + if (!fence) + return 0; + + ret = dma_fence_wait_timeout(fence, true, timeout_to_jiffies(&timeout)); + if (ret == 0) { + ret = -ETIMEDOUT; + } else if (ret != -ERESTARTSYS) { + ret = 0; + } + dma_fence_put(fence); msm_submitqueue_put(queue); + return ret; } @@ -1004,17 +1058,7 @@ static const struct drm_ioctl_desc msm_ioctls[] = { DRM_IOCTL_DEF_DRV(MSM_SUBMITQUEUE_QUERY, msm_ioctl_submitqueue_query, DRM_RENDER_ALLOW), }; -static const struct file_operations fops = { - .owner = THIS_MODULE, - .open = drm_open, - .release = drm_release, - .unlocked_ioctl = drm_ioctl, - .compat_ioctl = drm_compat_ioctl, - .poll = drm_poll, - .read = drm_read, - .llseek = no_llseek, - .mmap = msm_gem_mmap, 
-}; +DEFINE_DRM_GEM_FOPS(fops); static const struct drm_driver msm_driver = { .driver_features = DRIVER_GEM | @@ -1025,16 +1069,12 @@ static const struct drm_driver msm_driver = { .open = msm_open, .postclose = msm_postclose, .lastclose = drm_fb_helper_lastclose, - .irq_handler = msm_irq, - .irq_preinstall = msm_irq_preinstall, - .irq_postinstall = msm_irq_postinstall, - .irq_uninstall = msm_irq_uninstall, .dumb_create = msm_gem_dumb_create, .dumb_map_offset = msm_gem_dumb_map_offset, .prime_handle_to_fd = drm_gem_prime_handle_to_fd, .prime_fd_to_handle = drm_gem_prime_fd_to_handle, .gem_prime_import_sg_table = msm_gem_prime_import_sg_table, - .gem_prime_mmap = msm_gem_prime_mmap, + .gem_prime_mmap = drm_gem_prime_mmap, #ifdef CONFIG_DEBUG_FS .debugfs_init = msm_debugfs_init, #endif diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h index 1a48a709ffb3..8b005d1ac899 100644 --- a/drivers/gpu/drm/msm/msm_drv.h +++ b/drivers/gpu/drm/msm/msm_drv.h @@ -309,7 +309,6 @@ void msm_gem_shrinker_cleanup(struct drm_device *dev); struct sg_table *msm_gem_prime_get_sg_table(struct drm_gem_object *obj); int msm_gem_prime_vmap(struct drm_gem_object *obj, struct dma_buf_map *map); void msm_gem_prime_vunmap(struct drm_gem_object *obj, struct dma_buf_map *map); -int msm_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma); struct drm_gem_object *msm_gem_prime_import_sg_table(struct drm_device *dev, struct dma_buf_attachment *attach, struct sg_table *sg); int msm_gem_prime_pin(struct drm_gem_object *obj); @@ -350,7 +349,9 @@ void __exit msm_dsi_unregister(void); int msm_dsi_modeset_init(struct msm_dsi *msm_dsi, struct drm_device *dev, struct drm_encoder *encoder); void msm_dsi_snapshot(struct msm_disp_state *disp_state, struct msm_dsi *msm_dsi); - +bool msm_dsi_is_cmd_mode(struct msm_dsi *msm_dsi); +bool msm_dsi_is_bonded_dsi(struct msm_dsi *msm_dsi); +bool msm_dsi_is_master_dsi(struct msm_dsi *msm_dsi); #else static inline void __init msm_dsi_register(void) { @@ -367,7 +368,18 @@ static inline int msm_dsi_modeset_init(struct msm_dsi *msm_dsi, static inline void msm_dsi_snapshot(struct msm_disp_state *disp_state, struct msm_dsi *msm_dsi) { } - +static inline bool msm_dsi_is_cmd_mode(struct msm_dsi *msm_dsi) +{ + return false; +} +static inline bool msm_dsi_is_bonded_dsi(struct msm_dsi *msm_dsi) +{ + return false; +} +static inline bool msm_dsi_is_master_dsi(struct msm_dsi *msm_dsi) +{ + return false; +} #endif #ifdef CONFIG_DRM_MSM_DP diff --git a/drivers/gpu/drm/msm/msm_fbdev.c b/drivers/gpu/drm/msm/msm_fbdev.c index 227404077e39..0daaeb54ff6f 100644 --- a/drivers/gpu/drm/msm/msm_fbdev.c +++ b/drivers/gpu/drm/msm/msm_fbdev.c @@ -8,13 +8,12 @@ #include <drm/drm_crtc.h> #include <drm/drm_fb_helper.h> #include <drm/drm_fourcc.h> +#include <drm/drm_prime.h> #include "msm_drv.h" #include "msm_gem.h" #include "msm_kms.h" -extern int msm_gem_mmap_obj(struct drm_gem_object *obj, - struct vm_area_struct *vma); static int msm_fbdev_mmap(struct fb_info *info, struct vm_area_struct *vma); /* @@ -48,15 +47,8 @@ static int msm_fbdev_mmap(struct fb_info *info, struct vm_area_struct *vma) struct drm_fb_helper *helper = (struct drm_fb_helper *)info->par; struct msm_fbdev *fbdev = to_msm_fbdev(helper); struct drm_gem_object *bo = msm_framebuffer_bo(fbdev->fb, 0); - int ret = 0; - ret = drm_gem_mmap_obj(bo, bo->size, vma); - if (ret) { - pr_err("%s:drm_gem_mmap_obj fail\n", __func__); - return ret; - } - - return msm_gem_mmap_obj(bo, vma); + return drm_gem_prime_mmap(bo, vma); } static 
int msm_fbdev_create(struct drm_fb_helper *helper, @@ -169,7 +161,7 @@ struct drm_fb_helper *msm_fbdev_init(struct drm_device *dev) } /* the fw fb could be anywhere in memory */ - ret = drm_aperture_remove_framebuffers(false, "msm"); + ret = drm_aperture_remove_framebuffers(false, dev->driver); if (ret) goto fini; diff --git a/drivers/gpu/drm/msm/msm_fence.c b/drivers/gpu/drm/msm/msm_fence.c index cd59a5918038..f2cece542c3f 100644 --- a/drivers/gpu/drm/msm/msm_fence.c +++ b/drivers/gpu/drm/msm/msm_fence.c @@ -11,7 +11,8 @@ struct msm_fence_context * -msm_fence_context_alloc(struct drm_device *dev, const char *name) +msm_fence_context_alloc(struct drm_device *dev, volatile uint32_t *fenceptr, + const char *name) { struct msm_fence_context *fctx; @@ -22,7 +23,7 @@ msm_fence_context_alloc(struct drm_device *dev, const char *name) fctx->dev = dev; strncpy(fctx->name, name, sizeof(fctx->name)); fctx->context = dma_fence_context_alloc(1); - init_waitqueue_head(&fctx->event); + fctx->fenceptr = fenceptr; spin_lock_init(&fctx->spinlock); return fctx; @@ -35,46 +36,12 @@ void msm_fence_context_free(struct msm_fence_context *fctx) static inline bool fence_completed(struct msm_fence_context *fctx, uint32_t fence) { - return (int32_t)(fctx->completed_fence - fence) >= 0; -} - -/* legacy path for WAIT_FENCE ioctl: */ -int msm_wait_fence(struct msm_fence_context *fctx, uint32_t fence, - ktime_t *timeout, bool interruptible) -{ - int ret; - - if (fence > fctx->last_fence) { - DRM_ERROR_RATELIMITED("%s: waiting on invalid fence: %u (of %u)\n", - fctx->name, fence, fctx->last_fence); - return -EINVAL; - } - - if (!timeout) { - /* no-wait: */ - ret = fence_completed(fctx, fence) ? 0 : -EBUSY; - } else { - unsigned long remaining_jiffies = timeout_to_jiffies(timeout); - - if (interruptible) - ret = wait_event_interruptible_timeout(fctx->event, - fence_completed(fctx, fence), - remaining_jiffies); - else - ret = wait_event_timeout(fctx->event, - fence_completed(fctx, fence), - remaining_jiffies); - - if (ret == 0) { - DBG("timeout waiting for fence: %u (completed: %u)", - fence, fctx->completed_fence); - ret = -ETIMEDOUT; - } else if (ret != -ERESTARTSYS) { - ret = 0; - } - } - - return ret; + /* + * Note: Check completed_fence first, as fenceptr is in a write-combine + * mapping, so it will be more expensive to read. 
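The rewritten fence_completed() consults two monotonically increasing seqno sources, the CPU-maintained completed_fence and the GPU-written *fenceptr, and both comparisons rely on the usual wraparound-safe signed-difference idiom. A standalone illustration of why the cast keeps working across a 32-bit rollover:

#include <stdbool.h>
#include <stdint.h>

/* "completed is at or after fence"; valid as long as the two values are
 * within 2^31 of each other, which holds for in-flight fences */
static inline bool seqno_passed(uint32_t completed, uint32_t fence)
{
	return (int32_t)(completed - fence) >= 0;
}

/*
 * seqno_passed(0x00000002, 0xfffffffe) is true: the counter has wrapped,
 * the unsigned difference is 4, and (int32_t)4 >= 0.
 * seqno_passed(0xfffffffe, 0x00000002) is false: the difference wraps to
 * 0xfffffffc, which is -4 when reinterpreted as int32_t.
 */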
+ */ + return (int32_t)(fctx->completed_fence - fence) >= 0 || + (int32_t)(*fctx->fenceptr - fence) >= 0; } /* called from workqueue */ @@ -83,8 +50,6 @@ void msm_update_fence(struct msm_fence_context *fctx, uint32_t fence) spin_lock(&fctx->spinlock); fctx->completed_fence = max(fence, fctx->completed_fence); spin_unlock(&fctx->spinlock); - - wake_up_all(&fctx->event); } struct msm_fence { diff --git a/drivers/gpu/drm/msm/msm_fence.h b/drivers/gpu/drm/msm/msm_fence.h index 2d9af66dcca5..4783db528bcc 100644 --- a/drivers/gpu/drm/msm/msm_fence.h +++ b/drivers/gpu/drm/msm/msm_fence.h @@ -9,23 +9,53 @@ #include "msm_drv.h" +/** + * struct msm_fence_context - fence context for gpu + * + * Each ringbuffer has a single fence context, with the GPU writing an + * incrementing fence seqno at the end of each submit + */ struct msm_fence_context { struct drm_device *dev; + /** name: human readable name for fence timeline */ char name[32]; + /** context: see dma_fence_context_alloc() */ unsigned context; - /* last_fence == completed_fence --> no pending work */ - uint32_t last_fence; /* last assigned fence */ - uint32_t completed_fence; /* last completed fence */ - wait_queue_head_t event; + + /** + * last_fence: + * + * Last assigned fence, incremented each time a fence is created + * on this fence context. If last_fence == completed_fence, + * there is no remaining pending work + */ + uint32_t last_fence; + + /** + * completed_fence: + * + * The last completed fence, updated from the CPU after interrupt + * from GPU + */ + uint32_t completed_fence; + + /** + * fenceptr: + * + * The address that the GPU directly writes with completed fence + * seqno. This can be ahead of completed_fence. We can peek at + * this to see if a fence has already signaled but the CPU hasn't + * gotten around to handling the irq and updating completed_fence + */ + volatile uint32_t *fenceptr; + spinlock_t spinlock; }; struct msm_fence_context * msm_fence_context_alloc(struct drm_device *dev, - const char *name); + volatile uint32_t *fenceptr, const char *name); void msm_fence_context_free(struct msm_fence_context *fctx); -int msm_wait_fence(struct msm_fence_context *fctx, uint32_t fence, - ktime_t *timeout, bool interruptible); void msm_update_fence(struct msm_fence_context *fctx, uint32_t fence); struct dma_fence * msm_fence_alloc(struct msm_fence_context *fctx); diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index 1e8a971a86f2..22308a1b66fc 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -131,7 +131,6 @@ static struct page **get_pages(struct drm_gem_object *obj) if (msm_obj->flags & (MSM_BO_WC|MSM_BO_UNCACHED)) sync_for_device(msm_obj); - GEM_WARN_ON(msm_obj->active_count); update_inactive(msm_obj); } @@ -218,31 +217,6 @@ static pgprot_t msm_gem_pgprot(struct msm_gem_object *msm_obj, pgprot_t prot) return prot; } -int msm_gem_mmap_obj(struct drm_gem_object *obj, - struct vm_area_struct *vma) -{ - struct msm_gem_object *msm_obj = to_msm_bo(obj); - - vma->vm_flags &= ~VM_PFNMAP; - vma->vm_flags |= VM_MIXEDMAP; - vma->vm_page_prot = msm_gem_pgprot(msm_obj, vm_get_page_prot(vma->vm_flags)); - - return 0; -} - -int msm_gem_mmap(struct file *filp, struct vm_area_struct *vma) -{ - int ret; - - ret = drm_gem_mmap(filp, vma); - if (ret) { - DBG("mmap failed: %d", ret); - return ret; - } - - return msm_gem_mmap_obj(vma->vm_private_data, vma); -} - static vm_fault_t msm_gem_fault(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; @@ -804,41 +778,6 @@ void 
msm_gem_vunmap(struct drm_gem_object *obj) msm_obj->vaddr = NULL; } -/* must be called before _move_to_active().. */ -int msm_gem_sync_object(struct drm_gem_object *obj, - struct msm_fence_context *fctx, bool exclusive) -{ - struct dma_resv_list *fobj; - struct dma_fence *fence; - int i, ret; - - fobj = dma_resv_shared_list(obj->resv); - if (!fobj || (fobj->shared_count == 0)) { - fence = dma_resv_excl_fence(obj->resv); - /* don't need to wait on our own fences, since ring is fifo */ - if (fence && (fence->context != fctx->context)) { - ret = dma_fence_wait(fence, true); - if (ret) - return ret; - } - } - - if (!exclusive || !fobj) - return 0; - - for (i = 0; i < fobj->shared_count; i++) { - fence = rcu_dereference_protected(fobj->shared[i], - dma_resv_held(obj->resv)); - if (fence->context != fctx->context) { - ret = dma_fence_wait(fence, true); - if (ret) - return ret; - } - } - - return 0; -} - void msm_gem_active_get(struct drm_gem_object *obj, struct msm_gpu *gpu) { struct msm_gem_object *msm_obj = to_msm_bo(obj); @@ -848,14 +787,12 @@ void msm_gem_active_get(struct drm_gem_object *obj, struct msm_gpu *gpu) GEM_WARN_ON(!msm_gem_is_locked(obj)); GEM_WARN_ON(msm_obj->madv != MSM_MADV_WILLNEED); GEM_WARN_ON(msm_obj->dontneed); - GEM_WARN_ON(!msm_obj->sgt); if (msm_obj->active_count++ == 0) { mutex_lock(&priv->mm_lock); if (msm_obj->evictable) mark_unevictable(msm_obj); - list_del(&msm_obj->mm_list); - list_add_tail(&msm_obj->mm_list, &gpu->active_list); + list_move_tail(&msm_obj->mm_list, &gpu->active_list); mutex_unlock(&priv->mm_lock); } } @@ -1062,7 +999,7 @@ void msm_gem_describe_objects(struct list_head *list, struct seq_file *m) } #endif -/* don't call directly! Use drm_gem_object_put_locked() and friends */ +/* don't call directly! Use drm_gem_object_put() */ void msm_gem_free_object(struct drm_gem_object *obj) { struct msm_gem_object *msm_obj = to_msm_bo(obj); @@ -1114,6 +1051,17 @@ void msm_gem_free_object(struct drm_gem_object *obj) kfree(msm_obj); } +static int msm_gem_object_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) +{ + struct msm_gem_object *msm_obj = to_msm_bo(obj); + + vma->vm_flags &= ~VM_PFNMAP; + vma->vm_flags |= VM_MIXEDMAP | VM_DONTEXPAND; + vma->vm_page_prot = msm_gem_pgprot(msm_obj, vm_get_page_prot(vma->vm_flags)); + + return 0; +} + /* convenience method to construct a GEM buffer object, and userspace handle */ int msm_gem_new_handle(struct drm_device *dev, struct drm_file *file, uint32_t size, uint32_t flags, uint32_t *handle, @@ -1151,6 +1099,7 @@ static const struct drm_gem_object_funcs msm_gem_object_funcs = { .get_sg_table = msm_gem_prime_get_sg_table, .vmap = msm_gem_prime_vmap, .vunmap = msm_gem_prime_vunmap, + .mmap = msm_gem_object_mmap, .vm_ops = &vm_ops, }; @@ -1183,7 +1132,6 @@ static int msm_gem_new_impl(struct drm_device *dev, msm_obj->flags = flags; msm_obj->madv = MSM_MADV_WILLNEED; - INIT_LIST_HEAD(&msm_obj->submit_entry); INIT_LIST_HEAD(&msm_obj->vmas); *obj = &msm_obj->base; @@ -1192,8 +1140,7 @@ static int msm_gem_new_impl(struct drm_device *dev, return 0; } -static struct drm_gem_object *_msm_gem_new(struct drm_device *dev, - uint32_t size, uint32_t flags, bool struct_mutex_locked) +struct drm_gem_object *msm_gem_new(struct drm_device *dev, uint32_t size, uint32_t flags) { struct msm_drm_private *priv = dev->dev_private; struct msm_gem_object *msm_obj; @@ -1280,26 +1227,10 @@ static struct drm_gem_object *_msm_gem_new(struct drm_device *dev, return obj; fail: - if (struct_mutex_locked) { - drm_gem_object_put_locked(obj); - 
} else { - drm_gem_object_put(obj); - } + drm_gem_object_put(obj); return ERR_PTR(ret); } -struct drm_gem_object *msm_gem_new_locked(struct drm_device *dev, - uint32_t size, uint32_t flags) -{ - return _msm_gem_new(dev, size, flags, true); -} - -struct drm_gem_object *msm_gem_new(struct drm_device *dev, - uint32_t size, uint32_t flags) -{ - return _msm_gem_new(dev, size, flags, false); -} - struct drm_gem_object *msm_gem_import(struct drm_device *dev, struct dma_buf *dmabuf, struct sg_table *sgt) { @@ -1358,12 +1289,12 @@ fail: return ERR_PTR(ret); } -static void *_msm_gem_kernel_new(struct drm_device *dev, uint32_t size, +void *msm_gem_kernel_new(struct drm_device *dev, uint32_t size, uint32_t flags, struct msm_gem_address_space *aspace, - struct drm_gem_object **bo, uint64_t *iova, bool locked) + struct drm_gem_object **bo, uint64_t *iova) { void *vaddr; - struct drm_gem_object *obj = _msm_gem_new(dev, size, flags, locked); + struct drm_gem_object *obj = msm_gem_new(dev, size, flags); int ret; if (IS_ERR(obj)) @@ -1387,42 +1318,21 @@ static void *_msm_gem_kernel_new(struct drm_device *dev, uint32_t size, return vaddr; err: - if (locked) - drm_gem_object_put_locked(obj); - else - drm_gem_object_put(obj); + drm_gem_object_put(obj); return ERR_PTR(ret); } -void *msm_gem_kernel_new(struct drm_device *dev, uint32_t size, - uint32_t flags, struct msm_gem_address_space *aspace, - struct drm_gem_object **bo, uint64_t *iova) -{ - return _msm_gem_kernel_new(dev, size, flags, aspace, bo, iova, false); -} - -void *msm_gem_kernel_new_locked(struct drm_device *dev, uint32_t size, - uint32_t flags, struct msm_gem_address_space *aspace, - struct drm_gem_object **bo, uint64_t *iova) -{ - return _msm_gem_kernel_new(dev, size, flags, aspace, bo, iova, true); -} - void msm_gem_kernel_put(struct drm_gem_object *bo, - struct msm_gem_address_space *aspace, bool locked) + struct msm_gem_address_space *aspace) { if (IS_ERR_OR_NULL(bo)) return; msm_gem_put_vaddr(bo); msm_gem_unpin_iova(bo, aspace); - - if (locked) - drm_gem_object_put_locked(bo); - else - drm_gem_object_put(bo); + drm_gem_object_put(bo); } void msm_gem_object_set_name(struct drm_gem_object *bo, const char *fmt, ...) diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h index 405f8411e395..e39a8e7ad843 100644 --- a/drivers/gpu/drm/msm/msm_gem.h +++ b/drivers/gpu/drm/msm/msm_gem.h @@ -9,6 +9,7 @@ #include <linux/kref.h> #include <linux/dma-resv.h> +#include "drm/gpu_scheduler.h" #include "msm_drv.h" /* Make all GEM related WARN_ON()s ratelimited.. when things go wrong they @@ -87,13 +88,6 @@ struct msm_gem_object { */ struct list_head mm_list; - /* Transiently in the process of submit ioctl, objects associated - * with the submit are on submit->bo_list.. this only lasts for - * the duration of the ioctl, so one bo can never be on multiple - * submit lists. 
- */ - struct list_head submit_entry; - struct page **pages; struct sg_table *sgt; void *vaddr; @@ -112,9 +106,6 @@ struct msm_gem_object { }; #define to_msm_bo(x) container_of(x, struct msm_gem_object, base) -int msm_gem_mmap_obj(struct drm_gem_object *obj, - struct vm_area_struct *vma); -int msm_gem_mmap(struct file *filp, struct vm_area_struct *vma); uint64_t msm_gem_mmap_offset(struct drm_gem_object *obj); int msm_gem_get_iova(struct drm_gem_object *obj, struct msm_gem_address_space *aspace, uint64_t *iova); @@ -143,8 +134,6 @@ void *msm_gem_get_vaddr_active(struct drm_gem_object *obj); void msm_gem_put_vaddr_locked(struct drm_gem_object *obj); void msm_gem_put_vaddr(struct drm_gem_object *obj); int msm_gem_madvise(struct drm_gem_object *obj, unsigned madv); -int msm_gem_sync_object(struct drm_gem_object *obj, - struct msm_fence_context *fctx, bool exclusive); void msm_gem_active_get(struct drm_gem_object *obj, struct msm_gpu *gpu); void msm_gem_active_put(struct drm_gem_object *obj); int msm_gem_cpu_prep(struct drm_gem_object *obj, uint32_t op, ktime_t *timeout); @@ -154,16 +143,11 @@ int msm_gem_new_handle(struct drm_device *dev, struct drm_file *file, uint32_t size, uint32_t flags, uint32_t *handle, char *name); struct drm_gem_object *msm_gem_new(struct drm_device *dev, uint32_t size, uint32_t flags); -struct drm_gem_object *msm_gem_new_locked(struct drm_device *dev, - uint32_t size, uint32_t flags); void *msm_gem_kernel_new(struct drm_device *dev, uint32_t size, uint32_t flags, struct msm_gem_address_space *aspace, struct drm_gem_object **bo, uint64_t *iova); -void *msm_gem_kernel_new_locked(struct drm_device *dev, uint32_t size, - uint32_t flags, struct msm_gem_address_space *aspace, - struct drm_gem_object **bo, uint64_t *iova); void msm_gem_kernel_put(struct drm_gem_object *bo, - struct msm_gem_address_space *aspace, bool locked); + struct msm_gem_address_space *aspace); struct drm_gem_object *msm_gem_import(struct drm_device *dev, struct dma_buf *dmabuf, struct sg_table *sgt); __printf(2, 3) @@ -313,19 +297,34 @@ void msm_gem_vunmap(struct drm_gem_object *obj); /* Created per submit-ioctl, to track bo's and cmdstream bufs, etc, * associated with the cmdstream submission for synchronization (and - * make it easier to unwind when things go wrong, etc). This only - * lasts for the duration of the submit-ioctl. + * make it easier to unwind when things go wrong, etc). */ struct msm_gem_submit { + struct drm_sched_job base; struct kref ref; struct drm_device *dev; struct msm_gpu *gpu; struct msm_gem_address_space *aspace; struct list_head node; /* node in ring submit list */ - struct list_head bo_list; struct ww_acquire_ctx ticket; uint32_t seqno; /* Sequence number of the submit on the ring */ - struct dma_fence *fence; + + /* Array of struct dma_fence * to block on before submitting this job. + */ + struct xarray deps; + unsigned long last_dep; + + /* Hw fence, which is created when the scheduler executes the job, and + * is signaled when the hw finishes (via seqno write from cmdstream) + */ + struct dma_fence *hw_fence; + + /* Userspace visible fence, which is signaled by the scheduler after + * the hw_fence is signaled. 
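The deps xarray collects every dma_fence the job must wait on before the scheduler is allowed to run it; submit_fence_sync() further down feeds it the implicit dma_resv fences through drm_gem_fence_array_add_implicit(). A short sketch of how an explicit syncobj wait would be folded into the same array, assuming syncobj plumbing (the file and handle arguments) that sits outside this excerpt:

#include <drm/drm_gem.h>
#include <drm/drm_syncobj.h>

static int example_add_syncobj_dep(struct msm_gem_submit *submit,
				   struct drm_file *file, u32 handle)
{
	struct dma_fence *fence;
	int ret;

	/* resolve the userspace syncobj handle to its current fence */
	ret = drm_syncobj_find_fence(file, handle, 0, 0, &fence);
	if (ret)
		return ret;

	/* the xarray takes over the fence reference, even on failure */
	return drm_gem_fence_array_add(&submit->deps, fence);
}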
+ */ + struct dma_fence *user_fence; + + int fence_id; /* key into queue->fence_idr */ struct msm_gpu_submitqueue *queue; struct pid *pid; /* submitting process */ bool fault_dumped; /* Limit devcoredump dumping to one per submit */ @@ -355,6 +354,11 @@ struct msm_gem_submit { } bos[]; }; +static inline struct msm_gem_submit *to_msm_submit(struct drm_sched_job *job) +{ + return container_of(job, struct msm_gem_submit, base); +} + void __msm_gem_submit_destroy(struct kref *kref); static inline void msm_gem_submit_get(struct msm_gem_submit *submit) @@ -367,6 +371,8 @@ static inline void msm_gem_submit_put(struct msm_gem_submit *submit) kref_put(&submit->ref, __msm_gem_submit_destroy); } +void msm_submit_retire(struct msm_gem_submit *submit); + /* helper to determine of a buffer in submit should be dumped, used for both * devcoredump and debugfs cmdstream dumping: */ diff --git a/drivers/gpu/drm/msm/msm_gem_prime.c b/drivers/gpu/drm/msm/msm_gem_prime.c index 9880348a4dc7..fc94e061d6a7 100644 --- a/drivers/gpu/drm/msm/msm_gem_prime.c +++ b/drivers/gpu/drm/msm/msm_gem_prime.c @@ -39,17 +39,6 @@ void msm_gem_prime_vunmap(struct drm_gem_object *obj, struct dma_buf_map *map) msm_gem_put_vaddr(obj); } -int msm_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) -{ - int ret; - - ret = drm_gem_mmap_obj(obj, obj->size, vma); - if (ret < 0) - return ret; - - return msm_gem_mmap_obj(vma->vm_private_data, vma); -} - struct drm_gem_object *msm_gem_prime_import_sg_table(struct drm_device *dev, struct dma_buf_attachment *attach, struct sg_table *sg) { diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index 44f84bfd0c0e..fdc5367aecaa 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -23,8 +23,9 @@ /* make sure these don't conflict w/ MSM_SUBMIT_BO_x */ #define BO_VALID 0x8000 /* is current addr in cmdstream correct/valid? 
*/ -#define BO_LOCKED 0x4000 -#define BO_PINNED 0x2000 +#define BO_LOCKED 0x4000 /* obj lock is held */ +#define BO_ACTIVE 0x2000 /* active refcnt is held */ +#define BO_PINNED 0x1000 /* obj is pinned and on active list */ static struct msm_gem_submit *submit_create(struct drm_device *dev, struct msm_gpu *gpu, @@ -32,32 +33,37 @@ static struct msm_gem_submit *submit_create(struct drm_device *dev, uint32_t nr_cmds) { struct msm_gem_submit *submit; - uint64_t sz = struct_size(submit, bos, nr_bos) + - ((u64)nr_cmds * sizeof(submit->cmd[0])); + uint64_t sz; + int ret; + + sz = struct_size(submit, bos, nr_bos) + + ((u64)nr_cmds * sizeof(submit->cmd[0])); if (sz > SIZE_MAX) - return NULL; + return ERR_PTR(-ENOMEM); - submit = kmalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY); + submit = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY); if (!submit) - return NULL; + return ERR_PTR(-ENOMEM); + + ret = drm_sched_job_init(&submit->base, &queue->entity, queue); + if (ret) { + kfree(submit); + return ERR_PTR(ret); + } + + xa_init_flags(&submit->deps, XA_FLAGS_ALLOC); kref_init(&submit->ref); submit->dev = dev; submit->aspace = queue->ctx->aspace; submit->gpu = gpu; - submit->fence = NULL; submit->cmd = (void *)&submit->bos[nr_bos]; submit->queue = queue; - submit->ring = gpu->rb[queue->prio]; + submit->ring = gpu->rb[queue->ring_nr]; submit->fault_dumped = false; - /* initially, until copy_from_user() and bo lookup succeeds: */ - submit->nr_bos = 0; - submit->nr_cmds = 0; - INIT_LIST_HEAD(&submit->node); - INIT_LIST_HEAD(&submit->bo_list); return submit; } @@ -66,9 +72,25 @@ void __msm_gem_submit_destroy(struct kref *kref) { struct msm_gem_submit *submit = container_of(kref, struct msm_gem_submit, ref); + unsigned long index; + struct dma_fence *fence; unsigned i; - dma_fence_put(submit->fence); + if (submit->fence_id) { + mutex_lock(&submit->queue->lock); + idr_remove(&submit->queue->fence_idr, submit->fence_id); + mutex_unlock(&submit->queue->lock); + } + + xa_for_each (&submit->deps, index, fence) { + dma_fence_put(fence); + } + + xa_destroy(&submit->deps); + + dma_fence_put(submit->user_fence); + dma_fence_put(submit->hw_fence); + put_pid(submit->pid); msm_submitqueue_put(submit->queue); @@ -121,7 +143,6 @@ static int submit_lookup_objects(struct msm_gem_submit *submit, for (i = 0; i < args->nr_bos; i++) { struct drm_gem_object *obj; - struct msm_gem_object *msm_obj; /* normally use drm_gem_object_lookup(), but for bulk lookup * all under single table_lock just hit object_idr directly: @@ -133,20 +154,9 @@ static int submit_lookup_objects(struct msm_gem_submit *submit, goto out_unlock; } - msm_obj = to_msm_bo(obj); - - if (!list_empty(&msm_obj->submit_entry)) { - DRM_ERROR("handle %u at index %u already on submit list\n", - submit->bos[i].handle, i); - ret = -EINVAL; - goto out_unlock; - } - drm_gem_object_get(obj); - submit->bos[i].obj = msm_obj; - - list_add_tail(&msm_obj->submit_entry, &submit->bo_list); + submit->bos[i].obj = to_msm_bo(obj); } out_unlock: @@ -220,21 +230,34 @@ out: return ret; } -static void submit_unlock_unpin_bo(struct msm_gem_submit *submit, - int i, bool backoff) +/* Unwind bo state, according to cleanup_flags. In the success case, only + * the lock is dropped at the end of the submit (and active/pin ref is dropped + * later when the submit is retired). 
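The cleanup_flags argument introduced just below lets each caller drop only the state it still owns; as used by the call sites later in this hunk, the combinations work out roughly to:

/*
 * submit_lock_objects() error path   -> BO_PINNED | BO_ACTIVE | BO_LOCKED
 * submit_cleanup(), success          -> BO_LOCKED only (pin/active kept)
 * submit_cleanup(), error            -> BO_LOCKED | BO_PINNED | BO_ACTIVE
 * msm_submit_retire()                -> BO_PINNED | BO_ACTIVE (under obj lock)
 */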
+ */ +static void submit_cleanup_bo(struct msm_gem_submit *submit, int i, + unsigned cleanup_flags) { - struct msm_gem_object *msm_obj = submit->bos[i].obj; + struct drm_gem_object *obj = &submit->bos[i].obj->base; + unsigned flags = submit->bos[i].flags & cleanup_flags; - if (submit->bos[i].flags & BO_PINNED) - msm_gem_unpin_iova_locked(&msm_obj->base, submit->aspace); + if (flags & BO_PINNED) + msm_gem_unpin_iova_locked(obj, submit->aspace); - if (submit->bos[i].flags & BO_LOCKED) - dma_resv_unlock(msm_obj->base.resv); + if (flags & BO_ACTIVE) + msm_gem_active_put(obj); - if (backoff && !(submit->bos[i].flags & BO_VALID)) - submit->bos[i].iova = 0; + if (flags & BO_LOCKED) + dma_resv_unlock(obj->resv); - submit->bos[i].flags &= ~(BO_LOCKED | BO_PINNED); + submit->bos[i].flags &= ~cleanup_flags; +} + +static void submit_unlock_unpin_bo(struct msm_gem_submit *submit, int i) +{ + submit_cleanup_bo(submit, i, BO_PINNED | BO_ACTIVE | BO_LOCKED); + + if (!(submit->bos[i].flags & BO_VALID)) + submit->bos[i].iova = 0; } /* This is where we make sure all the bo's are reserved and pin'd: */ @@ -265,11 +288,17 @@ retry: return 0; fail: + if (ret == -EALREADY) { + DRM_ERROR("handle %u at index %u already on submit list\n", + submit->bos[i].handle, i); + ret = -EINVAL; + } + for (; i >= 0; i--) - submit_unlock_unpin_bo(submit, i, true); + submit_unlock_unpin_bo(submit, i); if (slow_locked > 0) - submit_unlock_unpin_bo(submit, slow_locked, true); + submit_unlock_unpin_bo(submit, slow_locked); if (ret == -EDEADLK) { struct msm_gem_object *msm_obj = submit->bos[contended].obj; @@ -281,6 +310,12 @@ fail: slow_locked = contended; goto retry; } + + /* Not expecting -EALREADY here, if the bo was already + * locked, we should have gotten -EALREADY already from + * the dma_resv_lock_interruptable() call. + */ + WARN_ON_ONCE(ret == -EALREADY); } return ret; @@ -291,7 +326,7 @@ static int submit_fence_sync(struct msm_gem_submit *submit, bool no_implicit) int i, ret = 0; for (i = 0; i < submit->nr_bos; i++) { - struct msm_gem_object *msm_obj = submit->bos[i].obj; + struct drm_gem_object *obj = &submit->bos[i].obj->base; bool write = submit->bos[i].flags & MSM_SUBMIT_BO_WRITE; if (!write) { @@ -300,8 +335,7 @@ static int submit_fence_sync(struct msm_gem_submit *submit, bool no_implicit) * strange place to call it. OTOH this is a * convenient can-fail point to hook it in. */ - ret = dma_resv_reserve_shared(msm_obj->base.resv, - 1); + ret = dma_resv_reserve_shared(obj->resv, 1); if (ret) return ret; } @@ -309,7 +343,7 @@ static int submit_fence_sync(struct msm_gem_submit *submit, bool no_implicit) if (no_implicit) continue; - ret = msm_gem_sync_object(&msm_obj->base, submit->ring->fctx, + ret = drm_gem_fence_array_add_implicit(&submit->deps, obj, write); if (ret) break; @@ -324,12 +358,24 @@ static int submit_pin_objects(struct msm_gem_submit *submit) submit->valid = true; + /* + * Increment active_count first, so if under memory pressure, we + * don't inadvertently evict a bo needed by the submit in order + * to pin an earlier bo in the same submit. 
+ */ for (i = 0; i < submit->nr_bos; i++) { - struct msm_gem_object *msm_obj = submit->bos[i].obj; + struct drm_gem_object *obj = &submit->bos[i].obj->base; + + msm_gem_active_get(obj, submit->gpu); + submit->bos[i].flags |= BO_ACTIVE; + } + + for (i = 0; i < submit->nr_bos; i++) { + struct drm_gem_object *obj = &submit->bos[i].obj->base; uint64_t iova; /* if locking succeeded, pin bo: */ - ret = msm_gem_get_and_pin_iova_locked(&msm_obj->base, + ret = msm_gem_get_and_pin_iova_locked(obj, submit->aspace, &iova); if (ret) @@ -350,6 +396,20 @@ static int submit_pin_objects(struct msm_gem_submit *submit) return ret; } +static void submit_attach_object_fences(struct msm_gem_submit *submit) +{ + int i; + + for (i = 0; i < submit->nr_bos; i++) { + struct drm_gem_object *obj = &submit->bos[i].obj->base; + + if (submit->bos[i].flags & MSM_SUBMIT_BO_WRITE) + dma_resv_add_excl_fence(obj->resv, submit->user_fence); + else if (submit->bos[i].flags & MSM_SUBMIT_BO_READ) + dma_resv_add_shared_fence(obj->resv, submit->user_fence); + } +} + static int submit_bo(struct msm_gem_submit *submit, uint32_t idx, struct msm_gem_object **obj, uint64_t *iova, bool *valid) { @@ -444,18 +504,39 @@ out: return ret; } -static void submit_cleanup(struct msm_gem_submit *submit) +/* Cleanup submit at end of ioctl. In the error case, this also drops + * references, unpins, and drops active refcnt. In the non-error case, + * this is done when the submit is retired. + */ +static void submit_cleanup(struct msm_gem_submit *submit, bool error) { + unsigned cleanup_flags = BO_LOCKED; unsigned i; + if (error) + cleanup_flags |= BO_PINNED | BO_ACTIVE; + for (i = 0; i < submit->nr_bos; i++) { struct msm_gem_object *msm_obj = submit->bos[i].obj; - submit_unlock_unpin_bo(submit, i, false); - list_del_init(&msm_obj->submit_entry); - drm_gem_object_put_locked(&msm_obj->base); + submit_cleanup_bo(submit, i, cleanup_flags); + if (error) + drm_gem_object_put(&msm_obj->base); } } +void msm_submit_retire(struct msm_gem_submit *submit) +{ + int i; + + for (i = 0; i < submit->nr_bos; i++) { + struct drm_gem_object *obj = &submit->bos[i].obj->base; + + msm_gem_lock(obj); + submit_cleanup_bo(submit, i, BO_PINNED | BO_ACTIVE); + msm_gem_unlock(obj); + drm_gem_object_put(obj); + } +} struct msm_submit_post_dep { struct drm_syncobj *syncobj; @@ -463,12 +544,12 @@ struct msm_submit_post_dep { struct dma_fence_chain *chain; }; -static struct drm_syncobj **msm_wait_deps(struct drm_device *dev, - struct drm_file *file, - uint64_t in_syncobjs_addr, - uint32_t nr_in_syncobjs, - size_t syncobj_stride, - struct msm_ringbuffer *ring) +static struct drm_syncobj **msm_parse_deps(struct msm_gem_submit *submit, + struct drm_file *file, + uint64_t in_syncobjs_addr, + uint32_t nr_in_syncobjs, + size_t syncobj_stride, + struct msm_ringbuffer *ring) { struct drm_syncobj **syncobjs = NULL; struct drm_msm_gem_submit_syncobj syncobj_desc = {0}; @@ -492,7 +573,7 @@ static struct drm_syncobj **msm_wait_deps(struct drm_device *dev, } if (syncobj_desc.point && - !drm_core_check_feature(dev, DRIVER_SYNCOBJ_TIMELINE)) { + !drm_core_check_feature(submit->dev, DRIVER_SYNCOBJ_TIMELINE)) { ret = -EOPNOTSUPP; break; } @@ -507,10 +588,7 @@ static struct drm_syncobj **msm_wait_deps(struct drm_device *dev, if (ret) break; - if (!dma_fence_match_context(fence, ring->fctx->context)) - ret = dma_fence_wait(fence, true); - - dma_fence_put(fence); + ret = drm_gem_fence_array_add(&submit->deps, fence); if (ret) break; @@ -587,9 +665,7 @@ static struct msm_submit_post_dep 
*msm_parse_post_deps(struct drm_device *dev, break; } - post_deps[i].chain = - kmalloc(sizeof(*post_deps[i].chain), - GFP_KERNEL); + post_deps[i].chain = dma_fence_chain_alloc(); if (!post_deps[i].chain) { ret = -ENOMEM; break; @@ -606,7 +682,7 @@ static struct msm_submit_post_dep *msm_parse_post_deps(struct drm_device *dev, if (ret) { for (j = 0; j <= i; ++j) { - kfree(post_deps[j].chain); + dma_fence_chain_free(post_deps[j].chain); if (post_deps[j].syncobj) drm_syncobj_put(post_deps[j].syncobj); } @@ -643,9 +719,8 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, struct msm_drm_private *priv = dev->dev_private; struct drm_msm_gem_submit *args = data; struct msm_file_private *ctx = file->driver_priv; - struct msm_gem_submit *submit; + struct msm_gem_submit *submit = NULL; struct msm_gpu *gpu = priv->gpu; - struct sync_file *sync_file = NULL; struct msm_gpu_submitqueue *queue; struct msm_ringbuffer *ring; struct msm_submit_post_dep *post_deps = NULL; @@ -655,6 +730,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, bool has_ww_ticket = false; unsigned i; int ret, submitid; + if (!gpu) return -ENXIO; @@ -683,38 +759,59 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, /* Get a unique identifier for the submission for logging purposes */ submitid = atomic_inc_return(&ident) - 1; - ring = gpu->rb[queue->prio]; + ring = gpu->rb[queue->ring_nr]; trace_msm_gpu_submit(pid_nr(pid), ring->id, submitid, args->nr_bos, args->nr_cmds); + ret = mutex_lock_interruptible(&queue->lock); + if (ret) + goto out_post_unlock; + + if (args->flags & MSM_SUBMIT_FENCE_FD_OUT) { + out_fence_fd = get_unused_fd_flags(O_CLOEXEC); + if (out_fence_fd < 0) { + ret = out_fence_fd; + goto out_unlock; + } + } + + submit = submit_create(dev, gpu, queue, args->nr_bos, + args->nr_cmds); + if (IS_ERR(submit)) { + ret = PTR_ERR(submit); + goto out_unlock; + } + + submit->pid = pid; + submit->ident = submitid; + + if (args->flags & MSM_SUBMIT_SUDO) + submit->in_rb = true; + if (args->flags & MSM_SUBMIT_FENCE_FD_IN) { struct dma_fence *in_fence; in_fence = sync_file_get_fence(args->fence_fd); - if (!in_fence) - return -EINVAL; - - /* - * Wait if the fence is from a foreign context, or if the fence - * array contains any fence from a foreign context. 
- */ - ret = 0; - if (!dma_fence_match_context(in_fence, ring->fctx->context)) - ret = dma_fence_wait(in_fence, true); + if (!in_fence) { + ret = -EINVAL; + goto out_unlock; + } - dma_fence_put(in_fence); + ret = drm_gem_fence_array_add(&submit->deps, in_fence); if (ret) - return ret; + goto out_unlock; } if (args->flags & MSM_SUBMIT_SYNCOBJ_IN) { - syncobjs_to_reset = msm_wait_deps(dev, file, - args->in_syncobjs, - args->nr_in_syncobjs, - args->syncobj_stride, ring); - if (IS_ERR(syncobjs_to_reset)) - return PTR_ERR(syncobjs_to_reset); + syncobjs_to_reset = msm_parse_deps(submit, file, + args->in_syncobjs, + args->nr_in_syncobjs, + args->syncobj_stride, ring); + if (IS_ERR(syncobjs_to_reset)) { + ret = PTR_ERR(syncobjs_to_reset); + goto out_unlock; + } } if (args->flags & MSM_SUBMIT_SYNCOBJ_OUT) { @@ -724,51 +821,17 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, args->syncobj_stride); if (IS_ERR(post_deps)) { ret = PTR_ERR(post_deps); - goto out_post_unlock; - } - } - - ret = mutex_lock_interruptible(&dev->struct_mutex); - if (ret) - goto out_post_unlock; - - if (args->flags & MSM_SUBMIT_FENCE_FD_OUT) { - out_fence_fd = get_unused_fd_flags(O_CLOEXEC); - if (out_fence_fd < 0) { - ret = out_fence_fd; goto out_unlock; } } - submit = submit_create(dev, gpu, queue, args->nr_bos, - args->nr_cmds); - if (!submit) { - ret = -ENOMEM; - goto out_unlock; - } - - submit->pid = pid; - submit->ident = submitid; - - if (args->flags & MSM_SUBMIT_SUDO) - submit->in_rb = true; - ret = submit_lookup_objects(submit, args, file); if (ret) - goto out_pre_pm; + goto out; ret = submit_lookup_cmds(submit, args, file); if (ret) - goto out_pre_pm; - - /* - * Thanks to dev_pm_opp opp_table_lock interactions with mm->mmap_sem - * in the resume path, we need to to rpm get before we lock objs. - * Which unfortunately might involve powering up the GPU sooner than - * is necessary. But at least in the explicit fencing case, we will - * have already done all the fence waiting. - */ - pm_runtime_get_sync(&gpu->pdev->dev); + goto out; /* copy_*_user while holding a ww ticket upsets lockdep */ ww_acquire_init(&submit->ticket, &reservation_ww_class); @@ -815,47 +878,54 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, submit->nr_cmds = i; - submit->fence = msm_fence_alloc(ring->fctx); - if (IS_ERR(submit->fence)) { - ret = PTR_ERR(submit->fence); - submit->fence = NULL; + submit->user_fence = dma_fence_get(&submit->base.s_fence->finished); + + /* + * Allocate an id which can be used by WAIT_FENCE ioctl to map back + * to the underlying fence. 
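The id allocated just below is only meaningful together with the submitqueue's fence_idr. A minimal sketch of how a wait path could resolve it back to a dma_fence, assuming it runs under the same queue->lock that serializes idr_remove() in __msm_gem_submit_destroy(); the helper name is illustrative, not part of this patch:

static struct dma_fence *example_fence_from_id(struct msm_gpu_submitqueue *queue,
					       uint32_t fence_id)
{
	struct dma_fence *fence;

	mutex_lock(&queue->lock);
	/* still valid here: the submit's reference on user_fence is only
	 * dropped after the id has been removed from the idr
	 */
	fence = idr_find(&queue->fence_idr, fence_id);
	if (fence)
		dma_fence_get(fence);
	mutex_unlock(&queue->lock);

	return fence;
}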
+ */ + submit->fence_id = idr_alloc_cyclic(&queue->fence_idr, + submit->user_fence, 0, INT_MAX, GFP_KERNEL); + if (submit->fence_id < 0) { + ret = submit->fence_id = 0; + submit->fence_id = 0; goto out; } if (args->flags & MSM_SUBMIT_FENCE_FD_OUT) { - sync_file = sync_file_create(submit->fence); + struct sync_file *sync_file = sync_file_create(submit->user_fence); if (!sync_file) { ret = -ENOMEM; goto out; } + fd_install(out_fence_fd, sync_file->file); + args->fence_fd = out_fence_fd; } - msm_gpu_submit(gpu, submit); + submit_attach_object_fences(submit); - args->fence = submit->fence->seqno; + /* The scheduler owns a ref now: */ + msm_gem_submit_get(submit); - if (args->flags & MSM_SUBMIT_FENCE_FD_OUT) { - fd_install(out_fence_fd, sync_file->file); - args->fence_fd = out_fence_fd; - } + drm_sched_entity_push_job(&submit->base, &queue->entity); + + args->fence = submit->fence_id; msm_reset_syncobjs(syncobjs_to_reset, args->nr_in_syncobjs); msm_process_post_deps(post_deps, args->nr_out_syncobjs, - submit->fence); + submit->user_fence); out: - pm_runtime_put(&gpu->pdev->dev); -out_pre_pm: - submit_cleanup(submit); + submit_cleanup(submit, !!ret); if (has_ww_ticket) ww_acquire_fini(&submit->ticket); - msm_gem_submit_put(submit); out_unlock: if (ret && (out_fence_fd >= 0)) put_unused_fd(out_fence_fd); - mutex_unlock(&dev->struct_mutex); - + mutex_unlock(&queue->lock); + if (submit) + msm_gem_submit_put(submit); out_post_unlock: if (!IS_ERR_OR_NULL(post_deps)) { for (i = 0; i < args->nr_out_syncobjs; ++i) { diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index 0ebf7bc6ad09..8a3a592da3a4 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -13,8 +13,6 @@ #include <generated/utsrelease.h> #include <linux/string_helpers.h> -#include <linux/devfreq.h> -#include <linux/devfreq_cooling.h> #include <linux/devcoredump.h> #include <linux/sched/task.h> @@ -22,106 +20,6 @@ * Power Management: */ -static int msm_devfreq_target(struct device *dev, unsigned long *freq, - u32 flags) -{ - struct msm_gpu *gpu = dev_to_gpu(dev); - struct dev_pm_opp *opp; - - opp = devfreq_recommended_opp(dev, freq, flags); - - if (IS_ERR(opp)) - return PTR_ERR(opp); - - trace_msm_gpu_freq_change(dev_pm_opp_get_freq(opp)); - - if (gpu->funcs->gpu_set_freq) - gpu->funcs->gpu_set_freq(gpu, opp); - else - clk_set_rate(gpu->core_clk, *freq); - - dev_pm_opp_put(opp); - - return 0; -} - -static int msm_devfreq_get_dev_status(struct device *dev, - struct devfreq_dev_status *status) -{ - struct msm_gpu *gpu = dev_to_gpu(dev); - ktime_t time; - - if (gpu->funcs->gpu_get_freq) - status->current_frequency = gpu->funcs->gpu_get_freq(gpu); - else - status->current_frequency = clk_get_rate(gpu->core_clk); - - status->busy_time = gpu->funcs->gpu_busy(gpu); - - time = ktime_get(); - status->total_time = ktime_us_delta(time, gpu->devfreq.time); - gpu->devfreq.time = time; - - return 0; -} - -static int msm_devfreq_get_cur_freq(struct device *dev, unsigned long *freq) -{ - struct msm_gpu *gpu = dev_to_gpu(dev); - - if (gpu->funcs->gpu_get_freq) - *freq = gpu->funcs->gpu_get_freq(gpu); - else - *freq = clk_get_rate(gpu->core_clk); - - return 0; -} - -static struct devfreq_dev_profile msm_devfreq_profile = { - .polling_ms = 10, - .target = msm_devfreq_target, - .get_dev_status = msm_devfreq_get_dev_status, - .get_cur_freq = msm_devfreq_get_cur_freq, -}; - -static void msm_devfreq_init(struct msm_gpu *gpu) -{ - /* We need target support to do devfreq */ - if (!gpu->funcs->gpu_busy) - return; - - 
msm_devfreq_profile.initial_freq = gpu->fast_rate; - - /* - * Don't set the freq_table or max_state and let devfreq build the table - * from OPP - * After a deferred probe, these may have be left to non-zero values, - * so set them back to zero before creating the devfreq device - */ - msm_devfreq_profile.freq_table = NULL; - msm_devfreq_profile.max_state = 0; - - gpu->devfreq.devfreq = devm_devfreq_add_device(&gpu->pdev->dev, - &msm_devfreq_profile, DEVFREQ_GOV_SIMPLE_ONDEMAND, - NULL); - - if (IS_ERR(gpu->devfreq.devfreq)) { - DRM_DEV_ERROR(&gpu->pdev->dev, "Couldn't initialize GPU devfreq\n"); - gpu->devfreq.devfreq = NULL; - return; - } - - devfreq_suspend_device(gpu->devfreq.devfreq); - - gpu->cooling = of_devfreq_cooling_register(gpu->pdev->dev.of_node, - gpu->devfreq.devfreq); - if (IS_ERR(gpu->cooling)) { - DRM_DEV_ERROR(&gpu->pdev->dev, - "Couldn't register GPU cooling device\n"); - gpu->cooling = NULL; - } -} - static int enable_pwrrail(struct msm_gpu *gpu) { struct drm_device *dev = gpu->dev; @@ -196,14 +94,6 @@ static int disable_axi(struct msm_gpu *gpu) return 0; } -void msm_gpu_resume_devfreq(struct msm_gpu *gpu) -{ - gpu->devfreq.busy_cycles = 0; - gpu->devfreq.time = ktime_get(); - - devfreq_resume_device(gpu->devfreq.devfreq); -} - int msm_gpu_pm_resume(struct msm_gpu *gpu) { int ret; @@ -223,7 +113,7 @@ int msm_gpu_pm_resume(struct msm_gpu *gpu) if (ret) return ret; - msm_gpu_resume_devfreq(gpu); + msm_devfreq_resume(gpu); gpu->needs_hw_init = true; @@ -237,7 +127,7 @@ int msm_gpu_pm_suspend(struct msm_gpu *gpu) DBG("%s", gpu->name); trace_msm_gpu_suspend(0); - devfreq_suspend_device(gpu->devfreq.devfreq); + msm_devfreq_suspend(gpu); ret = disable_axi(gpu); if (ret) @@ -278,16 +168,18 @@ static void update_fences(struct msm_gpu *gpu, struct msm_ringbuffer *ring, uint32_t fence) { struct msm_gem_submit *submit; + unsigned long flags; - spin_lock(&ring->submit_lock); + spin_lock_irqsave(&ring->submit_lock, flags); list_for_each_entry(submit, &ring->submits, node) { if (submit->seqno > fence) break; msm_update_fence(submit->ring->fctx, - submit->fence->seqno); + submit->hw_fence->seqno); + dma_fence_signal(submit->hw_fence); } - spin_unlock(&ring->submit_lock); + spin_unlock_irqrestore(&ring->submit_lock, flags); } #ifdef CONFIG_DEV_COREDUMP @@ -443,15 +335,16 @@ static struct msm_gem_submit * find_submit(struct msm_ringbuffer *ring, uint32_t fence) { struct msm_gem_submit *submit; + unsigned long flags; - spin_lock(&ring->submit_lock); + spin_lock_irqsave(&ring->submit_lock, flags); list_for_each_entry(submit, &ring->submits, node) { if (submit->seqno == fence) { - spin_unlock(&ring->submit_lock); + spin_unlock_irqrestore(&ring->submit_lock, flags); return submit; } } - spin_unlock(&ring->submit_lock); + spin_unlock_irqrestore(&ring->submit_lock, flags); return NULL; } @@ -487,10 +380,6 @@ static void recover_worker(struct kthread_work *work) put_task_struct(task); } - /* msm_rd_dump_submit() needs bo locked to dump: */ - for (i = 0; i < submit->nr_bos; i++) - msm_gem_lock(&submit->bos[i].obj->base); - if (comm && cmd) { DRM_DEV_ERROR(dev->dev, "%s: offending task: %s (%s)\n", gpu->name, comm, cmd); @@ -500,9 +389,6 @@ static void recover_worker(struct kthread_work *work) } else { msm_rd_dump_submit(priv->hangrd, submit, NULL); } - - for (i = 0; i < submit->nr_bos; i++) - msm_gem_unlock(&submit->bos[i].obj->base); } /* Record the crash state */ @@ -547,11 +433,12 @@ static void recover_worker(struct kthread_work *work) */ for (i = 0; i < gpu->nr_rings; i++) { struct 
msm_ringbuffer *ring = gpu->rb[i]; + unsigned long flags; - spin_lock(&ring->submit_lock); + spin_lock_irqsave(&ring->submit_lock, flags); list_for_each_entry(submit, &ring->submits, node) gpu->funcs->submit(gpu, submit); - spin_unlock(&ring->submit_lock); + spin_unlock_irqrestore(&ring->submit_lock, flags); } } @@ -641,7 +528,7 @@ static void hangcheck_handler(struct timer_list *t) hangcheck_timer_reset(gpu); /* workaround for missing irq: */ - kthread_queue_work(gpu->worker, &gpu->retire_work); + msm_gpu_retire(gpu); } /* @@ -752,7 +639,7 @@ static void retire_submit(struct msm_gpu *gpu, struct msm_ringbuffer *ring, int index = submit->seqno % MSM_GPU_SUBMIT_STATS_COUNT; volatile struct msm_gpu_submit_stats *stats; u64 elapsed, clock = 0; - int i; + unsigned long flags; stats = &ring->memptrs->stats[index]; /* Convert 19.2Mhz alwayson ticks to nanoseconds for elapsed time */ @@ -768,22 +655,22 @@ static void retire_submit(struct msm_gpu *gpu, struct msm_ringbuffer *ring, trace_msm_gpu_submit_retired(submit, elapsed, clock, stats->alwayson_start, stats->alwayson_end); - for (i = 0; i < submit->nr_bos; i++) { - struct drm_gem_object *obj = &submit->bos[i].obj->base; - - msm_gem_lock(obj); - msm_gem_active_put(obj); - msm_gem_unpin_iova_locked(obj, submit->aspace); - msm_gem_unlock(obj); - drm_gem_object_put(obj); - } + msm_submit_retire(submit); pm_runtime_mark_last_busy(&gpu->pdev->dev); pm_runtime_put_autosuspend(&gpu->pdev->dev); - spin_lock(&ring->submit_lock); + spin_lock_irqsave(&ring->submit_lock, flags); list_del(&submit->node); - spin_unlock(&ring->submit_lock); + spin_unlock_irqrestore(&ring->submit_lock, flags); + + /* Update devfreq on transition from active->idle: */ + mutex_lock(&gpu->active_lock); + gpu->active_submits--; + WARN_ON(gpu->active_submits < 0); + if (!gpu->active_submits) + msm_devfreq_idle(gpu); + mutex_unlock(&gpu->active_lock); msm_gem_submit_put(submit); } @@ -798,18 +685,19 @@ static void retire_submits(struct msm_gpu *gpu) while (true) { struct msm_gem_submit *submit = NULL; + unsigned long flags; - spin_lock(&ring->submit_lock); + spin_lock_irqsave(&ring->submit_lock, flags); submit = list_first_entry_or_null(&ring->submits, struct msm_gem_submit, node); - spin_unlock(&ring->submit_lock); + spin_unlock_irqrestore(&ring->submit_lock, flags); /* * If no submit, we are done. If submit->fence hasn't * been signalled, then later submits are not signalled * either, so we are also done. 
*/ - if (submit && dma_fence_is_signaled(submit->fence)) { + if (submit && dma_fence_is_signaled(submit->hw_fence)) { retire_submit(gpu, ring, submit); } else { break; @@ -821,10 +709,6 @@ static void retire_submits(struct msm_gpu *gpu) static void retire_worker(struct kthread_work *work) { struct msm_gpu *gpu = container_of(work, struct msm_gpu, retire_work); - int i; - - for (i = 0; i < gpu->nr_rings; i++) - update_fences(gpu, gpu->rb[i], gpu->rb[i]->memptrs->fence); retire_submits(gpu); } @@ -832,6 +716,11 @@ static void retire_worker(struct kthread_work *work) /* call from irq handler to schedule work to retire bo's */ void msm_gpu_retire(struct msm_gpu *gpu) { + int i; + + for (i = 0; i < gpu->nr_rings; i++) + update_fences(gpu, gpu->rb[i], gpu->rb[i]->memptrs->fence); + kthread_queue_work(gpu->worker, &gpu->retire_work); update_sw_cntrs(gpu); } @@ -842,7 +731,7 @@ void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) struct drm_device *dev = gpu->dev; struct msm_drm_private *priv = dev->dev_private; struct msm_ringbuffer *ring = submit->ring; - int i; + unsigned long flags; WARN_ON(!mutex_is_locked(&dev->struct_mutex)); @@ -856,32 +745,22 @@ void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) update_sw_cntrs(gpu); - for (i = 0; i < submit->nr_bos; i++) { - struct msm_gem_object *msm_obj = submit->bos[i].obj; - struct drm_gem_object *drm_obj = &msm_obj->base; - uint64_t iova; - - /* submit takes a reference to the bo and iova until retired: */ - drm_gem_object_get(&msm_obj->base); - msm_gem_get_and_pin_iova_locked(&msm_obj->base, submit->aspace, &iova); - - if (submit->bos[i].flags & MSM_SUBMIT_BO_WRITE) - dma_resv_add_excl_fence(drm_obj->resv, submit->fence); - else if (submit->bos[i].flags & MSM_SUBMIT_BO_READ) - dma_resv_add_shared_fence(drm_obj->resv, submit->fence); - - msm_gem_active_get(drm_obj, gpu); - } - /* * ring->submits holds a ref to the submit, to deal with the case * that a submit completes before msm_ioctl_gem_submit() returns. 
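Together with the msm_gem_submit_get() just below, the submit's reference counts in this series line up roughly as follows (a summary of the call sites in this patch):

/*
 * kref_init() in submit_create()           - the ioctl's reference,
 *                                            dropped at ioctl exit
 * msm_gem_submit_get() before push_job     - the scheduler's reference,
 *                                            dropped in msm_job_free()
 * msm_gem_submit_get() in msm_gpu_submit() - for ring->submits, dropped
 *                                            at the end of retire_submit()
 */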
*/ msm_gem_submit_get(submit); - spin_lock(&ring->submit_lock); + spin_lock_irqsave(&ring->submit_lock, flags); list_add_tail(&submit->node, &ring->submits); - spin_unlock(&ring->submit_lock); + spin_unlock_irqrestore(&ring->submit_lock, flags); + + /* Update devfreq on transition from idle->active: */ + mutex_lock(&gpu->active_lock); + if (!gpu->active_submits) + msm_devfreq_active(gpu); + gpu->active_submits++; + mutex_unlock(&gpu->active_lock); gpu->funcs->submit(gpu, submit); priv->lastctx = submit->queue->ctx; @@ -968,6 +847,7 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev, sched_set_fifo_low(gpu->worker->task); INIT_LIST_HEAD(&gpu->active_list); + mutex_init(&gpu->active_lock); kthread_init_work(&gpu->retire_work, retire_worker); kthread_init_work(&gpu->recover_work, recover_worker); kthread_init_work(&gpu->fault_work, fault_worker); @@ -1078,7 +958,7 @@ fail: gpu->rb[i] = NULL; } - msm_gem_kernel_put(gpu->memptrs_bo, gpu->aspace, false); + msm_gem_kernel_put(gpu->memptrs_bo, gpu->aspace); platform_set_drvdata(pdev, NULL); return ret; @@ -1097,7 +977,7 @@ void msm_gpu_cleanup(struct msm_gpu *gpu) gpu->rb[i] = NULL; } - msm_gem_kernel_put(gpu->memptrs_bo, gpu->aspace, false); + msm_gem_kernel_put(gpu->memptrs_bo, gpu->aspace); if (!IS_ERR_OR_NULL(gpu->aspace)) { gpu->aspace->mmu->funcs->detach(gpu->aspace->mmu); @@ -1108,5 +988,5 @@ void msm_gpu_cleanup(struct msm_gpu *gpu) kthread_destroy_worker(gpu->worker); } - devfreq_cooling_unregister(gpu->cooling); + msm_devfreq_cleanup(gpu); } diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h index ef41ec09f59c..0e4b45bff2e6 100644 --- a/drivers/gpu/drm/msm/msm_gpu.h +++ b/drivers/gpu/drm/msm/msm_gpu.h @@ -80,6 +80,40 @@ struct msm_gpu_fault_info { const char *block; }; +/** + * struct msm_gpu_devfreq - devfreq related state + */ +struct msm_gpu_devfreq { + /** devfreq: devfreq instance */ + struct devfreq *devfreq; + + /** + * busy_cycles: + * + * Used by implementation of gpu->gpu_busy() to track the last + * busy counter value, for calculating elapsed busy cycles since + * last sampling period. + */ + u64 busy_cycles; + + /** time: Time of last sampling period. */ + ktime_t time; + + /** idle_time: Time of last transition to idle: */ + ktime_t idle_time; + + /** + * idle_freq: + * + * Shadow frequency used while the GPU is idle. From the PoV of + * the devfreq governor, we are continuing to sample busyness and + * adjust frequency while the GPU is idle, but we use this shadow + * value as the GPU is actually clamped to minimum frequency while + * it is inactive. + */ + unsigned long idle_freq; +}; + struct msm_gpu { const char *name; struct drm_device *dev; @@ -109,6 +143,19 @@ struct msm_gpu { */ struct list_head active_list; + /** + * active_submits: + * + * The number of submitted but not yet retired submits, used to + * determine transitions between active and idle. + * + * Protected by lock + */ + int active_submits; + + /** lock: protects active_submits and idle/active transitions */ + struct mutex active_lock; + /* does gpu need hw_init? */ bool needs_hw_init; @@ -151,11 +198,7 @@ struct msm_gpu { struct drm_gem_object *memptrs_bo; - struct { - struct devfreq *devfreq; - u64 busy_cycles; - ktime_t time; - } devfreq; + struct msm_gpu_devfreq devfreq; uint32_t suspend_count; @@ -207,14 +250,90 @@ struct msm_gpu_perfcntr { const char *name; }; +/* + * The number of priority levels provided by drm gpu scheduler. 
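To make the mapping documented below concrete, a worked example, assuming NR_SCHED_PRIORITIES evaluates to 3 (DRM_SCHED_PRIORITY_MIN..HIGH) and a GPU with two rings; the numbers are illustrative only:

/*
 * userspace prio 0 -> ring_nr 0, sched_prio 2 (highest)
 * userspace prio 1 -> ring_nr 0, sched_prio 1
 * userspace prio 2 -> ring_nr 0, sched_prio 0
 * userspace prio 3 -> ring_nr 1, sched_prio 2
 * userspace prio 5 -> ring_nr 1, sched_prio 0
 * userspace prio 6 -> -EINVAL (ring_nr 2 >= nr_rings)
 *
 * msm_submitqueue_init() would then pick DIV_ROUND_UP(5, 2) == 3 as the
 * default priority.
 */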
The + * DRM_SCHED_PRIORITY_KERNEL priority level is treated specially in some + * cases, so we don't use it (no need for kernel generated jobs). + */ +#define NR_SCHED_PRIORITIES (1 + DRM_SCHED_PRIORITY_HIGH - DRM_SCHED_PRIORITY_MIN) + +/** + * msm_gpu_convert_priority - Map userspace priority to ring # and sched priority + * + * @gpu: the gpu instance + * @prio: the userspace priority level + * @ring_nr: [out] the ringbuffer the userspace priority maps to + * @sched_prio: [out] the gpu scheduler priority level which the userspace + * priority maps to + * + * With drm/scheduler providing it's own level of prioritization, our total + * number of available priority levels is (nr_rings * NR_SCHED_PRIORITIES). + * Each ring is associated with it's own scheduler instance. However, our + * UABI is that lower numerical values are higher priority. So mapping the + * single userspace priority level into ring_nr and sched_prio takes some + * care. The userspace provided priority (when a submitqueue is created) + * is mapped to ring nr and scheduler priority as such: + * + * ring_nr = userspace_prio / NR_SCHED_PRIORITIES + * sched_prio = NR_SCHED_PRIORITIES - + * (userspace_prio % NR_SCHED_PRIORITIES) - 1 + * + * This allows generations without preemption (nr_rings==1) to have some + * amount of prioritization, and provides more priority levels for gens + * that do have preemption. + */ +static inline int msm_gpu_convert_priority(struct msm_gpu *gpu, int prio, + unsigned *ring_nr, enum drm_sched_priority *sched_prio) +{ + unsigned rn, sp; + + rn = div_u64_rem(prio, NR_SCHED_PRIORITIES, &sp); + + /* invert sched priority to map to higher-numeric-is-higher- + * priority convention + */ + sp = NR_SCHED_PRIORITIES - sp - 1; + + if (rn >= gpu->nr_rings) + return -EINVAL; + + *ring_nr = rn; + *sched_prio = sp; + + return 0; +} + +/** + * A submitqueue is associated with a gl context or vk queue (or equiv) + * in userspace. + * + * @id: userspace id for the submitqueue, unique within the drm_file + * @flags: userspace flags for the submitqueue, specified at creation + * (currently unusued) + * @ring_nr: the ringbuffer used by this submitqueue, which is determined + * by the submitqueue's priority + * @faults: the number of GPU hangs associated with this submitqueue + * @ctx: the per-drm_file context associated with the submitqueue (ie. 
+ * which set of pgtables do submits jobs associated with the + * submitqueue use) + * @node: node in the context's list of submitqueues + * @fence_idr: maps fence-id to dma_fence for userspace visible fence + * seqno, protected by submitqueue lock + * @lock: submitqueue lock + * @ref: reference count + * @entity: the submit job-queue + */ struct msm_gpu_submitqueue { int id; u32 flags; - u32 prio; + u32 ring_nr; int faults; struct msm_file_private *ctx; struct list_head node; + struct idr fence_idr; + struct mutex lock; struct kref ref; + struct drm_sched_entity entity; }; struct msm_gpu_state_bo { @@ -301,7 +420,13 @@ static inline void gpu_write64(struct msm_gpu *gpu, u32 lo, u32 hi, u64 val) int msm_gpu_pm_suspend(struct msm_gpu *gpu); int msm_gpu_pm_resume(struct msm_gpu *gpu); -void msm_gpu_resume_devfreq(struct msm_gpu *gpu); + +void msm_devfreq_init(struct msm_gpu *gpu); +void msm_devfreq_cleanup(struct msm_gpu *gpu); +void msm_devfreq_resume(struct msm_gpu *gpu); +void msm_devfreq_suspend(struct msm_gpu *gpu); +void msm_devfreq_active(struct msm_gpu *gpu); +void msm_devfreq_idle(struct msm_gpu *gpu); int msm_gpu_hw_init(struct msm_gpu *gpu); diff --git a/drivers/gpu/drm/msm/msm_gpu_devfreq.c b/drivers/gpu/drm/msm/msm_gpu_devfreq.c new file mode 100644 index 000000000000..0a1ee20296a2 --- /dev/null +++ b/drivers/gpu/drm/msm/msm_gpu_devfreq.c @@ -0,0 +1,203 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2013 Red Hat + * Author: Rob Clark <robdclark@gmail.com> + */ + +#include "msm_gpu.h" +#include "msm_gpu_trace.h" + +#include <linux/devfreq.h> +#include <linux/devfreq_cooling.h> + +/* + * Power Management: + */ + +static int msm_devfreq_target(struct device *dev, unsigned long *freq, + u32 flags) +{ + struct msm_gpu *gpu = dev_to_gpu(dev); + struct dev_pm_opp *opp; + + opp = devfreq_recommended_opp(dev, freq, flags); + + /* + * If the GPU is idle, devfreq is not aware, so just ignore + * it's requests + */ + if (gpu->devfreq.idle_freq) { + gpu->devfreq.idle_freq = *freq; + return 0; + } + + if (IS_ERR(opp)) + return PTR_ERR(opp); + + trace_msm_gpu_freq_change(dev_pm_opp_get_freq(opp)); + + if (gpu->funcs->gpu_set_freq) + gpu->funcs->gpu_set_freq(gpu, opp); + else + clk_set_rate(gpu->core_clk, *freq); + + dev_pm_opp_put(opp); + + return 0; +} + +static unsigned long get_freq(struct msm_gpu *gpu) +{ + if (gpu->devfreq.idle_freq) + return gpu->devfreq.idle_freq; + + if (gpu->funcs->gpu_get_freq) + return gpu->funcs->gpu_get_freq(gpu); + + return clk_get_rate(gpu->core_clk); +} + +static int msm_devfreq_get_dev_status(struct device *dev, + struct devfreq_dev_status *status) +{ + struct msm_gpu *gpu = dev_to_gpu(dev); + ktime_t time; + + status->current_frequency = get_freq(gpu); + status->busy_time = gpu->funcs->gpu_busy(gpu); + + time = ktime_get(); + status->total_time = ktime_us_delta(time, gpu->devfreq.time); + gpu->devfreq.time = time; + + return 0; +} + +static int msm_devfreq_get_cur_freq(struct device *dev, unsigned long *freq) +{ + *freq = get_freq(dev_to_gpu(dev)); + + return 0; +} + +static struct devfreq_dev_profile msm_devfreq_profile = { + .timer = DEVFREQ_TIMER_DELAYED, + .polling_ms = 50, + .target = msm_devfreq_target, + .get_dev_status = msm_devfreq_get_dev_status, + .get_cur_freq = msm_devfreq_get_cur_freq, +}; + +void msm_devfreq_init(struct msm_gpu *gpu) +{ + /* We need target support to do devfreq */ + if (!gpu->funcs->gpu_busy) + return; + + msm_devfreq_profile.initial_freq = gpu->fast_rate; + + /* + * Don't set the freq_table or max_state and 
let devfreq build the table + * from OPP + * After a deferred probe, these may have be left to non-zero values, + * so set them back to zero before creating the devfreq device + */ + msm_devfreq_profile.freq_table = NULL; + msm_devfreq_profile.max_state = 0; + + gpu->devfreq.devfreq = devm_devfreq_add_device(&gpu->pdev->dev, + &msm_devfreq_profile, DEVFREQ_GOV_SIMPLE_ONDEMAND, + NULL); + + if (IS_ERR(gpu->devfreq.devfreq)) { + DRM_DEV_ERROR(&gpu->pdev->dev, "Couldn't initialize GPU devfreq\n"); + gpu->devfreq.devfreq = NULL; + return; + } + + devfreq_suspend_device(gpu->devfreq.devfreq); + + gpu->cooling = of_devfreq_cooling_register(gpu->pdev->dev.of_node, + gpu->devfreq.devfreq); + if (IS_ERR(gpu->cooling)) { + DRM_DEV_ERROR(&gpu->pdev->dev, + "Couldn't register GPU cooling device\n"); + gpu->cooling = NULL; + } +} + +void msm_devfreq_cleanup(struct msm_gpu *gpu) +{ + devfreq_cooling_unregister(gpu->cooling); +} + +void msm_devfreq_resume(struct msm_gpu *gpu) +{ + gpu->devfreq.busy_cycles = 0; + gpu->devfreq.time = ktime_get(); + + devfreq_resume_device(gpu->devfreq.devfreq); +} + +void msm_devfreq_suspend(struct msm_gpu *gpu) +{ + devfreq_suspend_device(gpu->devfreq.devfreq); +} + +void msm_devfreq_active(struct msm_gpu *gpu) +{ + struct msm_gpu_devfreq *df = &gpu->devfreq; + struct devfreq_dev_status status; + unsigned int idle_time; + unsigned long target_freq = df->idle_freq; + + /* + * Hold devfreq lock to synchronize with get_dev_status()/ + * target() callbacks + */ + mutex_lock(&df->devfreq->lock); + + idle_time = ktime_to_ms(ktime_sub(ktime_get(), df->idle_time)); + + /* + * If we've been idle for a significant fraction of a polling + * interval, then we won't meet the threshold of busyness for + * the governor to ramp up the freq.. so give some boost + */ + if (idle_time > msm_devfreq_profile.polling_ms/2) { + target_freq *= 2; + } + + df->idle_freq = 0; + + msm_devfreq_target(&gpu->pdev->dev, &target_freq, 0); + + /* + * Reset the polling interval so we aren't inconsistent + * about freq vs busy/total cycles + */ + msm_devfreq_get_dev_status(&gpu->pdev->dev, &status); + + mutex_unlock(&df->devfreq->lock); +} + +void msm_devfreq_idle(struct msm_gpu *gpu) +{ + struct msm_gpu_devfreq *df = &gpu->devfreq; + unsigned long idle_freq, target_freq = 0; + + /* + * Hold devfreq lock to synchronize with get_dev_status()/ + * target() callbacks + */ + mutex_lock(&df->devfreq->lock); + + idle_freq = get_freq(gpu); + + msm_devfreq_target(&gpu->pdev->dev, &target_freq, 0); + + df->idle_time = ktime_get(); + df->idle_freq = idle_freq; + + mutex_unlock(&df->devfreq->lock); +} diff --git a/drivers/gpu/drm/msm/msm_kms.h b/drivers/gpu/drm/msm/msm_kms.h index 086a2d59b8c8..de2bc3467bb5 100644 --- a/drivers/gpu/drm/msm/msm_kms.h +++ b/drivers/gpu/drm/msm/msm_kms.h @@ -117,9 +117,6 @@ struct msm_kms_funcs { struct drm_encoder *encoder, struct drm_encoder *slave_encoder, bool is_cmd_mode); - void (*set_encoder_mode)(struct msm_kms *kms, - struct drm_encoder *encoder, - bool cmd_mode); /* cleanup: */ void (*destroy)(struct msm_kms *kms); @@ -150,7 +147,7 @@ struct msm_kms { const struct msm_kms_funcs *funcs; struct drm_device *dev; - /* irq number to be passed on to drm_irq_install */ + /* irq number to be passed on to msm_irq_install */ int irq; /* mapper-id used to request GEM buffer mapped for scanout: */ diff --git a/drivers/gpu/drm/msm/msm_rd.c b/drivers/gpu/drm/msm/msm_rd.c index 659e5cc4b40a..b55398a34fa4 100644 --- a/drivers/gpu/drm/msm/msm_rd.c +++ b/drivers/gpu/drm/msm/msm_rd.c @@ -325,15 
+325,19 @@ static void snapshot_buf(struct msm_rd_state *rd, if (!(submit->bos[idx].flags & MSM_SUBMIT_BO_READ)) return; + msm_gem_lock(&obj->base); buf = msm_gem_get_vaddr_active(&obj->base); if (IS_ERR(buf)) - return; + goto out_unlock; buf += offset; rd_write_section(rd, RD_BUFFER_CONTENTS, buf, size); msm_gem_put_vaddr_locked(&obj->base); + +out_unlock: + msm_gem_unlock(&obj->base); } /* called under struct_mutex */ diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c b/drivers/gpu/drm/msm/msm_ringbuffer.c index 4d2a2a4abef8..bd54c1412649 100644 --- a/drivers/gpu/drm/msm/msm_ringbuffer.c +++ b/drivers/gpu/drm/msm/msm_ringbuffer.c @@ -7,10 +7,61 @@ #include "msm_ringbuffer.h" #include "msm_gpu.h" +static uint num_hw_submissions = 8; +MODULE_PARM_DESC(num_hw_submissions, "The max # of jobs to write into ringbuffer (default 8)"); +module_param(num_hw_submissions, uint, 0600); + +static struct dma_fence *msm_job_dependency(struct drm_sched_job *job, + struct drm_sched_entity *s_entity) +{ + struct msm_gem_submit *submit = to_msm_submit(job); + + if (!xa_empty(&submit->deps)) + return xa_erase(&submit->deps, submit->last_dep++); + + return NULL; +} + +static struct dma_fence *msm_job_run(struct drm_sched_job *job) +{ + struct msm_gem_submit *submit = to_msm_submit(job); + struct msm_gpu *gpu = submit->gpu; + + submit->hw_fence = msm_fence_alloc(submit->ring->fctx); + + pm_runtime_get_sync(&gpu->pdev->dev); + + /* TODO move submit path over to using a per-ring lock.. */ + mutex_lock(&gpu->dev->struct_mutex); + + msm_gpu_submit(gpu, submit); + + mutex_unlock(&gpu->dev->struct_mutex); + + pm_runtime_put(&gpu->pdev->dev); + + return dma_fence_get(submit->hw_fence); +} + +static void msm_job_free(struct drm_sched_job *job) +{ + struct msm_gem_submit *submit = to_msm_submit(job); + + drm_sched_job_cleanup(job); + msm_gem_submit_put(submit); +} + +const struct drm_sched_backend_ops msm_sched_ops = { + .dependency = msm_job_dependency, + .run_job = msm_job_run, + .free_job = msm_job_free +}; + struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int id, void *memptrs, uint64_t memptrs_iova) { struct msm_ringbuffer *ring; + long sched_timeout; char name[32]; int ret; @@ -32,7 +83,7 @@ struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int id, if (IS_ERR(ring->start)) { ret = PTR_ERR(ring->start); - ring->start = 0; + ring->start = NULL; goto fail; } @@ -45,13 +96,23 @@ struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int id, ring->memptrs = memptrs; ring->memptrs_iova = memptrs_iova; + /* currently managing hangcheck ourselves: */ + sched_timeout = MAX_SCHEDULE_TIMEOUT; + + ret = drm_sched_init(&ring->sched, &msm_sched_ops, + num_hw_submissions, 0, sched_timeout, + NULL, NULL, to_msm_bo(ring->bo)->name); + if (ret) { + goto fail; + } + INIT_LIST_HEAD(&ring->submits); spin_lock_init(&ring->submit_lock); spin_lock_init(&ring->preempt_lock); snprintf(name, sizeof(name), "gpu-ring-%d", ring->id); - ring->fctx = msm_fence_context_alloc(gpu->dev, name); + ring->fctx = msm_fence_context_alloc(gpu->dev, &ring->memptrs->fence, name); return ring; @@ -65,9 +126,11 @@ void msm_ringbuffer_destroy(struct msm_ringbuffer *ring) if (IS_ERR_OR_NULL(ring)) return; + drm_sched_fini(&ring->sched); + msm_fence_context_free(ring->fctx); - msm_gem_kernel_put(ring->bo, ring->gpu->aspace, false); + msm_gem_kernel_put(ring->bo, ring->gpu->aspace); kfree(ring); } diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.h b/drivers/gpu/drm/msm/msm_ringbuffer.h index fe55d4a1aa16..d8c63df4e9ca 100644 --- 
a/drivers/gpu/drm/msm/msm_ringbuffer.h +++ b/drivers/gpu/drm/msm/msm_ringbuffer.h @@ -7,6 +7,7 @@ #ifndef __MSM_RINGBUFFER_H__ #define __MSM_RINGBUFFER_H__ +#include "drm/gpu_scheduler.h" #include "msm_drv.h" #define rbmemptr(ring, member) \ @@ -41,7 +42,18 @@ struct msm_ringbuffer { uint32_t *start, *end, *cur, *next; /* + * The job scheduler for this ring. + */ + struct drm_gpu_scheduler sched; + + /* * List of in-flight submits on this ring. Protected by submit_lock. + * + * Currently just submits that are already written into the ring, not + * submits that are still in drm_gpu_scheduler's queues. At a later + * step we could probably move to letting drm_gpu_scheduler manage + * hangcheck detection and keep track of submit jobs that are in- + * flight. */ struct list_head submits; spinlock_t submit_lock; diff --git a/drivers/gpu/drm/msm/msm_submitqueue.c b/drivers/gpu/drm/msm/msm_submitqueue.c index c3d206105d28..32a55d81b58b 100644 --- a/drivers/gpu/drm/msm/msm_submitqueue.c +++ b/drivers/gpu/drm/msm/msm_submitqueue.c @@ -12,6 +12,10 @@ void msm_submitqueue_destroy(struct kref *kref) struct msm_gpu_submitqueue *queue = container_of(kref, struct msm_gpu_submitqueue, ref); + idr_destroy(&queue->fence_idr); + + drm_sched_entity_destroy(&queue->entity); + msm_file_private_put(queue->ctx); kfree(queue); @@ -62,10 +66,22 @@ int msm_submitqueue_create(struct drm_device *drm, struct msm_file_private *ctx, { struct msm_drm_private *priv = drm->dev_private; struct msm_gpu_submitqueue *queue; + struct msm_ringbuffer *ring; + struct drm_gpu_scheduler *sched; + enum drm_sched_priority sched_prio; + unsigned ring_nr; + int ret; if (!ctx) return -ENODEV; + if (!priv->gpu) + return -ENODEV; + + ret = msm_gpu_convert_priority(priv->gpu, prio, &ring_nr, &sched_prio); + if (ret) + return ret; + queue = kzalloc(sizeof(*queue), GFP_KERNEL); if (!queue) @@ -73,14 +89,16 @@ int msm_submitqueue_create(struct drm_device *drm, struct msm_file_private *ctx, kref_init(&queue->ref); queue->flags = flags; + queue->ring_nr = ring_nr; - if (priv->gpu) { - if (prio >= priv->gpu->nr_rings) { - kfree(queue); - return -EINVAL; - } + ring = priv->gpu->rb[ring_nr]; + sched = &ring->sched; - queue->prio = prio; + ret = drm_sched_entity_init(&queue->entity, + sched_prio, &sched, 1, NULL); + if (ret) { + kfree(queue); + return ret; } write_lock(&ctx->queuelock); @@ -91,6 +109,9 @@ int msm_submitqueue_create(struct drm_device *drm, struct msm_file_private *ctx, if (id) *id = queue->id; + idr_init(&queue->fence_idr); + mutex_init(&queue->lock); + list_add_tail(&queue->node, &ctx->submitqueues); write_unlock(&ctx->queuelock); @@ -98,20 +119,26 @@ int msm_submitqueue_create(struct drm_device *drm, struct msm_file_private *ctx, return 0; } +/* + * Create the default submit-queue (id==0), used for backwards compatibility + * for userspace that pre-dates the introduction of submitqueues. + */ int msm_submitqueue_init(struct drm_device *drm, struct msm_file_private *ctx) { struct msm_drm_private *priv = drm->dev_private; - int default_prio; + int default_prio, max_priority; - if (!ctx) - return 0; + if (!priv->gpu) + return -ENODEV; + + max_priority = (priv->gpu->nr_rings * NR_SCHED_PRIORITIES) - 1; /* - * Select priority 2 as the "default priority" unless nr_rings is less - * than 2 and then pick the lowest pirority + * Pick a medium priority level as default. Lower numeric value is + * higher priority, so round-up to pick a priority that is not higher + * than the middle priority level. */ - default_prio = priv->gpu ? 
- clamp_t(uint32_t, 2, 0, priv->gpu->nr_rings - 1) : 0; + default_prio = DIV_ROUND_UP(max_priority, 2); INIT_LIST_HEAD(&ctx->submitqueues); diff --git a/drivers/gpu/drm/mxsfb/mxsfb_drv.c b/drivers/gpu/drm/mxsfb/mxsfb_drv.c index 6da93551e2e5..ec0432fe1bdf 100644 --- a/drivers/gpu/drm/mxsfb/mxsfb_drv.c +++ b/drivers/gpu/drm/mxsfb/mxsfb_drv.c @@ -24,7 +24,6 @@ #include <drm/drm_fourcc.h> #include <drm/drm_gem_cma_helper.h> #include <drm/drm_gem_framebuffer_helper.h> -#include <drm/drm_irq.h> #include <drm/drm_mode_config.h> #include <drm/drm_of.h> #include <drm/drm_probe_helper.h> @@ -51,6 +50,7 @@ static const struct mxsfb_devdata mxsfb_devdata[] = { .hs_wdth_mask = 0xff, .hs_wdth_shift = 24, .has_overlay = false, + .has_ctrl2 = false, }, [MXSFB_V4] = { .transfer_count = LCDC_V4_TRANSFER_COUNT, @@ -59,6 +59,7 @@ static const struct mxsfb_devdata mxsfb_devdata[] = { .hs_wdth_mask = 0x3fff, .hs_wdth_shift = 18, .has_overlay = false, + .has_ctrl2 = true, }, [MXSFB_V6] = { .transfer_count = LCDC_V4_TRANSFER_COUNT, @@ -67,6 +68,7 @@ static const struct mxsfb_devdata mxsfb_devdata[] = { .hs_wdth_mask = 0x3fff, .hs_wdth_shift = 18, .has_overlay = true, + .has_ctrl2 = true, }, }; @@ -150,6 +152,49 @@ static int mxsfb_attach_bridge(struct mxsfb_drm_private *mxsfb) return 0; } +static irqreturn_t mxsfb_irq_handler(int irq, void *data) +{ + struct drm_device *drm = data; + struct mxsfb_drm_private *mxsfb = drm->dev_private; + u32 reg; + + reg = readl(mxsfb->base + LCDC_CTRL1); + + if (reg & CTRL1_CUR_FRAME_DONE_IRQ) + drm_crtc_handle_vblank(&mxsfb->crtc); + + writel(CTRL1_CUR_FRAME_DONE_IRQ, mxsfb->base + LCDC_CTRL1 + REG_CLR); + + return IRQ_HANDLED; +} + +static void mxsfb_irq_disable(struct drm_device *drm) +{ + struct mxsfb_drm_private *mxsfb = drm->dev_private; + + mxsfb_enable_axi_clk(mxsfb); + mxsfb->crtc.funcs->disable_vblank(&mxsfb->crtc); + mxsfb_disable_axi_clk(mxsfb); +} + +static int mxsfb_irq_install(struct drm_device *dev, int irq) +{ + if (irq == IRQ_NOTCONNECTED) + return -ENOTCONN; + + mxsfb_irq_disable(dev); + + return request_irq(irq, mxsfb_irq_handler, 0, dev->driver->name, dev); +} + +static void mxsfb_irq_uninstall(struct drm_device *dev) +{ + struct mxsfb_drm_private *mxsfb = dev->dev_private; + + mxsfb_irq_disable(dev); + free_irq(mxsfb->irq, dev); +} + static int mxsfb_load(struct drm_device *drm, const struct mxsfb_devdata *devdata) { @@ -223,8 +268,13 @@ static int mxsfb_load(struct drm_device *drm, drm_mode_config_reset(drm); + ret = platform_get_irq(pdev, 0); + if (ret < 0) + goto err_vblank; + mxsfb->irq = ret; + pm_runtime_get_sync(drm->dev); - ret = drm_irq_install(drm, platform_get_irq(pdev, 0)); + ret = mxsfb_irq_install(drm, mxsfb->irq); pm_runtime_put_sync(drm->dev); if (ret < 0) { @@ -252,7 +302,7 @@ static void mxsfb_unload(struct drm_device *drm) drm_mode_config_cleanup(drm); pm_runtime_get_sync(drm->dev); - drm_irq_uninstall(drm); + mxsfb_irq_uninstall(drm); pm_runtime_put_sync(drm->dev); drm->dev_private = NULL; @@ -260,38 +310,10 @@ static void mxsfb_unload(struct drm_device *drm) pm_runtime_disable(drm->dev); } -static void mxsfb_irq_disable(struct drm_device *drm) -{ - struct mxsfb_drm_private *mxsfb = drm->dev_private; - - mxsfb_enable_axi_clk(mxsfb); - mxsfb->crtc.funcs->disable_vblank(&mxsfb->crtc); - mxsfb_disable_axi_clk(mxsfb); -} - -static irqreturn_t mxsfb_irq_handler(int irq, void *data) -{ - struct drm_device *drm = data; - struct mxsfb_drm_private *mxsfb = drm->dev_private; - u32 reg; - - reg = readl(mxsfb->base + LCDC_CTRL1); - - if (reg & 
CTRL1_CUR_FRAME_DONE_IRQ) - drm_crtc_handle_vblank(&mxsfb->crtc); - - writel(CTRL1_CUR_FRAME_DONE_IRQ, mxsfb->base + LCDC_CTRL1 + REG_CLR); - - return IRQ_HANDLED; -} - DEFINE_DRM_GEM_CMA_FOPS(fops); static const struct drm_driver mxsfb_driver = { .driver_features = DRIVER_GEM | DRIVER_MODESET | DRIVER_ATOMIC, - .irq_handler = mxsfb_irq_handler, - .irq_preinstall = mxsfb_irq_disable, - .irq_uninstall = mxsfb_irq_disable, DRM_GEM_CMA_DRIVER_OPS, .fops = &fops, .name = "mxsfb-drm", diff --git a/drivers/gpu/drm/mxsfb/mxsfb_drv.h b/drivers/gpu/drm/mxsfb/mxsfb_drv.h index 399d23e91ed1..ddb5b0417a82 100644 --- a/drivers/gpu/drm/mxsfb/mxsfb_drv.h +++ b/drivers/gpu/drm/mxsfb/mxsfb_drv.h @@ -22,6 +22,7 @@ struct mxsfb_devdata { unsigned int hs_wdth_mask; unsigned int hs_wdth_shift; bool has_overlay; + bool has_ctrl2; }; struct mxsfb_drm_private { @@ -32,6 +33,8 @@ struct mxsfb_drm_private { struct clk *clk_axi; struct clk *clk_disp_axi; + unsigned int irq; + struct drm_device *drm; struct { struct drm_plane primary; diff --git a/drivers/gpu/drm/mxsfb/mxsfb_kms.c b/drivers/gpu/drm/mxsfb/mxsfb_kms.c index 300e7bab0f43..89dd618d78f3 100644 --- a/drivers/gpu/drm/mxsfb/mxsfb_kms.c +++ b/drivers/gpu/drm/mxsfb/mxsfb_kms.c @@ -47,16 +47,13 @@ static u32 set_hsync_pulse_width(struct mxsfb_drm_private *mxsfb, u32 val) * Setup the MXSFB registers for decoding the pixels out of the framebuffer and * outputting them on the bus. */ -static void mxsfb_set_formats(struct mxsfb_drm_private *mxsfb) +static void mxsfb_set_formats(struct mxsfb_drm_private *mxsfb, + const u32 bus_format) { struct drm_device *drm = mxsfb->drm; const u32 format = mxsfb->crtc.primary->state->fb->format->format; - u32 bus_format = MEDIA_BUS_FMT_RGB888_1X24; u32 ctrl, ctrl1; - if (mxsfb->connector->display_info.num_bus_formats) - bus_format = mxsfb->connector->display_info.bus_formats[0]; - DRM_DEV_DEBUG_DRIVER(drm->dev, "Using bus_format: 0x%08X\n", bus_format); @@ -107,6 +104,14 @@ static void mxsfb_enable_controller(struct mxsfb_drm_private *mxsfb) clk_prepare_enable(mxsfb->clk_disp_axi); clk_prepare_enable(mxsfb->clk); + /* Increase number of outstanding requests on all supported IPs */ + if (mxsfb->devdata->has_ctrl2) { + reg = readl(mxsfb->base + LCDC_V4_CTRL2); + reg &= ~CTRL2_SET_OUTSTANDING_REQS_MASK; + reg |= CTRL2_SET_OUTSTANDING_REQS_16; + writel(reg, mxsfb->base + LCDC_V4_CTRL2); + } + /* If it was disabled, re-enable the mode again */ writel(CTRL_DOTCLK_MODE, mxsfb->base + LCDC_CTRL + REG_SET); @@ -115,6 +120,35 @@ static void mxsfb_enable_controller(struct mxsfb_drm_private *mxsfb) reg |= VDCTRL4_SYNC_SIGNALS_ON; writel(reg, mxsfb->base + LCDC_VDCTRL4); + /* + * Enable recovery on underflow. + * + * There is some sort of corner case behavior of the controller, + * which could rarely be triggered at least on i.MX6SX connected + * to 800x480 DPI panel and i.MX8MM connected to DPI->DSI->LVDS + * bridged 1920x1080 panel (and likely on other setups too), where + * the image on the panel shifts to the right and wraps around. + * This happens either when the controller is enabled on boot or + * even later during run time. The condition does not correct + * itself automatically, i.e. the display image remains shifted. + * + * It seems this problem is known and is due to sporadic underflows + * of the LCDIF FIFO. While the LCDIF IP does have underflow/overflow + * IRQs, neither of the IRQs trigger and neither IRQ status bit is + * asserted when this condition occurs. 
+ * + * All known revisions of the LCDIF IP have CTRL1 RECOVER_ON_UNDERFLOW + * bit, which is described in the reference manual since i.MX23 as + * " + * Set this bit to enable the LCDIF block to recover in the next + * field/frame if there was an underflow in the current field/frame. + * " + * Enable this bit to mitigate the sporadic underflows. + */ + reg = readl(mxsfb->base + LCDC_CTRL1); + reg |= CTRL1_RECOVER_ON_UNDERFLOW; + writel(reg, mxsfb->base + LCDC_CTRL1); + writel(CTRL_RUN, mxsfb->base + LCDC_CTRL + REG_SET); } @@ -185,7 +219,8 @@ static dma_addr_t mxsfb_get_fb_paddr(struct drm_plane *plane) return gem->paddr; } -static void mxsfb_crtc_mode_set_nofb(struct mxsfb_drm_private *mxsfb) +static void mxsfb_crtc_mode_set_nofb(struct mxsfb_drm_private *mxsfb, + const u32 bus_format) { struct drm_device *drm = mxsfb->crtc.dev; struct drm_display_mode *m = &mxsfb->crtc.state->adjusted_mode; @@ -206,11 +241,14 @@ static void mxsfb_crtc_mode_set_nofb(struct mxsfb_drm_private *mxsfb) /* Clear the FIFOs */ writel(CTRL1_FIFO_CLEAR, mxsfb->base + LCDC_CTRL1 + REG_SET); + readl(mxsfb->base + LCDC_CTRL1); + writel(CTRL1_FIFO_CLEAR, mxsfb->base + LCDC_CTRL1 + REG_CLR); + readl(mxsfb->base + LCDC_CTRL1); if (mxsfb->devdata->has_overlay) writel(0, mxsfb->base + LCDC_AS_CTRL); - mxsfb_set_formats(mxsfb); + mxsfb_set_formats(mxsfb, bus_format); clk_set_rate(mxsfb->clk, m->crtc_clock * 1000); @@ -308,7 +346,9 @@ static void mxsfb_crtc_atomic_enable(struct drm_crtc *crtc, struct drm_atomic_state *state) { struct mxsfb_drm_private *mxsfb = to_mxsfb_drm_private(crtc->dev); + struct drm_bridge_state *bridge_state; struct drm_device *drm = mxsfb->drm; + u32 bus_format = 0; dma_addr_t paddr; pm_runtime_get_sync(drm->dev); @@ -316,7 +356,23 @@ static void mxsfb_crtc_atomic_enable(struct drm_crtc *crtc, drm_crtc_vblank_on(crtc); - mxsfb_crtc_mode_set_nofb(mxsfb); + /* If there is a bridge attached to the LCDIF, use its bus format */ + if (mxsfb->bridge) { + bridge_state = + drm_atomic_get_new_bridge_state(state, + mxsfb->bridge); + bus_format = bridge_state->input_bus_cfg.format; + } + + /* If there is no bridge, use bus format from connector */ + if (!bus_format && mxsfb->connector->display_info.num_bus_formats) + bus_format = mxsfb->connector->display_info.bus_formats[0]; + + /* If all else fails, default to RGB888_1X24 */ + if (!bus_format) + bus_format = MEDIA_BUS_FMT_RGB888_1X24; + + mxsfb_crtc_mode_set_nofb(mxsfb, bus_format); /* Write cur_buf as well to avoid an initial corrupt frame */ paddr = mxsfb_get_fb_paddr(crtc->primary); @@ -500,13 +556,11 @@ static bool mxsfb_format_mod_supported(struct drm_plane *plane, } static const struct drm_plane_helper_funcs mxsfb_plane_primary_helper_funcs = { - .prepare_fb = drm_gem_plane_helper_prepare_fb, .atomic_check = mxsfb_plane_atomic_check, .atomic_update = mxsfb_plane_primary_atomic_update, }; static const struct drm_plane_helper_funcs mxsfb_plane_overlay_helper_funcs = { - .prepare_fb = drm_gem_plane_helper_prepare_fb, .atomic_check = mxsfb_plane_atomic_check, .atomic_update = mxsfb_plane_overlay_atomic_update, }; diff --git a/drivers/gpu/drm/mxsfb/mxsfb_regs.h b/drivers/gpu/drm/mxsfb/mxsfb_regs.h index 55d28a27f912..694fea13e893 100644 --- a/drivers/gpu/drm/mxsfb/mxsfb_regs.h +++ b/drivers/gpu/drm/mxsfb/mxsfb_regs.h @@ -15,6 +15,7 @@ #define LCDC_CTRL 0x00 #define LCDC_CTRL1 0x10 #define LCDC_V3_TRANSFER_COUNT 0x20 +#define LCDC_V4_CTRL2 0x20 #define LCDC_V4_TRANSFER_COUNT 0x30 #define LCDC_V4_CUR_BUF 0x40 #define LCDC_V4_NEXT_BUF 0x50 @@ -54,12 +55,20 @@ 
#define CTRL_DF24 BIT(1) #define CTRL_RUN BIT(0) +#define CTRL1_RECOVER_ON_UNDERFLOW BIT(24) #define CTRL1_FIFO_CLEAR BIT(21) #define CTRL1_SET_BYTE_PACKAGING(x) (((x) & 0xf) << 16) #define CTRL1_GET_BYTE_PACKAGING(x) (((x) >> 16) & 0xf) #define CTRL1_CUR_FRAME_DONE_IRQ_EN BIT(13) #define CTRL1_CUR_FRAME_DONE_IRQ BIT(9) +#define CTRL2_SET_OUTSTANDING_REQS_1 0 +#define CTRL2_SET_OUTSTANDING_REQS_2 (0x1 << 21) +#define CTRL2_SET_OUTSTANDING_REQS_4 (0x2 << 21) +#define CTRL2_SET_OUTSTANDING_REQS_8 (0x3 << 21) +#define CTRL2_SET_OUTSTANDING_REQS_16 (0x4 << 21) +#define CTRL2_SET_OUTSTANDING_REQS_MASK (0x7 << 21) + #define TRANSFER_COUNT_SET_VCOUNT(x) (((x) & 0xffff) << 16) #define TRANSFER_COUNT_GET_VCOUNT(x) (((x) >> 16) & 0xffff) #define TRANSFER_COUNT_SET_HCOUNT(x) ((x) & 0xffff) diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c index bcb0310a41b6..d7b9f7f8c9e3 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/disp.c +++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c @@ -31,6 +31,7 @@ #include <linux/dma-mapping.h> #include <linux/hdmi.h> #include <linux/component.h> +#include <linux/iopoll.h> #include <drm/drm_atomic.h> #include <drm/drm_atomic_helper.h> @@ -1649,17 +1650,36 @@ nv50_sor_update(struct nouveau_encoder *nv_encoder, u8 head, core->func->sor->ctrl(core, nv_encoder->or, nv_encoder->ctrl, asyh); } +/* TODO: Should we extend this to PWM-only backlights? + * As well, should we add a DRM helper for waiting for the backlight to acknowledge + * the panel backlight has been shut off? Intel doesn't seem to do this, and uses a + * fixed time delay from the vbios… + */ static void nv50_sor_atomic_disable(struct drm_encoder *encoder, struct drm_atomic_state *state) { struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder); struct nouveau_crtc *nv_crtc = nouveau_crtc(nv_encoder->crtc); struct nouveau_connector *nv_connector = nv50_outp_get_old_connector(state, nv_encoder); +#ifdef CONFIG_DRM_NOUVEAU_BACKLIGHT + struct nouveau_drm *drm = nouveau_drm(nv_encoder->base.base.dev); + struct nouveau_backlight *backlight = nv_connector->backlight; +#endif struct drm_dp_aux *aux = &nv_connector->aux; + int ret; u8 pwr; +#ifdef CONFIG_DRM_NOUVEAU_BACKLIGHT + if (backlight && backlight->uses_dpcd) { + ret = drm_edp_backlight_disable(aux, &backlight->edp_info); + if (ret < 0) + NV_ERROR(drm, "Failed to disable backlight on [CONNECTOR:%d:%s]: %d\n", + nv_connector->base.base.id, nv_connector->base.name, ret); + } +#endif + if (nv_encoder->dcb->type == DCB_OUTPUT_DP) { - int ret = drm_dp_dpcd_readb(aux, DP_SET_POWER, &pwr); + ret = drm_dp_dpcd_readb(aux, DP_SET_POWER, &pwr); if (ret == 0) { pwr &= ~DP_SET_POWER_MASK; @@ -1696,6 +1716,9 @@ nv50_sor_atomic_enable(struct drm_encoder *encoder, struct drm_atomic_state *sta struct drm_device *dev = encoder->dev; struct nouveau_drm *drm = nouveau_drm(dev); struct nouveau_connector *nv_connector; +#ifdef CONFIG_DRM_NOUVEAU_BACKLIGHT + struct nouveau_backlight *backlight; +#endif struct nvbios *bios = &drm->vbios; bool hda = false; u8 proto = NV507D_SOR_SET_CONTROL_PROTOCOL_CUSTOM; @@ -1770,6 +1793,14 @@ nv50_sor_atomic_enable(struct drm_encoder *encoder, struct drm_atomic_state *sta proto = NV887D_SOR_SET_CONTROL_PROTOCOL_DP_B; nv50_audio_enable(encoder, nv_crtc, nv_connector, state, mode); + +#ifdef CONFIG_DRM_NOUVEAU_BACKLIGHT + backlight = nv_connector->backlight; + if (backlight && backlight->uses_dpcd) + drm_edp_backlight_enable(&nv_connector->aux, &backlight->edp_info, + (u16)backlight->dev->props.brightness); 
+#endif + break; default: BUG(); @@ -2322,6 +2353,7 @@ nv50_disp_atomic_commit_tail(struct drm_atomic_state *state) nv50_crc_atomic_start_reporting(state); if (!flushed) nv50_crc_atomic_release_notifier_contexts(state); + drm_atomic_helper_commit_hw_done(state); drm_atomic_helper_cleanup_planes(dev, state); drm_atomic_helper_commit_cleanup_done(state); diff --git a/drivers/gpu/drm/nouveau/nouveau_backlight.c b/drivers/gpu/drm/nouveau/nouveau_backlight.c index 72f35a2babcb..1cbd71abc80a 100644 --- a/drivers/gpu/drm/nouveau/nouveau_backlight.c +++ b/drivers/gpu/drm/nouveau/nouveau_backlight.c @@ -42,11 +42,6 @@ static struct ida bl_ida; #define BL_NAME_SIZE 15 // 12 for name + 2 for digits + 1 for '\0' -struct nouveau_backlight { - struct backlight_device *dev; - int id; -}; - static bool nouveau_get_backlight_name(char backlight_name[BL_NAME_SIZE], struct nouveau_backlight *bl) @@ -148,6 +143,98 @@ static const struct backlight_ops nv50_bl_ops = { .update_status = nv50_set_intensity, }; +/* + * eDP brightness callbacks need to happen under lock, since we need to + * enable/disable the backlight ourselves for modesets + */ +static int +nv50_edp_get_brightness(struct backlight_device *bd) +{ + struct drm_connector *connector = dev_get_drvdata(bd->dev.parent); + struct drm_device *dev = connector->dev; + struct drm_crtc *crtc; + struct drm_modeset_acquire_ctx ctx; + int ret = 0; + + drm_modeset_acquire_init(&ctx, 0); + +retry: + ret = drm_modeset_lock(&dev->mode_config.connection_mutex, &ctx); + if (ret == -EDEADLK) + goto deadlock; + else if (ret < 0) + goto out; + + crtc = connector->state->crtc; + if (!crtc) + goto out; + + ret = drm_modeset_lock(&crtc->mutex, &ctx); + if (ret == -EDEADLK) + goto deadlock; + else if (ret < 0) + goto out; + + if (!crtc->state->active) + goto out; + + ret = bd->props.brightness; +out: + drm_modeset_drop_locks(&ctx); + drm_modeset_acquire_fini(&ctx); + return ret; +deadlock: + drm_modeset_backoff(&ctx); + goto retry; +} + +static int +nv50_edp_set_brightness(struct backlight_device *bd) +{ + struct drm_connector *connector = dev_get_drvdata(bd->dev.parent); + struct nouveau_connector *nv_connector = nouveau_connector(connector); + struct drm_device *dev = connector->dev; + struct drm_crtc *crtc; + struct drm_dp_aux *aux = &nv_connector->aux; + struct nouveau_backlight *nv_bl = nv_connector->backlight; + struct drm_modeset_acquire_ctx ctx; + int ret = 0; + + drm_modeset_acquire_init(&ctx, 0); +retry: + ret = drm_modeset_lock(&dev->mode_config.connection_mutex, &ctx); + if (ret == -EDEADLK) + goto deadlock; + else if (ret < 0) + goto out; + + crtc = connector->state->crtc; + if (!crtc) + goto out; + + ret = drm_modeset_lock(&crtc->mutex, &ctx); + if (ret == -EDEADLK) + goto deadlock; + else if (ret < 0) + goto out; + + if (crtc->state->active) + ret = drm_edp_backlight_set_level(aux, &nv_bl->edp_info, bd->props.brightness); + +out: + drm_modeset_drop_locks(&ctx); + drm_modeset_acquire_fini(&ctx); + return ret; +deadlock: + drm_modeset_backoff(&ctx); + goto retry; +} + +static const struct backlight_ops nv50_edp_bl_ops = { + .get_brightness = nv50_edp_get_brightness, + .update_status = nv50_edp_set_brightness, +}; + static int nva3_get_intensity(struct backlight_device *bd) { @@ -194,8 +281,13 @@ static const struct backlight_ops nva3_bl_ops = { .update_status = nva3_set_intensity, }; +/* FIXME: perform backlight probing for eDP _before_ this, this only gets called after connector + * registration which happens after the initial modeset + */ static int 
-nv50_backlight_init(struct nouveau_encoder *nv_encoder, +nv50_backlight_init(struct nouveau_backlight *bl, + struct nouveau_connector *nv_conn, + struct nouveau_encoder *nv_encoder, struct backlight_properties *props, const struct backlight_ops **ops) { @@ -205,6 +297,41 @@ nv50_backlight_init(struct nouveau_encoder *nv_encoder, if (!nvif_rd32(device, NV50_PDISP_SOR_PWM_CTL(ffs(nv_encoder->dcb->or) - 1))) return -ENODEV; + if (nv_conn->type == DCB_CONNECTOR_eDP) { + int ret; + u16 current_level; + u8 edp_dpcd[EDP_DISPLAY_CTL_CAP_SIZE]; + u8 current_mode; + + ret = drm_dp_dpcd_read(&nv_conn->aux, DP_EDP_DPCD_REV, edp_dpcd, + EDP_DISPLAY_CTL_CAP_SIZE); + if (ret < 0) + return ret; + + if (drm_edp_backlight_supported(edp_dpcd)) { + NV_DEBUG(drm, "DPCD backlight controls supported on %s\n", + nv_conn->base.name); + + ret = drm_edp_backlight_init(&nv_conn->aux, &bl->edp_info, 0, edp_dpcd, + &current_level, &current_mode); + if (ret < 0) + return ret; + + ret = drm_edp_backlight_enable(&nv_conn->aux, &bl->edp_info, current_level); + if (ret < 0) { + NV_ERROR(drm, "Failed to enable backlight on %s: %d\n", + nv_conn->base.name, ret); + return ret; + } + + *ops = &nv50_edp_bl_ops; + props->brightness = current_level; + props->max_brightness = bl->edp_info.max; + bl->uses_dpcd = true; + return 0; + } + } + if (drm->client.device.info.chipset <= 0xa0 || drm->client.device.info.chipset == 0xaa || drm->client.device.info.chipset == 0xac) @@ -245,6 +372,10 @@ nouveau_backlight_init(struct drm_connector *connector) if (!nv_encoder) return 0; + bl = kzalloc(sizeof(*bl), GFP_KERNEL); + if (!bl) + return -ENOMEM; + switch (device->info.family) { case NV_DEVICE_INFO_V0_CURIE: ret = nv40_backlight_init(nv_encoder, &props, &ops); @@ -257,20 +388,19 @@ case NV_DEVICE_INFO_V0_VOLTA: case NV_DEVICE_INFO_V0_TURING: case NV_DEVICE_INFO_V0_AMPERE: //XXX: not confirmed - ret = nv50_backlight_init(nv_encoder, &props, &ops); + ret = nv50_backlight_init(bl, nouveau_connector(connector), + nv_encoder, &props, &ops); break; default: - return 0; + ret = 0; + goto fail_alloc; } - if (ret == -ENODEV) - return 0; - else if (ret) - return ret; - - bl = kzalloc(sizeof(*bl), GFP_KERNEL); - if (!bl) - return -ENOMEM; + if (ret) { + if (ret == -ENODEV) + ret = 0; + goto fail_alloc; + } if (!nouveau_get_backlight_name(backlight_name, bl)) { NV_ERROR(drm, "Failed to retrieve a unique name for the backlight interface\n"); @@ -287,7 +417,9 @@ nouveau_backlight_init(struct drm_connector *connector) } nouveau_connector(connector)->backlight = bl; - bl->dev->props.brightness = bl->dev->ops->get_brightness(bl->dev); + if (!bl->dev->props.brightness) + bl->dev->props.brightness = + bl->dev->ops->get_brightness(bl->dev); backlight_update_status(bl->dev); return 0; diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.h b/drivers/gpu/drm/nouveau/nouveau_connector.h index d0b859c4a80e..40f90e353540 100644 --- a/drivers/gpu/drm/nouveau/nouveau_connector.h +++ b/drivers/gpu/drm/nouveau/nouveau_connector.h @@ -46,7 +46,14 @@ struct nvkm_i2c_port; struct dcb_output; #ifdef CONFIG_DRM_NOUVEAU_BACKLIGHT -struct nouveau_backlight; +struct nouveau_backlight { + struct backlight_device *dev; + + struct drm_edp_backlight_info edp_info; + bool uses_dpcd : 1; + + int id; +}; #endif #define nouveau_conn_atom(p) \ diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index ba4cd5f83725..1f828c9f691c 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ 
b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -554,8 +554,6 @@ nouveau_drm_device_init(struct drm_device *dev) if (ret) goto fail_master; - dev->irq_enabled = true; - nvxx_client(&drm->client.base)->debug = nvkm_dbgopt(nouveau_debug, "DRM"); @@ -739,7 +737,7 @@ static int nouveau_drm_probe(struct pci_dev *pdev, nvkm_device_del(&device); /* Remove conflicting drivers (vesafb, efifb etc). */ - ret = drm_aperture_remove_conflicting_pci_framebuffers(pdev, "nouveaufb"); + ret = drm_aperture_remove_conflicting_pci_framebuffers(pdev, &driver_pci); if (ret) return ret; @@ -796,7 +794,6 @@ nouveau_drm_device_remove(struct drm_device *dev) drm_dev_unregister(dev); - dev->irq_enabled = false; client = nvxx_client(&drm->client.base); device = nvkm_device_find(client->device); diff --git a/drivers/gpu/drm/nouveau/nouveau_encoder.h b/drivers/gpu/drm/nouveau/nouveau_encoder.h index 1ffcc0a491fd..77c2fed76e8b 100644 --- a/drivers/gpu/drm/nouveau/nouveau_encoder.h +++ b/drivers/gpu/drm/nouveau/nouveau_encoder.h @@ -30,6 +30,7 @@ #include <subdev/bios/dcb.h> #include <drm/drm_encoder_slave.h> +#include <drm/drm_dp_helper.h> #include <drm/drm_dp_mst_helper.h> #include "dispnv04/disp.h" struct nv50_head_atom; diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c index 6b43918035df..05d0b3eb3690 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.c +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c @@ -358,7 +358,7 @@ nouveau_fence_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan, bool e fobj = dma_resv_shared_list(resv); fence = dma_resv_excl_fence(resv); - if (fence && (!exclusive || !fobj || !fobj->shared_count)) { + if (fence) { struct nouveau_channel *prev = NULL; bool must_wait = true; diff --git a/drivers/gpu/drm/nouveau/nouveau_vga.c b/drivers/gpu/drm/nouveau/nouveau_vga.c index 7c4b374b3eca..60cd8c0463df 100644 --- a/drivers/gpu/drm/nouveau/nouveau_vga.c +++ b/drivers/gpu/drm/nouveau/nouveau_vga.c @@ -11,9 +11,9 @@ #include "nouveau_vga.h" static unsigned int -nouveau_vga_set_decode(void *priv, bool state) +nouveau_vga_set_decode(struct pci_dev *pdev, bool state) { - struct nouveau_drm *drm = nouveau_drm(priv); + struct nouveau_drm *drm = nouveau_drm(pci_get_drvdata(pdev)); struct nvif_object *device = &drm->client.device.object; if (drm->client.device.info.family == NV_DEVICE_INFO_V0_CURIE && @@ -94,7 +94,7 @@ nouveau_vga_init(struct nouveau_drm *drm) return; pdev = to_pci_dev(dev->dev); - vga_client_register(pdev, dev, NULL, nouveau_vga_set_decode); + vga_client_register(pdev, nouveau_vga_set_decode); /* don't register Thunderbolt eGPU with vga_switcheroo */ if (pci_is_thunderbolt_attached(pdev)) @@ -118,7 +118,7 @@ nouveau_vga_fini(struct nouveau_drm *drm) return; pdev = to_pci_dev(dev->dev); - vga_client_register(pdev, NULL, NULL, NULL); + vga_client_unregister(pdev); if (pci_is_thunderbolt_attached(pdev)) return; diff --git a/drivers/gpu/drm/omapdrm/omap_drv.c b/drivers/gpu/drm/omapdrm/omap_drv.c index 8632139e0f01..f86e20578143 100644 --- a/drivers/gpu/drm/omapdrm/omap_drv.c +++ b/drivers/gpu/drm/omapdrm/omap_drv.c @@ -290,12 +290,8 @@ static int omap_modeset_init(struct drm_device *dev) ret = drm_bridge_attach(pipe->encoder, pipe->output->bridge, NULL, DRM_BRIDGE_ATTACH_NO_CONNECTOR); - if (ret < 0) { - dev_err(priv->dev, - "unable to attach bridge %pOF\n", - pipe->output->bridge->of_node); + if (ret < 0) return ret; - } } id = omap_display_id(pipe->output); diff --git a/drivers/gpu/drm/omapdrm/omap_drv.h b/drivers/gpu/drm/omapdrm/omap_drv.h index 
d6f136984da9..591d4c273f02 100644 --- a/drivers/gpu/drm/omapdrm/omap_drv.h +++ b/drivers/gpu/drm/omapdrm/omap_drv.h @@ -48,6 +48,8 @@ struct omap_drm_private { struct dss_device *dss; struct dispc_device *dispc; + bool irq_enabled; + unsigned int num_pipes; struct omap_drm_pipeline pipes[8]; struct omap_drm_pipeline *channels[8]; diff --git a/drivers/gpu/drm/omapdrm/omap_irq.c b/drivers/gpu/drm/omapdrm/omap_irq.c index 15148d4b35b5..4aca14dab927 100644 --- a/drivers/gpu/drm/omapdrm/omap_irq.c +++ b/drivers/gpu/drm/omapdrm/omap_irq.c @@ -253,13 +253,6 @@ static const u32 omap_underflow_irqs[] = { [OMAP_DSS_VIDEO3] = DISPC_IRQ_VID3_FIFO_UNDERFLOW, }; -/* - * We need a special version, instead of just using drm_irq_install(), - * because we need to register the irq via omapdss. Once omapdss and - * omapdrm are merged together we can assign the dispc hwmod data to - * ourselves and drop these and just use drm_irq_{install,uninstall}() - */ - int omap_drm_irq_install(struct drm_device *dev) { struct omap_drm_private *priv = dev->dev_private; @@ -291,7 +284,7 @@ int omap_drm_irq_install(struct drm_device *dev) if (ret < 0) return ret; - dev->irq_enabled = true; + priv->irq_enabled = true; return 0; } @@ -300,10 +293,10 @@ void omap_drm_irq_uninstall(struct drm_device *dev) { struct omap_drm_private *priv = dev->dev_private; - if (!dev->irq_enabled) + if (!priv->irq_enabled) return; - dev->irq_enabled = false; + priv->irq_enabled = false; dispc_free_irq(priv->dispc, dev); } diff --git a/drivers/gpu/drm/omapdrm/omap_plane.c b/drivers/gpu/drm/omapdrm/omap_plane.c index 801da917507d..512af976b7e9 100644 --- a/drivers/gpu/drm/omapdrm/omap_plane.c +++ b/drivers/gpu/drm/omapdrm/omap_plane.c @@ -6,6 +6,7 @@ #include <drm/drm_atomic.h> #include <drm/drm_atomic_helper.h> +#include <drm/drm_gem_atomic_helper.h> #include <drm/drm_plane_helper.h> #include "omap_dmm_tiler.h" @@ -29,6 +30,8 @@ static int omap_plane_prepare_fb(struct drm_plane *plane, if (!new_state->fb) return 0; + drm_gem_plane_helper_prepare_fb(plane, new_state); + return omap_framebuffer_pin(new_state->fb); } diff --git a/drivers/gpu/drm/panel/Kconfig b/drivers/gpu/drm/panel/Kconfig index ef87d92cdf49..beb581b96ecd 100644 --- a/drivers/gpu/drm/panel/Kconfig +++ b/drivers/gpu/drm/panel/Kconfig @@ -82,6 +82,7 @@ config DRM_PANEL_SIMPLE depends on BACKLIGHT_CLASS_DEVICE depends on PM select VIDEOMODE_HELPERS + select DRM_DP_AUX_BUS help DRM panel driver for dumb panels that need at most a regulator and a GPIO to be powered up. Optionally a backlight can be attached so @@ -124,6 +125,18 @@ config DRM_PANEL_ILITEK_IL9322 Say Y here if you want to enable support for Ilitek IL9322 QVGA (320x240) RGB, YUV and ITU-T BT.656 panels. +config DRM_PANEL_ILITEK_ILI9341 + tristate "Ilitek ILI9341 240x320 QVGA panels" + depends on OF && SPI + depends on DRM_KMS_HELPER + depends on DRM_KMS_CMA_HELPER + depends on BACKLIGHT_CLASS_DEVICE + select DRM_MIPI_DBI + help + Say Y here if you want to enable support for Ilitek IL9341 + QVGA (240x320) RGB panels. support serial & parallel rgb + interface. + config DRM_PANEL_ILITEK_ILI9881C tristate "Ilitek ILI9881C-based panels" depends on OF @@ -133,6 +146,15 @@ config DRM_PANEL_ILITEK_ILI9881C Say Y if you want to enable support for panels based on the Ilitek ILI9881c controller. 
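The omapdrm prepare_fb hunk above illustrates a pattern that repeats across this series: drivers whose .prepare_fb hook did nothing beyond drm_gem_plane_helper_prepare_fb() drop the hook (as in the mxsfb hunk earlier), while drivers with extra preparation work now call the helper themselves so the implicit fences on the GEM objects still reach the plane state. A minimal sketch of the explicit-call case; foo_framebuffer_pin() and the surrounding driver are hypothetical, not taken from any of these patches:

#include <drm/drm_framebuffer.h>
#include <drm/drm_gem_atomic_helper.h>
#include <drm/drm_plane.h>

/* Hypothetical driver-specific pin step, declared only to keep the sketch self-contained. */
static int foo_framebuffer_pin(struct drm_framebuffer *fb);

static int foo_plane_prepare_fb(struct drm_plane *plane,
				struct drm_plane_state *new_state)
{
	int ret;

	if (!new_state->fb)
		return 0;

	/* Attach the GEM object's implicit fences to the new plane state. */
	ret = drm_gem_plane_helper_prepare_fb(plane, new_state);
	if (ret)
		return ret;

	/* Then do the driver's own preparation (pinning, IOMMU mapping, ...). */
	return foo_framebuffer_pin(new_state->fb);
}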
+config DRM_PANEL_INNOLUX_EJ030NA + tristate "Innolux EJ030NA 320x480 LCD panel" + depends on OF && SPI + select REGMAP_SPI + help + Say Y here to enable support for the Innolux/Chimei EJ030NA + 320x480 3.0" panel as found in the RS97 V2.1, RG300(non-ips) + and LDK handheld gaming consoles. + config DRM_PANEL_INNOLUX_P079ZCA tristate "Innolux P079ZCA panel" depends on OF @@ -343,6 +365,27 @@ config DRM_PANEL_RONBO_RB070D30 Say Y here if you want to enable support for Ronbo Electronics RB070D30 1024x600 DSI panel. +config DRM_PANEL_SAMSUNG_ATNA33XC20 + tristate "Samsung ATNA33XC20 eDP panel" + depends on OF + depends on BACKLIGHT_CLASS_DEVICE + depends on PM + select DRM_DP_AUX_BUS + help + DRM panel driver for the Samsung ATNA33XC20 panel. This panel can't + be handled by the DRM_PANEL_SIMPLE driver because its power + sequencing is non-standard. + +config DRM_PANEL_SAMSUNG_DB7430 + tristate "Samsung DB7430-based DPI panels" + depends on OF && SPI && GPIOLIB + depends on BACKLIGHT_CLASS_DEVICE + select DRM_MIPI_DBI + help + Say Y here if you want to enable support for the Samsung + DB7430 DPI display controller used in such devices as the + LMS397KF04 480x800 DPI panel. + config DRM_PANEL_SAMSUNG_S6D16D0 tristate "Samsung S6D16D0 DSI video mode panel" depends on OF @@ -377,6 +420,7 @@ config DRM_PANEL_SAMSUNG_S6E63M0_SPI depends on SPI depends on DRM_PANEL_SAMSUNG_S6E63M0 default DRM_PANEL_SAMSUNG_S6E63M0 + select DRM_MIPI_DBI help Say Y here if you want to be able to access the Samsung S6E63M0 panel using SPI. @@ -553,6 +597,16 @@ config DRM_PANEL_VISIONOX_RM69299 Say Y here if you want to enable support for Visionox RM69299 DSI Video Mode panel. +config DRM_PANEL_WIDECHIPS_WS2401 + tristate "Widechips WS2401 DPI panel driver" + depends on SPI && GPIOLIB + depends on BACKLIGHT_CLASS_DEVICE + select DRM_MIPI_DBI + help + Say Y here if you want to enable support for the Widechips WS2401 DPI + 480x800 display controller used in panels such as Samsung LMS380KF01. + This display is used in the Samsung Galaxy Ace 2 GT-I8160 (Codina). 
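The Samsung ATNA33XC20 entry above notes that the panel's power sequencing is non-standard; the driver added later in this series enforces the panel's minimum delays (for instance, at least 500 ms of power-off time before the supply may be re-enabled) by recording a ktime_t at each transition and sleeping only for whatever part of the interval has not yet elapsed. A minimal sketch of that pattern, with an illustrative helper name rather than the driver's own:

#include <linux/delay.h>
#include <linux/ktime.h>

/*
 * Sleep until at least min_ms milliseconds have passed since 'start',
 * e.g. start = timestamp taken when the supply was switched off and
 * min_ms = 500 before powering the panel back up.
 */
static void panel_wait_since(ktime_t start, unsigned int min_ms)
{
	ktime_t min_ktime = ktime_add(start, ms_to_ktime(min_ms));
	ktime_t now = ktime_get();

	if (ktime_before(now, min_ktime))
		msleep(ktime_to_ms(ktime_sub(min_ktime, now)) + 1);
}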
+ config DRM_PANEL_XINPENG_XPP055C272 tristate "Xinpeng XPP055C272 panel driver" depends on OF diff --git a/drivers/gpu/drm/panel/Makefile b/drivers/gpu/drm/panel/Makefile index cae4d976c069..c8132050bcec 100644 --- a/drivers/gpu/drm/panel/Makefile +++ b/drivers/gpu/drm/panel/Makefile @@ -11,7 +11,9 @@ obj-$(CONFIG_DRM_PANEL_ELIDA_KD35T133) += panel-elida-kd35t133.o obj-$(CONFIG_DRM_PANEL_FEIXIN_K101_IM2BA02) += panel-feixin-k101-im2ba02.o obj-$(CONFIG_DRM_PANEL_FEIYANG_FY07024DI26A30D) += panel-feiyang-fy07024di26a30d.o obj-$(CONFIG_DRM_PANEL_ILITEK_IL9322) += panel-ilitek-ili9322.o +obj-$(CONFIG_DRM_PANEL_ILITEK_ILI9341) += panel-ilitek-ili9341.o obj-$(CONFIG_DRM_PANEL_ILITEK_ILI9881C) += panel-ilitek-ili9881c.o +obj-$(CONFIG_DRM_PANEL_INNOLUX_EJ030NA) += panel-innolux-ej030na.o obj-$(CONFIG_DRM_PANEL_INNOLUX_P079ZCA) += panel-innolux-p079zca.o obj-$(CONFIG_DRM_PANEL_JDI_LT070ME05000) += panel-jdi-lt070me05000.o obj-$(CONFIG_DRM_PANEL_KHADAS_TS050) += panel-khadas-ts050.o @@ -33,6 +35,8 @@ obj-$(CONFIG_DRM_PANEL_RASPBERRYPI_TOUCHSCREEN) += panel-raspberrypi-touchscreen obj-$(CONFIG_DRM_PANEL_RAYDIUM_RM67191) += panel-raydium-rm67191.o obj-$(CONFIG_DRM_PANEL_RAYDIUM_RM68200) += panel-raydium-rm68200.o obj-$(CONFIG_DRM_PANEL_RONBO_RB070D30) += panel-ronbo-rb070d30.o +obj-$(CONFIG_DRM_PANEL_SAMSUNG_ATNA33XC20) += panel-samsung-atna33xc20.o +obj-$(CONFIG_DRM_PANEL_SAMSUNG_DB7430) += panel-samsung-db7430.o obj-$(CONFIG_DRM_PANEL_SAMSUNG_LD9040) += panel-samsung-ld9040.o obj-$(CONFIG_DRM_PANEL_SAMSUNG_S6D16D0) += panel-samsung-s6d16d0.o obj-$(CONFIG_DRM_PANEL_SAMSUNG_S6E3HA2) += panel-samsung-s6e3ha2.o @@ -58,4 +62,5 @@ obj-$(CONFIG_DRM_PANEL_TPO_TD043MTEA1) += panel-tpo-td043mtea1.o obj-$(CONFIG_DRM_PANEL_TPO_TPG110) += panel-tpo-tpg110.o obj-$(CONFIG_DRM_PANEL_TRULY_NT35597_WQXGA) += panel-truly-nt35597.o obj-$(CONFIG_DRM_PANEL_VISIONOX_RM69299) += panel-visionox-rm69299.o +obj-$(CONFIG_DRM_PANEL_WIDECHIPS_WS2401) += panel-widechips-ws2401.o obj-$(CONFIG_DRM_PANEL_XINPENG_XPP055C272) += panel-xinpeng-xpp055c272.o diff --git a/drivers/gpu/drm/panel/panel-asus-z00t-tm5p5-n35596.c b/drivers/gpu/drm/panel/panel-asus-z00t-tm5p5-n35596.c index e95bc9f60b3f..44674ebedf59 100644 --- a/drivers/gpu/drm/panel/panel-asus-z00t-tm5p5-n35596.c +++ b/drivers/gpu/drm/panel/panel-asus-z00t-tm5p5-n35596.c @@ -302,7 +302,7 @@ static int tm5p5_nt35596_probe(struct mipi_dsi_device *dsi) dsi->lanes = 4; dsi->format = MIPI_DSI_FMT_RGB888; dsi->mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_BURST | - MIPI_DSI_MODE_VIDEO_HSE | MIPI_DSI_MODE_EOT_PACKET | + MIPI_DSI_MODE_VIDEO_HSE | MIPI_DSI_MODE_NO_EOT_PACKET | MIPI_DSI_CLOCK_NON_CONTINUOUS | MIPI_DSI_MODE_LPM; drm_panel_init(&ctx->panel, dev, &tm5p5_nt35596_panel_funcs, diff --git a/drivers/gpu/drm/panel/panel-dsi-cm.c b/drivers/gpu/drm/panel/panel-dsi-cm.c index 5fbfb71ca3d9..da4a69067e18 100644 --- a/drivers/gpu/drm/panel/panel-dsi-cm.c +++ b/drivers/gpu/drm/panel/panel-dsi-cm.c @@ -574,7 +574,7 @@ static int dsicm_probe(struct mipi_dsi_device *dsi) dsi->lanes = 2; dsi->format = MIPI_DSI_FMT_RGB888; dsi->mode_flags = MIPI_DSI_CLOCK_NON_CONTINUOUS | - MIPI_DSI_MODE_EOT_PACKET; + MIPI_DSI_MODE_NO_EOT_PACKET; dsi->hs_rate = ddata->panel_data->max_hs_rate; dsi->lp_rate = ddata->panel_data->max_lp_rate; diff --git a/drivers/gpu/drm/panel/panel-elida-kd35t133.c b/drivers/gpu/drm/panel/panel-elida-kd35t133.c index 4787f0833264..80227617a4d6 100644 --- a/drivers/gpu/drm/panel/panel-elida-kd35t133.c +++ b/drivers/gpu/drm/panel/panel-elida-kd35t133.c @@ -273,7 +273,7 
@@ static int kd35t133_probe(struct mipi_dsi_device *dsi) dsi->lanes = 1; dsi->format = MIPI_DSI_FMT_RGB888; dsi->mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_BURST | - MIPI_DSI_MODE_LPM | MIPI_DSI_MODE_EOT_PACKET | + MIPI_DSI_MODE_LPM | MIPI_DSI_MODE_NO_EOT_PACKET | MIPI_DSI_CLOCK_NON_CONTINUOUS; drm_panel_init(&ctx->panel, &dsi->dev, &kd35t133_funcs, diff --git a/drivers/gpu/drm/panel/panel-ilitek-ili9341.c b/drivers/gpu/drm/panel/panel-ilitek-ili9341.c new file mode 100644 index 000000000000..2c3378a259b1 --- /dev/null +++ b/drivers/gpu/drm/panel/panel-ilitek-ili9341.c @@ -0,0 +1,792 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Ilitek ILI9341 TFT LCD drm_panel driver. + * + * This panel can be configured to support: + * - 16-bit parallel RGB interface + * - 18-bit parallel RGB interface + * - 4-line serial spi interface + * + * Copyright (C) 2021 Dillon Min <dillon.minfei@gmail.com> + * + * For dbi+dpi part: + * Derived from drivers/drm/gpu/panel/panel-ilitek-ili9322.c + * the reuse of DBI abstraction part referred from Linus's patch + * "drm/panel: s6e63m0: Switch to DBI abstraction for SPI" + * + * For only-dbi part, copy from David's code (drm/tiny/ili9341.c) + * Copyright 2018 David Lechner <david@lechnology.com> + */ + +#include <linux/bitops.h> +#include <linux/delay.h> +#include <linux/gpio/consumer.h> +#include <linux/module.h> +#include <linux/of_device.h> +#include <linux/regulator/consumer.h> +#include <linux/spi/spi.h> + +#include <video/mipi_display.h> + +#include <drm/drm_atomic_helper.h> +#include <drm/drm_drv.h> +#include <drm/drm_fb_helper.h> +#include <drm/drm_gem_atomic_helper.h> +#include <drm/drm_gem_cma_helper.h> +#include <drm/drm_gem_framebuffer_helper.h> +#include <drm/drm_mipi_dbi.h> +#include <drm/drm_modes.h> +#include <drm/drm_panel.h> +#include <drm/drm_print.h> + +#define ILI9341_RGB_INTERFACE 0xb0 /* RGB Interface Signal Control */ +#define ILI9341_FRC 0xb1 /* Frame Rate Control register */ +#define ILI9341_DFC 0xb6 /* Display Function Control register */ +#define ILI9341_POWER1 0xc0 /* Power Control 1 register */ +#define ILI9341_POWER2 0xc1 /* Power Control 2 register */ +#define ILI9341_VCOM1 0xc5 /* VCOM Control 1 register */ +#define ILI9341_VCOM2 0xc7 /* VCOM Control 2 register */ +#define ILI9341_POWERA 0xcb /* Power control A register */ +#define ILI9341_POWERB 0xcf /* Power control B register */ +#define ILI9341_PGAMMA 0xe0 /* Positive Gamma Correction register */ +#define ILI9341_NGAMMA 0xe1 /* Negative Gamma Correction register */ +#define ILI9341_DTCA 0xe8 /* Driver timing control A */ +#define ILI9341_DTCB 0xea /* Driver timing control B */ +#define ILI9341_POWER_SEQ 0xed /* Power on sequence register */ +#define ILI9341_3GAMMA_EN 0xf2 /* 3 Gamma enable register */ +#define ILI9341_INTERFACE 0xf6 /* Interface control register */ +#define ILI9341_PRC 0xf7 /* Pump ratio control register */ +#define ILI9341_ETMOD 0xb7 /* Entry mode set */ + +#define ILI9341_MADCTL_BGR BIT(3) +#define ILI9341_MADCTL_MV BIT(5) +#define ILI9341_MADCTL_MX BIT(6) +#define ILI9341_MADCTL_MY BIT(7) + +#define ILI9341_POWER_B_LEN 3 +#define ILI9341_POWER_SEQ_LEN 4 +#define ILI9341_DTCA_LEN 3 +#define ILI9341_DTCB_LEN 2 +#define ILI9341_POWER_A_LEN 5 +#define ILI9341_DFC_1_LEN 2 +#define ILI9341_FRC_LEN 2 +#define ILI9341_VCOM_1_LEN 2 +#define ILI9341_DFC_2_LEN 4 +#define ILI9341_COLUMN_ADDR_LEN 4 +#define ILI9341_PAGE_ADDR_LEN 4 +#define ILI9341_INTERFACE_LEN 3 +#define ILI9341_PGAMMA_LEN 15 +#define ILI9341_NGAMMA_LEN 15 +#define ILI9341_CA_LEN 3 + 
+#define ILI9341_PIXEL_DPI_16_BITS (BIT(6) | BIT(4)) +#define ILI9341_PIXEL_DPI_18_BITS (BIT(6) | BIT(5)) +#define ILI9341_GAMMA_CURVE_1 BIT(0) +#define ILI9341_IF_WE_MODE BIT(0) +#define ILI9341_IF_BIG_ENDIAN 0x00 +#define ILI9341_IF_DM_RGB BIT(2) +#define ILI9341_IF_DM_INTERNAL 0x00 +#define ILI9341_IF_DM_VSYNC BIT(3) +#define ILI9341_IF_RM_RGB BIT(1) +#define ILI9341_IF_RIM_RGB 0x00 + +#define ILI9341_COLUMN_ADDR 0x00ef +#define ILI9341_PAGE_ADDR 0x013f + +#define ILI9341_RGB_EPL BIT(0) +#define ILI9341_RGB_DPL BIT(1) +#define ILI9341_RGB_HSPL BIT(2) +#define ILI9341_RGB_VSPL BIT(3) +#define ILI9341_RGB_DE_MODE BIT(6) +#define ILI9341_RGB_DISP_PATH_MEM BIT(7) + +#define ILI9341_DBI_VCOMH_4P6V 0x23 +#define ILI9341_DBI_PWR_2_DEFAULT 0x10 +#define ILI9341_DBI_PRC_NORMAL 0x20 +#define ILI9341_DBI_VCOM_1_VMH_4P25V 0x3e +#define ILI9341_DBI_VCOM_1_VML_1P5V 0x28 +#define ILI9341_DBI_VCOM_2_DEC_58 0x86 +#define ILI9341_DBI_FRC_DIVA 0x00 +#define ILI9341_DBI_FRC_RTNA 0x1b +#define ILI9341_DBI_EMS_GAS BIT(0) +#define ILI9341_DBI_EMS_DTS BIT(1) +#define ILI9341_DBI_EMS_GON BIT(2) + +/* struct ili9341_config - the system specific ILI9341 configuration */ +struct ili9341_config { + u32 max_spi_speed; + /* mode: the drm display mode */ + const struct drm_display_mode mode; + /* ca: TODO: need comments for this register */ + u8 ca[ILI9341_CA_LEN]; + /* power_b: TODO: need comments for this register */ + u8 power_b[ILI9341_POWER_B_LEN]; + /* power_seq: TODO: need comments for this register */ + u8 power_seq[ILI9341_POWER_SEQ_LEN]; + /* dtca: TODO: need comments for this register */ + u8 dtca[ILI9341_DTCA_LEN]; + /* dtcb: TODO: need comments for this register */ + u8 dtcb[ILI9341_DTCB_LEN]; + /* power_a: TODO: need comments for this register */ + u8 power_a[ILI9341_POWER_A_LEN]; + /* frc: Frame Rate Control (In Normal Mode/Full Colors) (B1h) */ + u8 frc[ILI9341_FRC_LEN]; + /* prc: TODO: need comments for this register */ + u8 prc; + /* dfc_1: B6h DISCTRL (Display Function Control) */ + u8 dfc_1[ILI9341_DFC_1_LEN]; + /* power_1: Power Control 1 (C0h) */ + u8 power_1; + /* power_2: Power Control 2 (C1h) */ + u8 power_2; + /* vcom_1: VCOM Control 1(C5h) */ + u8 vcom_1[ILI9341_VCOM_1_LEN]; + /* vcom_2: VCOM Control 2(C7h) */ + u8 vcom_2; + /* address_mode: Memory Access Control (36h) */ + u8 address_mode; + /* g3amma_en: TODO: need comments for this register */ + u8 g3amma_en; + /* rgb_interface: RGB Interface Signal Control (B0h) */ + u8 rgb_interface; + /* dfc_2: refer to dfc_1 */ + u8 dfc_2[ILI9341_DFC_2_LEN]; + /* column_addr: Column Address Set (2Ah) */ + u8 column_addr[ILI9341_COLUMN_ADDR_LEN]; + /* page_addr: Page Address Set (2Bh) */ + u8 page_addr[ILI9341_PAGE_ADDR_LEN]; + /* interface: Interface Control (F6h) */ + u8 interface[ILI9341_INTERFACE_LEN]; + /* + * pixel_format: This command sets the pixel format for the RGB + * image data used by + */ + u8 pixel_format; + /* + * gamma_curve: This command is used to select the desired Gamma + * curve for the + */ + u8 gamma_curve; + /* pgamma: Positive Gamma Correction (E0h) */ + u8 pgamma[ILI9341_PGAMMA_LEN]; + /* ngamma: Negative Gamma Correction (E1h) */ + u8 ngamma[ILI9341_NGAMMA_LEN]; +}; + +struct ili9341 { + struct device *dev; + const struct ili9341_config *conf; + struct drm_panel panel; + struct gpio_desc *reset_gpio; + struct gpio_desc *dc_gpio; + struct mipi_dbi *dbi; + u32 max_spi_speed; + struct regulator_bulk_data supplies[3]; +}; + +/* + * The Stm32f429-disco board has a panel ili9341 connected to ltdc controller + */ +static const 
struct ili9341_config ili9341_stm32f429_disco_data = { + .max_spi_speed = 10000000, + .mode = { + .clock = 6100, + .hdisplay = 240, + .hsync_start = 240 + 10,/* hfp 10 */ + .hsync_end = 240 + 10 + 10,/* hsync 10 */ + .htotal = 240 + 10 + 10 + 20,/* hbp 20 */ + .vdisplay = 320, + .vsync_start = 320 + 4,/* vfp 4 */ + .vsync_end = 320 + 4 + 2,/* vsync 2 */ + .vtotal = 320 + 4 + 2 + 2,/* vbp 2 */ + .flags = 0, + .width_mm = 65, + .height_mm = 50, + .type = DRM_MODE_TYPE_DRIVER | DRM_MODE_TYPE_PREFERRED, + }, + .ca = {0xc3, 0x08, 0x50}, + .power_b = {0x00, 0xc1, 0x30}, + .power_seq = {0x64, 0x03, 0x12, 0x81}, + .dtca = {0x85, 0x00, 0x78}, + .power_a = {0x39, 0x2c, 0x00, 0x34, 0x02}, + .prc = 0x20, + .dtcb = {0x00, 0x00}, + /* 0x00 fosc, 0x1b 70hz */ + .frc = {0x00, 0x1b}, + /* + * 0x0a Interval scan, AGND AGND AGND AGND + * 0xa2 Normally white, G1 -> G320, S720 -> S1, + * Scan Cycle 5 frames,85ms + */ + .dfc_1 = {0x0a, 0xa2}, + /* 0x10 3.65v */ + .power_1 = 0x10, + /* 0x10 AVDD=vci*2, VGH=vci*7, VGL=-vci*4 */ + .power_2 = 0x10, + /* 0x45 VCOMH 4.425v, 0x15 VCOML -1.975*/ + .vcom_1 = {0x45, 0x15}, + /* 0x90 offset voltage, VMH-48, VML-48 */ + .vcom_2 = 0x90, + /* + * 0xc8 Row Address Order, Column Address Order + * BGR 1 + */ + .address_mode = 0xc8, + .g3amma_en = 0x00, + /* + * 0xc2 + * Display Data Path: Memory + * RGB: DE mode + * DOTCLK polarity set (data fetched at the falling time) + */ + .rgb_interface = ILI9341_RGB_DISP_PATH_MEM | + ILI9341_RGB_DE_MODE | + ILI9341_RGB_DPL, + /* + * 0x0a + * Gate outputs in non-display area: Interval scan + * Determine source/VCOM output in a non-display area in the partial + * display mode: AGND AGND AGND AGND + * + * 0xa7 + * Scan Cycle: 15 frames + * fFLM = 60Hz: 255ms + * Liquid crystal type: Normally white + * Gate Output Scan Direction: G1 -> G320 + * Source Output Scan Direction: S720 -> S1 + * + * 0x27 + * LCD Driver Line: 320 lines + * + * 0x04 + * PCDIV: 4 + */ + .dfc_2 = {0x0a, 0xa7, 0x27, 0x04}, + /* column address: 240 */ + .column_addr = {0x00, 0x00, (ILI9341_COLUMN_ADDR >> 4) & 0xff, + ILI9341_COLUMN_ADDR & 0xff}, + /* page address: 320 */ + .page_addr = {0x00, 0x00, (ILI9341_PAGE_ADDR >> 4) & 0xff, + ILI9341_PAGE_ADDR & 0xff}, + /* + * Memory write control: When the transfer number of data exceeds + * (EC-SC+1)*(EP-SP+1), the column and page number will be + * reset, and the exceeding data will be written into the following + * column and page. 
+ * Display Operation Mode: RGB Interface Mode + * Interface for RAM Access: RGB interface + * 16- bit RGB interface (1 transfer/pixel) + */ + .interface = {ILI9341_IF_WE_MODE, 0x00, + ILI9341_IF_DM_RGB | ILI9341_IF_RM_RGB}, + /* DPI: 16 bits / pixel */ + .pixel_format = ILI9341_PIXEL_DPI_16_BITS, + /* Curve Selected: Gamma curve 1 (G2.2) */ + .gamma_curve = ILI9341_GAMMA_CURVE_1, + .pgamma = {0x0f, 0x29, 0x24, 0x0c, 0x0e, + 0x09, 0x4e, 0x78, 0x3c, 0x09, + 0x13, 0x05, 0x17, 0x11, 0x00}, + .ngamma = {0x00, 0x16, 0x1b, 0x04, 0x11, + 0x07, 0x31, 0x33, 0x42, 0x05, + 0x0c, 0x0a, 0x28, 0x2f, 0x0f}, +}; + +static inline struct ili9341 *panel_to_ili9341(struct drm_panel *panel) +{ + return container_of(panel, struct ili9341, panel); +} + +static void ili9341_dpi_init(struct ili9341 *ili) +{ + struct device *dev = (&ili->panel)->dev; + struct mipi_dbi *dbi = ili->dbi; + struct ili9341_config *cfg = (struct ili9341_config *)ili->conf; + + /* Power Control */ + mipi_dbi_command_stackbuf(dbi, 0xca, cfg->ca, ILI9341_CA_LEN); + mipi_dbi_command_stackbuf(dbi, ILI9341_POWERB, cfg->power_b, + ILI9341_POWER_B_LEN); + mipi_dbi_command_stackbuf(dbi, ILI9341_POWER_SEQ, cfg->power_seq, + ILI9341_POWER_SEQ_LEN); + mipi_dbi_command_stackbuf(dbi, ILI9341_DTCA, cfg->dtca, + ILI9341_DTCA_LEN); + mipi_dbi_command_stackbuf(dbi, ILI9341_POWERA, cfg->power_a, + ILI9341_POWER_A_LEN); + mipi_dbi_command(ili->dbi, ILI9341_PRC, cfg->prc); + mipi_dbi_command_stackbuf(dbi, ILI9341_DTCB, cfg->dtcb, + ILI9341_DTCB_LEN); + mipi_dbi_command_stackbuf(dbi, ILI9341_FRC, cfg->frc, ILI9341_FRC_LEN); + mipi_dbi_command_stackbuf(dbi, ILI9341_DFC, cfg->dfc_1, + ILI9341_DFC_1_LEN); + mipi_dbi_command(dbi, ILI9341_POWER1, cfg->power_1); + mipi_dbi_command(dbi, ILI9341_POWER2, cfg->power_2); + + /* VCOM */ + mipi_dbi_command_stackbuf(dbi, ILI9341_VCOM1, cfg->vcom_1, + ILI9341_VCOM_1_LEN); + mipi_dbi_command(dbi, ILI9341_VCOM2, cfg->vcom_2); + mipi_dbi_command(dbi, MIPI_DCS_SET_ADDRESS_MODE, cfg->address_mode); + + /* Gamma */ + mipi_dbi_command(dbi, ILI9341_3GAMMA_EN, cfg->g3amma_en); + mipi_dbi_command(dbi, ILI9341_RGB_INTERFACE, cfg->rgb_interface); + mipi_dbi_command_stackbuf(dbi, ILI9341_DFC, cfg->dfc_2, + ILI9341_DFC_2_LEN); + + /* Colomn address set */ + mipi_dbi_command_stackbuf(dbi, MIPI_DCS_SET_COLUMN_ADDRESS, + cfg->column_addr, ILI9341_COLUMN_ADDR_LEN); + + /* Page address set */ + mipi_dbi_command_stackbuf(dbi, MIPI_DCS_SET_PAGE_ADDRESS, + cfg->page_addr, ILI9341_PAGE_ADDR_LEN); + mipi_dbi_command_stackbuf(dbi, ILI9341_INTERFACE, cfg->interface, + ILI9341_INTERFACE_LEN); + + /* Format */ + mipi_dbi_command(dbi, MIPI_DCS_SET_PIXEL_FORMAT, cfg->pixel_format); + mipi_dbi_command(dbi, MIPI_DCS_WRITE_MEMORY_START); + msleep(200); + mipi_dbi_command(dbi, MIPI_DCS_SET_GAMMA_CURVE, cfg->gamma_curve); + mipi_dbi_command_stackbuf(dbi, ILI9341_PGAMMA, cfg->pgamma, + ILI9341_PGAMMA_LEN); + mipi_dbi_command_stackbuf(dbi, ILI9341_NGAMMA, cfg->ngamma, + ILI9341_NGAMMA_LEN); + mipi_dbi_command(dbi, MIPI_DCS_EXIT_SLEEP_MODE); + msleep(200); + mipi_dbi_command(dbi, MIPI_DCS_SET_DISPLAY_ON); + mipi_dbi_command(dbi, MIPI_DCS_WRITE_MEMORY_START); + + dev_info(dev, "Initialized display rgb interface\n"); +} + +static int ili9341_dpi_power_on(struct ili9341 *ili) +{ + struct device *dev = (&ili->panel)->dev; + int ret = 0; + + /* Assert RESET */ + gpiod_set_value(ili->reset_gpio, 1); + + /* Enable power */ + ret = regulator_bulk_enable(ARRAY_SIZE(ili->supplies), + ili->supplies); + if (ret < 0) { + dev_err(dev, "unable to enable vcc\n"); + return ret; 
+ } + msleep(20); + + /* De-assert RESET */ + gpiod_set_value(ili->reset_gpio, 0); + msleep(20); + + return 0; +} + +static int ili9341_dpi_power_off(struct ili9341 *ili) +{ + /* Assert RESET */ + gpiod_set_value(ili->reset_gpio, 1); + + /* Disable power */ + return regulator_bulk_disable(ARRAY_SIZE(ili->supplies), + ili->supplies); +} + +static int ili9341_dpi_disable(struct drm_panel *panel) +{ + struct ili9341 *ili = panel_to_ili9341(panel); + + mipi_dbi_command(ili->dbi, MIPI_DCS_SET_DISPLAY_OFF); + return 0; +} + +static int ili9341_dpi_unprepare(struct drm_panel *panel) +{ + struct ili9341 *ili = panel_to_ili9341(panel); + + return ili9341_dpi_power_off(ili); +} + +static int ili9341_dpi_prepare(struct drm_panel *panel) +{ + struct ili9341 *ili = panel_to_ili9341(panel); + int ret; + + ret = ili9341_dpi_power_on(ili); + if (ret < 0) + return ret; + + ili9341_dpi_init(ili); + + return ret; +} + +static int ili9341_dpi_enable(struct drm_panel *panel) +{ + struct ili9341 *ili = panel_to_ili9341(panel); + + mipi_dbi_command(ili->dbi, MIPI_DCS_SET_DISPLAY_ON); + return 0; +} + +static int ili9341_dpi_get_modes(struct drm_panel *panel, + struct drm_connector *connector) +{ + struct ili9341 *ili = panel_to_ili9341(panel); + struct drm_device *drm = connector->dev; + struct drm_display_mode *mode; + struct drm_display_info *info; + + info = &connector->display_info; + info->width_mm = ili->conf->mode.width_mm; + info->height_mm = ili->conf->mode.height_mm; + + if (ili->conf->rgb_interface & ILI9341_RGB_DPL) + info->bus_flags |= DRM_BUS_FLAG_PIXDATA_DRIVE_POSEDGE; + else + info->bus_flags |= DRM_BUS_FLAG_PIXDATA_DRIVE_NEGEDGE; + + if (ili->conf->rgb_interface & ILI9341_RGB_EPL) + info->bus_flags |= DRM_BUS_FLAG_DE_LOW; + else + info->bus_flags |= DRM_BUS_FLAG_DE_HIGH; + + mode = drm_mode_duplicate(drm, &ili->conf->mode); + if (!mode) { + drm_err(drm, "bad mode or failed to add mode\n"); + return -EINVAL; + } + drm_mode_set_name(mode); + + /* Set up the polarity */ + if (ili->conf->rgb_interface & ILI9341_RGB_HSPL) + mode->flags |= DRM_MODE_FLAG_PHSYNC; + else + mode->flags |= DRM_MODE_FLAG_NHSYNC; + + if (ili->conf->rgb_interface & ILI9341_RGB_VSPL) + mode->flags |= DRM_MODE_FLAG_PVSYNC; + else + mode->flags |= DRM_MODE_FLAG_NVSYNC; + + drm_mode_probed_add(connector, mode); + + return 1; /* Number of modes */ +} + +static const struct drm_panel_funcs ili9341_dpi_funcs = { + .disable = ili9341_dpi_disable, + .unprepare = ili9341_dpi_unprepare, + .prepare = ili9341_dpi_prepare, + .enable = ili9341_dpi_enable, + .get_modes = ili9341_dpi_get_modes, +}; + +static void ili9341_dbi_enable(struct drm_simple_display_pipe *pipe, + struct drm_crtc_state *crtc_state, + struct drm_plane_state *plane_state) +{ + struct mipi_dbi_dev *dbidev = drm_to_mipi_dbi_dev(pipe->crtc.dev); + struct mipi_dbi *dbi = &dbidev->dbi; + u8 addr_mode; + int ret, idx; + + if (!drm_dev_enter(pipe->crtc.dev, &idx)) + return; + + ret = mipi_dbi_poweron_conditional_reset(dbidev); + if (ret < 0) + goto out_exit; + if (ret == 1) + goto out_enable; + + mipi_dbi_command(dbi, MIPI_DCS_SET_DISPLAY_OFF); + + mipi_dbi_command(dbi, ILI9341_POWERB, 0x00, 0xc1, 0x30); + mipi_dbi_command(dbi, ILI9341_POWER_SEQ, 0x64, 0x03, 0x12, 0x81); + mipi_dbi_command(dbi, ILI9341_DTCA, 0x85, 0x00, 0x78); + mipi_dbi_command(dbi, ILI9341_POWERA, 0x39, 0x2c, 0x00, 0x34, 0x02); + mipi_dbi_command(dbi, ILI9341_PRC, ILI9341_DBI_PRC_NORMAL); + mipi_dbi_command(dbi, ILI9341_DTCB, 0x00, 0x00); + + /* Power Control */ + mipi_dbi_command(dbi, ILI9341_POWER1, 
ILI9341_DBI_VCOMH_4P6V); + mipi_dbi_command(dbi, ILI9341_POWER2, ILI9341_DBI_PWR_2_DEFAULT); + /* VCOM */ + mipi_dbi_command(dbi, ILI9341_VCOM1, ILI9341_DBI_VCOM_1_VMH_4P25V, + ILI9341_DBI_VCOM_1_VML_1P5V); + mipi_dbi_command(dbi, ILI9341_VCOM2, ILI9341_DBI_VCOM_2_DEC_58); + + /* Memory Access Control */ + mipi_dbi_command(dbi, MIPI_DCS_SET_PIXEL_FORMAT, + MIPI_DCS_PIXEL_FMT_16BIT); + + /* Frame Rate */ + mipi_dbi_command(dbi, ILI9341_FRC, ILI9341_DBI_FRC_DIVA & 0x03, + ILI9341_DBI_FRC_RTNA & 0x1f); + + /* Gamma */ + mipi_dbi_command(dbi, ILI9341_3GAMMA_EN, 0x00); + mipi_dbi_command(dbi, MIPI_DCS_SET_GAMMA_CURVE, ILI9341_GAMMA_CURVE_1); + mipi_dbi_command(dbi, ILI9341_PGAMMA, + 0x0f, 0x31, 0x2b, 0x0c, 0x0e, 0x08, 0x4e, 0xf1, + 0x37, 0x07, 0x10, 0x03, 0x0e, 0x09, 0x00); + mipi_dbi_command(dbi, ILI9341_NGAMMA, + 0x00, 0x0e, 0x14, 0x03, 0x11, 0x07, 0x31, 0xc1, + 0x48, 0x08, 0x0f, 0x0c, 0x31, 0x36, 0x0f); + + /* DDRAM */ + mipi_dbi_command(dbi, ILI9341_ETMOD, ILI9341_DBI_EMS_GAS | + ILI9341_DBI_EMS_DTS | + ILI9341_DBI_EMS_GON); + + /* Display */ + mipi_dbi_command(dbi, ILI9341_DFC, 0x08, 0x82, 0x27, 0x00); + mipi_dbi_command(dbi, MIPI_DCS_EXIT_SLEEP_MODE); + msleep(100); + + mipi_dbi_command(dbi, MIPI_DCS_SET_DISPLAY_ON); + msleep(100); + +out_enable: + switch (dbidev->rotation) { + default: + addr_mode = ILI9341_MADCTL_MX; + break; + case 90: + addr_mode = ILI9341_MADCTL_MV; + break; + case 180: + addr_mode = ILI9341_MADCTL_MY; + break; + case 270: + addr_mode = ILI9341_MADCTL_MV | ILI9341_MADCTL_MY | + ILI9341_MADCTL_MX; + break; + } + + addr_mode |= ILI9341_MADCTL_BGR; + mipi_dbi_command(dbi, MIPI_DCS_SET_ADDRESS_MODE, addr_mode); + mipi_dbi_enable_flush(dbidev, crtc_state, plane_state); + drm_info(&dbidev->drm, "Initialized display serial interface\n"); +out_exit: + drm_dev_exit(idx); +} + +static const struct drm_simple_display_pipe_funcs ili9341_dbi_funcs = { + .enable = ili9341_dbi_enable, + .disable = mipi_dbi_pipe_disable, + .update = mipi_dbi_pipe_update, + .prepare_fb = drm_gem_simple_display_pipe_prepare_fb, +}; + +static const struct drm_display_mode ili9341_dbi_mode = { + DRM_SIMPLE_MODE(240, 320, 37, 49), +}; + +DEFINE_DRM_GEM_CMA_FOPS(ili9341_dbi_fops); + +static struct drm_driver ili9341_dbi_driver = { + .driver_features = DRIVER_GEM | DRIVER_MODESET | DRIVER_ATOMIC, + .fops = &ili9341_dbi_fops, + DRM_GEM_CMA_DRIVER_OPS_VMAP, + .debugfs_init = mipi_dbi_debugfs_init, + .name = "ili9341", + .desc = "Ilitek ILI9341", + .date = "20210716", + .major = 1, + .minor = 0, +}; + +static int ili9341_dbi_probe(struct spi_device *spi, struct gpio_desc *dc, + struct gpio_desc *reset) +{ + struct device *dev = &spi->dev; + struct mipi_dbi_dev *dbidev; + struct mipi_dbi *dbi; + struct drm_device *drm; + struct regulator *vcc; + u32 rotation = 0; + int ret; + + vcc = devm_regulator_get_optional(dev, "vcc"); + if (IS_ERR(vcc)) + dev_err(dev, "get optional vcc failed\n"); + + dbidev = devm_drm_dev_alloc(dev, &ili9341_dbi_driver, + struct mipi_dbi_dev, drm); + if (IS_ERR(dbidev)) + return PTR_ERR(dbidev); + + dbi = &dbidev->dbi; + drm = &dbidev->drm; + dbi->reset = reset; + dbidev->regulator = vcc; + + drm_mode_config_init(drm); + + dbidev->backlight = devm_of_find_backlight(dev); + if (IS_ERR(dbidev->backlight)) + return PTR_ERR(dbidev->backlight); + + device_property_read_u32(dev, "rotation", &rotation); + + ret = mipi_dbi_spi_init(spi, dbi, dc); + if (ret) + return ret; + + ret = mipi_dbi_dev_init(dbidev, &ili9341_dbi_funcs, + &ili9341_dbi_mode, rotation); + if (ret) + return ret; + + 
drm_mode_config_reset(drm); + + ret = drm_dev_register(drm, 0); + if (ret) + return ret; + + spi_set_drvdata(spi, drm); + + drm_fbdev_generic_setup(drm, 0); + + return 0; +} + +static int ili9341_dpi_probe(struct spi_device *spi, struct gpio_desc *dc, + struct gpio_desc *reset) +{ + struct device *dev = &spi->dev; + struct ili9341 *ili; + int ret; + + ili = devm_kzalloc(dev, sizeof(struct ili9341), GFP_KERNEL); + if (!ili) + return -ENOMEM; + + ili->dbi = devm_kzalloc(dev, sizeof(struct mipi_dbi), + GFP_KERNEL); + if (!ili->dbi) + return -ENOMEM; + + ili->supplies[0].supply = "vci"; + ili->supplies[1].supply = "vddi"; + ili->supplies[2].supply = "vddi-led"; + ret = devm_regulator_bulk_get(dev, ARRAY_SIZE(ili->supplies), + ili->supplies); + if (ret < 0) { + dev_err(dev, "failed to get regulators: %d\n", ret); + return ret; + } + + ret = mipi_dbi_spi_init(spi, ili->dbi, dc); + if (ret) + return ret; + + spi_set_drvdata(spi, ili); + ili->reset_gpio = reset; + /* + * Every new incarnation of this display must have a unique + * data entry for the system in this driver. + */ + ili->conf = of_device_get_match_data(dev); + if (!ili->conf) { + dev_err(dev, "missing device configuration\n"); + return -ENODEV; + } + + ili->max_spi_speed = ili->conf->max_spi_speed; + drm_panel_init(&ili->panel, dev, &ili9341_dpi_funcs, + DRM_MODE_CONNECTOR_DPI); + drm_panel_add(&ili->panel); + + return 0; +} + +static int ili9341_probe(struct spi_device *spi) +{ + struct device *dev = &spi->dev; + struct gpio_desc *dc; + struct gpio_desc *reset; + const struct spi_device_id *id = spi_get_device_id(spi); + + reset = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_HIGH); + if (IS_ERR(reset)) + dev_err(dev, "Failed to get gpio 'reset'\n"); + + dc = devm_gpiod_get_optional(dev, "dc", GPIOD_OUT_LOW); + if (IS_ERR(dc)) + dev_err(dev, "Failed to get gpio 'dc'\n"); + + if (!strcmp(id->name, "sf-tc240t-9370-t")) + return ili9341_dpi_probe(spi, dc, reset); + else if (!strcmp(id->name, "yx240qv29")) + return ili9341_dbi_probe(spi, dc, reset); + + return -1; +} + +static int ili9341_remove(struct spi_device *spi) +{ + const struct spi_device_id *id = spi_get_device_id(spi); + struct ili9341 *ili = spi_get_drvdata(spi); + struct drm_device *drm = spi_get_drvdata(spi); + + if (!strcmp(id->name, "sf-tc240t-9370-t")) { + ili9341_dpi_power_off(ili); + drm_panel_remove(&ili->panel); + } else if (!strcmp(id->name, "yx240qv29")) { + drm_dev_unplug(drm); + drm_atomic_helper_shutdown(drm); + } + return 0; +} + +static void ili9341_shutdown(struct spi_device *spi) +{ + const struct spi_device_id *id = spi_get_device_id(spi); + + if (!strcmp(id->name, "yx240qv29")) + drm_atomic_helper_shutdown(spi_get_drvdata(spi)); +} + +static const struct of_device_id ili9341_of_match[] = { + { + .compatible = "st,sf-tc240t-9370-t", + .data = &ili9341_stm32f429_disco_data, + }, + { + /* porting from tiny/ili9341.c + * for original mipi dbi compitable + */ + .compatible = "adafruit,yx240qv29", + .data = NULL, + }, + { } +}; +MODULE_DEVICE_TABLE(of, ili9341_of_match); + +static const struct spi_device_id ili9341_id[] = { + { "yx240qv29", 0 }, + { "sf-tc240t-9370-t", 0 }, + { } +}; +MODULE_DEVICE_TABLE(spi, ili9341_id); + +static struct spi_driver ili9341_driver = { + .probe = ili9341_probe, + .remove = ili9341_remove, + .shutdown = ili9341_shutdown, + .id_table = ili9341_id, + .driver = { + .name = "panel-ilitek-ili9341", + .of_match_table = ili9341_of_match, + }, +}; +module_spi_driver(ili9341_driver); + +MODULE_AUTHOR("Dillon Min 
<dillon.minfei@gmail.com>"); +MODULE_DESCRIPTION("ILI9341 LCD panel driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/gpu/drm/panel/panel-innolux-ej030na.c b/drivers/gpu/drm/panel/panel-innolux-ej030na.c new file mode 100644 index 000000000000..34b98f70bd22 --- /dev/null +++ b/drivers/gpu/drm/panel/panel-innolux-ej030na.c @@ -0,0 +1,289 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Innolux/Chimei EJ030NA TFT LCD panel driver + * + * Copyright (C) 2020, Paul Cercueil <paul@crapouillou.net> + * Copyright (C) 2020, Christophe Branchereau <cbranchereau@gmail.com> + */ + +#include <linux/delay.h> +#include <linux/device.h> +#include <linux/gpio/consumer.h> +#include <linux/media-bus-format.h> +#include <linux/module.h> +#include <linux/of_device.h> +#include <linux/regmap.h> +#include <linux/regulator/consumer.h> +#include <linux/spi/spi.h> + +#include <drm/drm_modes.h> +#include <drm/drm_panel.h> + +struct ej030na_info { + const struct drm_display_mode *display_modes; + unsigned int num_modes; + u16 width_mm, height_mm; + u32 bus_format, bus_flags; +}; + +struct ej030na { + struct drm_panel panel; + struct spi_device *spi; + struct regmap *map; + + const struct ej030na_info *panel_info; + + struct regulator *supply; + struct gpio_desc *reset_gpio; +}; + +static inline struct ej030na *to_ej030na(struct drm_panel *panel) +{ + return container_of(panel, struct ej030na, panel); +} + +static const struct reg_sequence ej030na_init_sequence[] = { + { 0x05, 0x1e }, + { 0x05, 0x5c }, + { 0x02, 0x14 }, + { 0x03, 0x40 }, + { 0x04, 0x07 }, + { 0x06, 0x12 }, + { 0x07, 0xd2 }, + { 0x0c, 0x06 }, + { 0x0d, 0x40 }, + { 0x0e, 0x40 }, + { 0x0f, 0x40 }, + { 0x10, 0x40 }, + { 0x11, 0x40 }, + { 0x2f, 0x40 }, + { 0x5a, 0x02 }, + + { 0x30, 0x07 }, + { 0x31, 0x57 }, + { 0x32, 0x53 }, + { 0x33, 0x77 }, + { 0x34, 0xb8 }, + { 0x35, 0xbd }, + { 0x36, 0xb8 }, + { 0x37, 0xe7 }, + { 0x38, 0x04 }, + { 0x39, 0xff }, + + { 0x40, 0x0b }, + { 0x41, 0xb8 }, + { 0x42, 0xab }, + { 0x43, 0xb9 }, + { 0x44, 0x6a }, + { 0x45, 0x56 }, + { 0x46, 0x61 }, + { 0x47, 0x08 }, + { 0x48, 0x0f }, + { 0x49, 0x0f }, + + { 0x2b, 0x01 }, +}; + +static int ej030na_prepare(struct drm_panel *panel) +{ + struct ej030na *priv = to_ej030na(panel); + struct device *dev = &priv->spi->dev; + int err; + + err = regulator_enable(priv->supply); + if (err) { + dev_err(dev, "Failed to enable power supply: %d\n", err); + return err; + } + + /* Reset the chip */ + gpiod_set_value_cansleep(priv->reset_gpio, 1); + usleep_range(50, 150); + gpiod_set_value_cansleep(priv->reset_gpio, 0); + usleep_range(50, 150); + + err = regmap_multi_reg_write(priv->map, ej030na_init_sequence, + ARRAY_SIZE(ej030na_init_sequence)); + if (err) { + dev_err(dev, "Failed to init registers: %d\n", err); + goto err_disable_regulator; + } + + msleep(120); + + return 0; + +err_disable_regulator: + regulator_disable(priv->supply); + return err; +} + +static int ej030na_unprepare(struct drm_panel *panel) +{ + struct ej030na *priv = to_ej030na(panel); + + gpiod_set_value_cansleep(priv->reset_gpio, 1); + regulator_disable(priv->supply); + + return 0; +} + +static int ej030na_get_modes(struct drm_panel *panel, + struct drm_connector *connector) +{ + struct ej030na *priv = to_ej030na(panel); + const struct ej030na_info *panel_info = priv->panel_info; + struct drm_display_mode *mode; + unsigned int i; + + for (i = 0; i < panel_info->num_modes; i++) { + mode = drm_mode_duplicate(connector->dev, + &panel_info->display_modes[i]); + if (!mode) + return -ENOMEM; + + drm_mode_set_name(mode); + + 
mode->type = DRM_MODE_TYPE_DRIVER; + if (panel_info->num_modes == 1) + mode->type |= DRM_MODE_TYPE_PREFERRED; + + drm_mode_probed_add(connector, mode); + } + + connector->display_info.bpc = 8; + connector->display_info.width_mm = panel_info->width_mm; + connector->display_info.height_mm = panel_info->height_mm; + + drm_display_info_set_bus_formats(&connector->display_info, + &panel_info->bus_format, 1); + connector->display_info.bus_flags = panel_info->bus_flags; + + return panel_info->num_modes; +} + +static const struct drm_panel_funcs ej030na_funcs = { + .prepare = ej030na_prepare, + .unprepare = ej030na_unprepare, + .get_modes = ej030na_get_modes, +}; + +static const struct regmap_config ej030na_regmap_config = { + .reg_bits = 8, + .val_bits = 8, + .max_register = 0x5a, +}; + +static int ej030na_probe(struct spi_device *spi) +{ + struct device *dev = &spi->dev; + struct ej030na *priv; + int err; + + priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + priv->spi = spi; + spi_set_drvdata(spi, priv); + + priv->map = devm_regmap_init_spi(spi, &ej030na_regmap_config); + if (IS_ERR(priv->map)) { + dev_err(dev, "Unable to init regmap\n"); + return PTR_ERR(priv->map); + } + + priv->panel_info = of_device_get_match_data(dev); + if (!priv->panel_info) + return -EINVAL; + + priv->supply = devm_regulator_get(dev, "power"); + if (IS_ERR(priv->supply)) { + dev_err(dev, "Failed to get power supply\n"); + return PTR_ERR(priv->supply); + } + + priv->reset_gpio = devm_gpiod_get(dev, "reset", GPIOD_OUT_HIGH); + if (IS_ERR(priv->reset_gpio)) { + dev_err(dev, "Failed to get reset GPIO\n"); + return PTR_ERR(priv->reset_gpio); + } + + drm_panel_init(&priv->panel, dev, &ej030na_funcs, + DRM_MODE_CONNECTOR_DPI); + + err = drm_panel_of_backlight(&priv->panel); + if (err) + return err; + + drm_panel_add(&priv->panel); + + return 0; +} + +static int ej030na_remove(struct spi_device *spi) +{ + struct ej030na *priv = spi_get_drvdata(spi); + + drm_panel_remove(&priv->panel); + drm_panel_disable(&priv->panel); + drm_panel_unprepare(&priv->panel); + + return 0; +} + +static const struct drm_display_mode ej030na_modes[] = { + { /* 60 Hz */ + .clock = 14400, + .hdisplay = 320, + .hsync_start = 320 + 10, + .hsync_end = 320 + 10 + 37, + .htotal = 320 + 10 + 37 + 33, + .vdisplay = 480, + .vsync_start = 480 + 102, + .vsync_end = 480 + 102 + 9 + 9, + .vtotal = 480 + 102 + 9 + 9, + .flags = DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC, + }, + { /* 50 Hz */ + .clock = 12000, + .hdisplay = 320, + .hsync_start = 320 + 10, + .hsync_end = 320 + 10 + 37, + .htotal = 320 + 10 + 37 + 33, + .vdisplay = 480, + .vsync_start = 480 + 102, + .vsync_end = 480 + 102 + 9, + .vtotal = 480 + 102 + 9 + 9, + .flags = DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC, + }, +}; + +static const struct ej030na_info ej030na_info = { + .display_modes = ej030na_modes, + .num_modes = ARRAY_SIZE(ej030na_modes), + .width_mm = 70, + .height_mm = 51, + .bus_format = MEDIA_BUS_FMT_RGB888_3X8_DELTA, + .bus_flags = DRM_BUS_FLAG_PIXDATA_SAMPLE_POSEDGE | DRM_BUS_FLAG_DE_LOW, +}; + +static const struct of_device_id ej030na_of_match[] = { + { .compatible = "innolux,ej030na", .data = &ej030na_info }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, ej030na_of_match); + +static struct spi_driver ej030na_driver = { + .driver = { + .name = "panel-innolux-ej030na", + .of_match_table = ej030na_of_match, + }, + .probe = ej030na_probe, + .remove = ej030na_remove, +}; +module_spi_driver(ej030na_driver); + +MODULE_AUTHOR("Paul Cercueil 
<paul@crapouillou.net>"); +MODULE_AUTHOR("Christophe Branchereau <cbranchereau@gmail.com>"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/gpu/drm/panel/panel-khadas-ts050.c b/drivers/gpu/drm/panel/panel-khadas-ts050.c index 8f6ac1a40c31..a3ec4cbdbf7a 100644 --- a/drivers/gpu/drm/panel/panel-khadas-ts050.c +++ b/drivers/gpu/drm/panel/panel-khadas-ts050.c @@ -809,7 +809,7 @@ static int khadas_ts050_panel_probe(struct mipi_dsi_device *dsi) dsi->lanes = 4; dsi->format = MIPI_DSI_FMT_RGB888; dsi->mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_BURST | - MIPI_DSI_MODE_LPM | MIPI_DSI_MODE_EOT_PACKET; + MIPI_DSI_MODE_LPM | MIPI_DSI_MODE_NO_EOT_PACKET; khadas_ts050 = devm_kzalloc(&dsi->dev, sizeof(*khadas_ts050), GFP_KERNEL); diff --git a/drivers/gpu/drm/panel/panel-leadtek-ltk050h3146w.c b/drivers/gpu/drm/panel/panel-leadtek-ltk050h3146w.c index ed0d5f959037..a5a414920430 100644 --- a/drivers/gpu/drm/panel/panel-leadtek-ltk050h3146w.c +++ b/drivers/gpu/drm/panel/panel-leadtek-ltk050h3146w.c @@ -593,7 +593,7 @@ static int ltk050h3146w_probe(struct mipi_dsi_device *dsi) dsi->lanes = 4; dsi->format = MIPI_DSI_FMT_RGB888; dsi->mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_BURST | - MIPI_DSI_MODE_LPM | MIPI_DSI_MODE_EOT_PACKET; + MIPI_DSI_MODE_LPM | MIPI_DSI_MODE_NO_EOT_PACKET; drm_panel_init(&ctx->panel, &dsi->dev, &ltk050h3146w_funcs, DRM_MODE_CONNECTOR_DSI); diff --git a/drivers/gpu/drm/panel/panel-leadtek-ltk500hd1829.c b/drivers/gpu/drm/panel/panel-leadtek-ltk500hd1829.c index 3c00e4f8f803..21e48923836d 100644 --- a/drivers/gpu/drm/panel/panel-leadtek-ltk500hd1829.c +++ b/drivers/gpu/drm/panel/panel-leadtek-ltk500hd1829.c @@ -442,7 +442,7 @@ static int ltk500hd1829_probe(struct mipi_dsi_device *dsi) dsi->lanes = 4; dsi->format = MIPI_DSI_FMT_RGB888; dsi->mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_BURST | - MIPI_DSI_MODE_LPM | MIPI_DSI_MODE_EOT_PACKET; + MIPI_DSI_MODE_LPM | MIPI_DSI_MODE_NO_EOT_PACKET; drm_panel_init(&ctx->panel, &dsi->dev, &ltk500hd1829_funcs, DRM_MODE_CONNECTOR_DSI); diff --git a/drivers/gpu/drm/panel/panel-osd-osd101t2587-53ts.c b/drivers/gpu/drm/panel/panel-osd-osd101t2587-53ts.c index 45b975dee587..198493a6eb6a 100644 --- a/drivers/gpu/drm/panel/panel-osd-osd101t2587-53ts.c +++ b/drivers/gpu/drm/panel/panel-osd-osd101t2587-53ts.c @@ -184,7 +184,7 @@ static int osd101t2587_panel_probe(struct mipi_dsi_device *dsi) dsi->mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_BURST | MIPI_DSI_MODE_VIDEO_SYNC_PULSE | - MIPI_DSI_MODE_EOT_PACKET; + MIPI_DSI_MODE_NO_EOT_PACKET; osd101t2587 = devm_kzalloc(&dsi->dev, sizeof(*osd101t2587), GFP_KERNEL); if (!osd101t2587) diff --git a/drivers/gpu/drm/panel/panel-samsung-atna33xc20.c b/drivers/gpu/drm/panel/panel-samsung-atna33xc20.c new file mode 100644 index 000000000000..221db6512859 --- /dev/null +++ b/drivers/gpu/drm/panel/panel-samsung-atna33xc20.c @@ -0,0 +1,366 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright 2021 Google Inc. + * + * Panel driver for the Samsung ATNA33XC20 panel. This panel can't be handled + * by the DRM_PANEL_SIMPLE driver because its power sequencing is non-standard.
+ */ + +#include <linux/backlight.h> +#include <linux/delay.h> +#include <linux/gpio/consumer.h> +#include <linux/iopoll.h> +#include <linux/module.h> +#include <linux/pm_runtime.h> +#include <linux/regulator/consumer.h> + +#include <drm/drm_dp_aux_bus.h> +#include <drm/drm_dp_helper.h> +#include <drm/drm_edid.h> +#include <drm/drm_panel.h> + +struct atana33xc20_panel { + struct drm_panel base; + bool prepared; + bool enabled; + bool el3_was_on; + + bool no_hpd; + struct gpio_desc *hpd_gpio; + + struct regulator *supply; + struct gpio_desc *el_on3_gpio; + + struct edid *edid; + + ktime_t powered_off_time; + ktime_t powered_on_time; + ktime_t el_on3_off_time; +}; + +static inline struct atana33xc20_panel *to_atana33xc20(struct drm_panel *panel) +{ + return container_of(panel, struct atana33xc20_panel, base); +} + +static void atana33xc20_wait(ktime_t start_ktime, unsigned int min_ms) +{ + ktime_t now_ktime, min_ktime; + + min_ktime = ktime_add(start_ktime, ms_to_ktime(min_ms)); + now_ktime = ktime_get(); + + if (ktime_before(now_ktime, min_ktime)) + msleep(ktime_to_ms(ktime_sub(min_ktime, now_ktime)) + 1); +} + +static int atana33xc20_suspend(struct device *dev) +{ + struct atana33xc20_panel *p = dev_get_drvdata(dev); + int ret; + + /* + * Note 3 (Example of power off sequence in detail) in spec + * specifies to wait 150 ms after deasserting EL3_ON before + * powering off. + */ + if (p->el3_was_on) + atana33xc20_wait(p->el_on3_off_time, 150); + + ret = regulator_disable(p->supply); + if (ret) + return ret; + p->powered_off_time = ktime_get(); + p->el3_was_on = false; + + return 0; +} + +static int atana33xc20_resume(struct device *dev) +{ + struct atana33xc20_panel *p = dev_get_drvdata(dev); + bool hpd_asserted = false; + int ret; + + /* T12 (Power off time) is min 500 ms */ + atana33xc20_wait(p->powered_off_time, 500); + + ret = regulator_enable(p->supply); + if (ret) + return ret; + p->powered_on_time = ktime_get(); + + /* + * Handle HPD. Note: if HPD is hooked up to a dedicated pin on the + * eDP controller then "no_hpd" will be false _and_ "hpd_gpio" will be + * NULL. It's up to the controller driver to wait for HPD after + * preparing the panel in that case. + */ + if (p->no_hpd) { + /* T3 VCC to HPD high is max 200 ms */ + msleep(200); + } else if (p->hpd_gpio) { + ret = readx_poll_timeout(gpiod_get_value_cansleep, p->hpd_gpio, + hpd_asserted, hpd_asserted, + 1000, 200000); + if (!hpd_asserted) + dev_warn(dev, "Timeout waiting for HPD\n"); + } + + return 0; +} + +static int atana33xc20_disable(struct drm_panel *panel) +{ + struct atana33xc20_panel *p = to_atana33xc20(panel); + + /* Disabling when already disabled is a no-op */ + if (!p->enabled) + return 0; + + gpiod_set_value_cansleep(p->el_on3_gpio, 0); + p->el_on3_off_time = ktime_get(); + p->enabled = false; + + /* + * Keep track of the fact that EL_ON3 was on but we haven't power + * cycled yet. This lets us know that "el_on3_off_time" is recent (we + * don't need to worry about ktime wraparounds) and also makes it + * obvious if we try to enable again without a power cycle (see the + * warning in atana33xc20_enable()). + */ + p->el3_was_on = true; + + /* + * Sleeping 20 ms here (after setting the GPIO) avoids a glitch when + * powering off. 
+ */ + msleep(20); + + return 0; +} + +static int atana33xc20_enable(struct drm_panel *panel) +{ + struct atana33xc20_panel *p = to_atana33xc20(panel); + + /* Enabling when already enabled is a no-op */ + if (p->enabled) + return 0; + + /* + * Once EL_ON3 drops we absolutely need a power cycle before the next + * enable or the backlight will never come on again. The code ensures + * this because disable() is _always_ followed by unprepare() and + * unprepare() forces a suspend with pm_runtime_put_sync_suspend(), + * but let's track just to make sure since the requirement is so + * non-obvious. + */ + if (WARN_ON(p->el3_was_on)) + return -EIO; + + /* + * Note 2 (Example of power on sequence in detail) in spec specifies + * to wait 400 ms after powering on before asserting EL3_on. + */ + atana33xc20_wait(p->powered_on_time, 400); + + gpiod_set_value_cansleep(p->el_on3_gpio, 1); + p->enabled = true; + + return 0; +} + +static int atana33xc20_unprepare(struct drm_panel *panel) +{ + struct atana33xc20_panel *p = to_atana33xc20(panel); + int ret; + + /* Unpreparing when already unprepared is a no-op */ + if (!p->prepared) + return 0; + + /* + * Purposely do a put_sync, don't use autosuspend. The panel's tcon + * seems to sometimes crash when you stop giving it data and this is + * the best way to ensure it will come back. + * + * NOTE: we still want autosuspend for cases where we only turn on + * to get the EDID or otherwise send DP AUX commands to the panel. + */ + ret = pm_runtime_put_sync_suspend(panel->dev); + if (ret < 0) + return ret; + p->prepared = false; + + return 0; +} + +static int atana33xc20_prepare(struct drm_panel *panel) +{ + struct atana33xc20_panel *p = to_atana33xc20(panel); + int ret; + + /* Preparing when already prepared is a no-op */ + if (p->prepared) + return 0; + + ret = pm_runtime_get_sync(panel->dev); + if (ret < 0) { + pm_runtime_put_autosuspend(panel->dev); + return ret; + } + p->prepared = true; + + return 0; +} + +static int atana33xc20_get_modes(struct drm_panel *panel, + struct drm_connector *connector) +{ + struct atana33xc20_panel *p = to_atana33xc20(panel); + struct dp_aux_ep_device *aux_ep = to_dp_aux_ep_dev(panel->dev); + int num = 0; + + pm_runtime_get_sync(panel->dev); + + if (!p->edid) + p->edid = drm_get_edid(connector, &aux_ep->aux->ddc); + num = drm_add_edid_modes(connector, p->edid); + + pm_runtime_mark_last_busy(panel->dev); + pm_runtime_put_autosuspend(panel->dev); + + return num; +} + +static const struct drm_panel_funcs atana33xc20_funcs = { + .disable = atana33xc20_disable, + .enable = atana33xc20_enable, + .unprepare = atana33xc20_unprepare, + .prepare = atana33xc20_prepare, + .get_modes = atana33xc20_get_modes, +}; + +static void atana33xc20_runtime_disable(void *data) +{ + pm_runtime_disable(data); +} + +static void atana33xc20_dont_use_autosuspend(void *data) +{ + pm_runtime_dont_use_autosuspend(data); +} + +static int atana33xc20_probe(struct dp_aux_ep_device *aux_ep) +{ + struct atana33xc20_panel *panel; + struct device *dev = &aux_ep->dev; + int ret; + + panel = devm_kzalloc(dev, sizeof(*panel), GFP_KERNEL); + if (!panel) + return -ENOMEM; + dev_set_drvdata(dev, panel); + + panel->supply = devm_regulator_get(dev, "power"); + if (IS_ERR(panel->supply)) + return dev_err_probe(dev, PTR_ERR(panel->supply), + "Failed to get power supply\n"); + + panel->el_on3_gpio = devm_gpiod_get(dev, "enable", GPIOD_OUT_LOW); + if (IS_ERR(panel->el_on3_gpio)) + return dev_err_probe(dev, PTR_ERR(panel->el_on3_gpio), + "Failed to get enable GPIO\n"); + + 
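/*
+ * For reference, a minimal sketch (an assumption about the standard
+ * drm_panel call order in the eDP controller or bridge driver, not
+ * something this driver enforces itself) of how the hooks above pair
+ * up. The WARN_ON(p->el3_was_on) in atana33xc20_enable() relies on
+ * unprepare() always running between disable() and the next enable():
+ *
+ *	drm_panel_prepare(panel);	// runtime resume, panel power on
+ *	drm_panel_enable(panel);	// EL_ON3 asserted after the 400 ms wait
+ *	...scanout...
+ *	drm_panel_disable(panel);	// EL_ON3 deasserted
+ *	drm_panel_unprepare(panel);	// pm_runtime_put_sync_suspend(), power cycle
+ */
+
+ 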
panel->no_hpd = of_property_read_bool(dev->of_node, "no-hpd"); + if (!panel->no_hpd) { + panel->hpd_gpio = devm_gpiod_get_optional(dev, "hpd", GPIOD_IN); + if (IS_ERR(panel->hpd_gpio)) + return dev_err_probe(dev, PTR_ERR(panel->hpd_gpio), + "Failed to get HPD GPIO\n"); + } + + pm_runtime_enable(dev); + ret = devm_add_action_or_reset(dev, atana33xc20_runtime_disable, dev); + if (ret) + return ret; + pm_runtime_set_autosuspend_delay(dev, 1000); + pm_runtime_use_autosuspend(dev); + ret = devm_add_action_or_reset(dev, atana33xc20_dont_use_autosuspend, dev); + if (ret) + return ret; + + drm_panel_init(&panel->base, dev, &atana33xc20_funcs, DRM_MODE_CONNECTOR_eDP); + + pm_runtime_get_sync(dev); + ret = drm_panel_dp_aux_backlight(&panel->base, aux_ep->aux); + pm_runtime_mark_last_busy(dev); + pm_runtime_put_autosuspend(dev); + if (ret) + return dev_err_probe(dev, ret, + "failed to register dp aux backlight\n"); + + drm_panel_add(&panel->base); + + return 0; +} + +static void atana33xc20_remove(struct dp_aux_ep_device *aux_ep) +{ + struct device *dev = &aux_ep->dev; + struct atana33xc20_panel *panel = dev_get_drvdata(dev); + + drm_panel_remove(&panel->base); + drm_panel_disable(&panel->base); + drm_panel_unprepare(&panel->base); + + kfree(panel->edid); +} + +static void atana33xc20_shutdown(struct dp_aux_ep_device *aux_ep) +{ + struct device *dev = &aux_ep->dev; + struct atana33xc20_panel *panel = dev_get_drvdata(dev); + + drm_panel_disable(&panel->base); + drm_panel_unprepare(&panel->base); +} + +static const struct of_device_id atana33xc20_dt_match[] = { + { .compatible = "samsung,atna33xc20", }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, atana33xc20_dt_match); + +static const struct dev_pm_ops atana33xc20_pm_ops = { + SET_RUNTIME_PM_OPS(atana33xc20_suspend, atana33xc20_resume, NULL) + SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, + pm_runtime_force_resume) +}; + +static struct dp_aux_ep_driver atana33xc20_driver = { + .driver = { + .name = "samsung_atana33xc20", + .of_match_table = atana33xc20_dt_match, + .pm = &atana33xc20_pm_ops, + }, + .probe = atana33xc20_probe, + .remove = atana33xc20_remove, + .shutdown = atana33xc20_shutdown, +}; + +static int __init atana33xc20_init(void) +{ + return dp_aux_dp_driver_register(&atana33xc20_driver); +} +module_init(atana33xc20_init); + +static void __exit atana33xc20_exit(void) +{ + dp_aux_dp_driver_unregister(&atana33xc20_driver); +} +module_exit(atana33xc20_exit); + +MODULE_DESCRIPTION("Samsung ATANA33XC20 Panel Driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/gpu/drm/panel/panel-samsung-db7430.c b/drivers/gpu/drm/panel/panel-samsung-db7430.c new file mode 100644 index 000000000000..ead479719f00 --- /dev/null +++ b/drivers/gpu/drm/panel/panel-samsung-db7430.c @@ -0,0 +1,347 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Panel driver for the Samsung LMS397KF04 480x800 DPI RGB panel. + * According to the data sheet the display controller is called DB7430. + * Found in the Samsung Galaxy Beam GT-I8350 mobile phone.
+ * Linus Walleij <linus.walleij@linaro.org> + */ +#include <drm/drm_mipi_dbi.h> +#include <drm/drm_modes.h> +#include <drm/drm_panel.h> + +#include <linux/delay.h> +#include <linux/gpio/consumer.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/media-bus-format.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/regulator/consumer.h> +#include <linux/spi/spi.h> + +#include <video/mipi_display.h> + +#define DB7430_ACCESS_PROT_OFF 0xb0 +#define DB7430_UNKNOWN_B4 0xb4 +#define DB7430_USER_SELECT 0xb5 +#define DB7430_UNKNOWN_B7 0xb7 +#define DB7430_UNKNOWN_B8 0xb8 +#define DB7430_PANEL_DRIVING 0xc0 +#define DB7430_SOURCE_CONTROL 0xc1 +#define DB7430_GATE_INTERFACE 0xc4 +#define DB7430_DISPLAY_H_TIMING 0xc5 +#define DB7430_RGB_SYNC_OPTION 0xc6 +#define DB7430_GAMMA_SET_RED 0xc8 +#define DB7430_GAMMA_SET_GREEN 0xc9 +#define DB7430_GAMMA_SET_BLUE 0xca +#define DB7430_BIAS_CURRENT_CTRL 0xd1 +#define DB7430_DDV_CTRL 0xd2 +#define DB7430_GAMMA_CTRL_REF 0xd3 +#define DB7430_UNKNOWN_D4 0xd4 +#define DB7430_DCDC_CTRL 0xd5 +#define DB7430_VCL_CTRL 0xd6 +#define DB7430_UNKNOWN_F8 0xf8 +#define DB7430_UNKNOWN_FC 0xfc + +#define DATA_MASK 0x100 + +/** + * struct db7430 - state container for a panel controlled by the DB7430 + * controller + */ +struct db7430 { + /** @dev: the container device */ + struct device *dev; + /** @dbi: the DBI bus abstraction handle */ + struct mipi_dbi dbi; + /** @panel: the DRM panel instance for this device */ + struct drm_panel panel; + /** @width: the width of this panel in mm */ + u32 width; + /** @height: the height of this panel in mm */ + u32 height; + /** @reset: reset GPIO line */ + struct gpio_desc *reset; + /** @regulators: VCCIO and VIO supply regulators */ + struct regulator_bulk_data regulators[2]; +}; + +static const struct drm_display_mode db7430_480_800_mode = { + /* + * 31 ns period min (htotal*vtotal*vrefresh)/1000 + * gives a Vrefresh of ~71 Hz. + */ + .clock = 32258, + .hdisplay = 480, + .hsync_start = 480 + 10, + .hsync_end = 480 + 10 + 4, + .htotal = 480 + 10 + 4 + 40, + .vdisplay = 800, + .vsync_start = 800 + 6, + .vsync_end = 800 + 6 + 1, + .vtotal = 800 + 6 + 1 + 7, + .width_mm = 53, + .height_mm = 87, + .flags = DRM_MODE_FLAG_NVSYNC | DRM_MODE_FLAG_NHSYNC, +}; + +static inline struct db7430 *to_db7430(struct drm_panel *panel) +{ + return container_of(panel, struct db7430, panel); +} + +static int db7430_power_on(struct db7430 *db) +{ + struct mipi_dbi *dbi = &db->dbi; + int ret; + + /* Power up */ + ret = regulator_bulk_enable(ARRAY_SIZE(db->regulators), + db->regulators); + if (ret) { + dev_err(db->dev, "failed to enable regulators: %d\n", ret); + return ret; + } + msleep(50); + + /* Assert reset >=1 ms */ + gpiod_set_value_cansleep(db->reset, 1); + usleep_range(1000, 5000); + /* De-assert reset */ + gpiod_set_value_cansleep(db->reset, 0); + /* Wait >= 10 ms */ + msleep(10); + dev_dbg(db->dev, "de-asserted RESET\n"); + + /* + * This is set to 0x0a (RGB/BGR order + horizontal flip) in order + * to make the display behave normally. If this is not set the displays + * normal output behaviour is horizontally flipped and BGR ordered. Do + * it twice because the first message doesn't always "take". 
+ */ + mipi_dbi_command(dbi, MIPI_DCS_SET_ADDRESS_MODE, 0x0a); + mipi_dbi_command(dbi, MIPI_DCS_SET_ADDRESS_MODE, 0x0a); + mipi_dbi_command(dbi, DB7430_ACCESS_PROT_OFF, 0x00); + mipi_dbi_command(dbi, DB7430_PANEL_DRIVING, 0x28, 0x08); + mipi_dbi_command(dbi, DB7430_SOURCE_CONTROL, + 0x01, 0x30, 0x15, 0x05, 0x22); + mipi_dbi_command(dbi, DB7430_GATE_INTERFACE, + 0x10, 0x01, 0x00); + mipi_dbi_command(dbi, DB7430_DISPLAY_H_TIMING, + 0x06, 0x55, 0x03, 0x07, 0x0b, + 0x33, 0x00, 0x01, 0x03); + /* + * 0x00 in datasheet 0x01 in vendor code 0x00, it seems 0x01 means + * DE active high and 0x00 means DE active low. + */ + mipi_dbi_command(dbi, DB7430_RGB_SYNC_OPTION, 0x01); + mipi_dbi_command(dbi, DB7430_GAMMA_SET_RED, + /* R positive gamma */ 0x00, + 0x0A, 0x31, 0x3B, 0x4E, 0x58, 0x59, 0x5B, 0x58, 0x5E, 0x62, + 0x60, 0x61, 0x5E, 0x62, 0x55, 0x55, 0x7F, 0x08, + /* R negative gamma */ 0x00, + 0x0A, 0x31, 0x3B, 0x4E, 0x58, 0x59, 0x5B, 0x58, 0x5E, 0x62, + 0x60, 0x61, 0x5E, 0x62, 0x55, 0x55, 0x7F, 0x08); + mipi_dbi_command(dbi, DB7430_GAMMA_SET_GREEN, + /* G positive gamma */ 0x00, + 0x25, 0x15, 0x28, 0x3D, 0x4A, 0x48, 0x4C, 0x4A, 0x52, 0x59, + 0x59, 0x5B, 0x56, 0x60, 0x5D, 0x55, 0x7F, 0x0A, + /* G negative gamma */ 0x00, + 0x25, 0x15, 0x28, 0x3D, 0x4A, 0x48, 0x4C, 0x4A, 0x52, 0x59, + 0x59, 0x5B, 0x56, 0x60, 0x5D, 0x55, 0x7F, 0x0A); + mipi_dbi_command(dbi, DB7430_GAMMA_SET_BLUE, + /* B positive gamma */ 0x00, + 0x48, 0x10, 0x1F, 0x2F, 0x35, 0x38, 0x3D, 0x3C, 0x45, 0x4D, + 0x4E, 0x52, 0x51, 0x60, 0x7F, 0x7E, 0x7F, 0x0C, + /* B negative gamma */ 0x00, + 0x48, 0x10, 0x1F, 0x2F, 0x35, 0x38, 0x3D, 0x3C, 0x45, 0x4D, + 0x4E, 0x52, 0x51, 0x60, 0x7F, 0x7E, 0x7F, 0x0C); + mipi_dbi_command(dbi, DB7430_BIAS_CURRENT_CTRL, 0x33, 0x13); + mipi_dbi_command(dbi, DB7430_DDV_CTRL, 0x11, 0x00, 0x00); + mipi_dbi_command(dbi, DB7430_GAMMA_CTRL_REF, 0x50, 0x50); + mipi_dbi_command(dbi, DB7430_DCDC_CTRL, 0x2f, 0x11, 0x1e, 0x46); + mipi_dbi_command(dbi, DB7430_VCL_CTRL, 0x11, 0x0a); + + return 0; +} + +static int db7430_power_off(struct db7430 *db) +{ + /* Go into RESET and disable regulators */ + gpiod_set_value_cansleep(db->reset, 1); + return regulator_bulk_disable(ARRAY_SIZE(db->regulators), + db->regulators); +} + +static int db7430_unprepare(struct drm_panel *panel) +{ + return db7430_power_off(to_db7430(panel)); +} + +static int db7430_disable(struct drm_panel *panel) +{ + struct db7430 *db = to_db7430(panel); + struct mipi_dbi *dbi = &db->dbi; + + mipi_dbi_command(dbi, MIPI_DCS_SET_DISPLAY_OFF); + msleep(25); + mipi_dbi_command(dbi, MIPI_DCS_ENTER_SLEEP_MODE); + msleep(120); + + return 0; +} + +static int db7430_prepare(struct drm_panel *panel) +{ + return db7430_power_on(to_db7430(panel)); +} + +static int db7430_enable(struct drm_panel *panel) +{ + struct db7430 *db = to_db7430(panel); + struct mipi_dbi *dbi = &db->dbi; + + /* Exit sleep mode */ + mipi_dbi_command(dbi, MIPI_DCS_EXIT_SLEEP_MODE); + msleep(20); + + /* NVM (non-volatile memory) load sequence */ + mipi_dbi_command(dbi, DB7430_UNKNOWN_D4, 0x52, 0x5e); + mipi_dbi_command(dbi, DB7430_UNKNOWN_F8, 0x01, 0xf5, 0xf2, 0x71, 0x44); + mipi_dbi_command(dbi, DB7430_UNKNOWN_FC, 0x00, 0x08); + msleep(150); + + /* CABC turn on sequence (BC = backlight control) */ + mipi_dbi_command(dbi, DB7430_UNKNOWN_B4, 0x0f, 0x00, 0x50); + mipi_dbi_command(dbi, DB7430_USER_SELECT, 0x80); + mipi_dbi_command(dbi, DB7430_UNKNOWN_B7, 0x24); + mipi_dbi_command(dbi, DB7430_UNKNOWN_B8, 0x01); + + /* Turn on display */ + mipi_dbi_command(dbi, MIPI_DCS_SET_DISPLAY_ON); + + return 0; +} + +/** 
+ * db7430_get_modes() - return the mode + * @panel: the panel to get the mode for + * @connector: reference to the central DRM connector control structure + */ +static int db7430_get_modes(struct drm_panel *panel, + struct drm_connector *connector) +{ + struct db7430 *db = to_db7430(panel); + struct drm_display_mode *mode; + static const u32 bus_format = MEDIA_BUS_FMT_RGB888_1X24; + + mode = drm_mode_duplicate(connector->dev, &db7430_480_800_mode); + if (!mode) { + dev_err(db->dev, "failed to add mode\n"); + return -ENOMEM; + } + + connector->display_info.bpc = 8; + connector->display_info.width_mm = mode->width_mm; + connector->display_info.height_mm = mode->height_mm; + connector->display_info.bus_flags = + DRM_BUS_FLAG_PIXDATA_DRIVE_NEGEDGE; + drm_display_info_set_bus_formats(&connector->display_info, + &bus_format, 1); + + drm_mode_set_name(mode); + mode->type = DRM_MODE_TYPE_DRIVER | DRM_MODE_TYPE_PREFERRED; + + drm_mode_probed_add(connector, mode); + + return 1; +} + +static const struct drm_panel_funcs db7430_drm_funcs = { + .disable = db7430_disable, + .unprepare = db7430_unprepare, + .prepare = db7430_prepare, + .enable = db7430_enable, + .get_modes = db7430_get_modes, +}; + +static int db7430_probe(struct spi_device *spi) +{ + struct device *dev = &spi->dev; + struct db7430 *db; + int ret; + + db = devm_kzalloc(dev, sizeof(*db), GFP_KERNEL); + if (!db) + return -ENOMEM; + db->dev = dev; + + /* + * VCI is the analog voltage supply + * VCCIO is the digital I/O voltage supply + */ + db->regulators[0].supply = "vci"; + db->regulators[1].supply = "vccio"; + ret = devm_regulator_bulk_get(dev, + ARRAY_SIZE(db->regulators), + db->regulators); + if (ret) + return dev_err_probe(dev, ret, "failed to get regulators\n"); + + db->reset = devm_gpiod_get(dev, "reset", GPIOD_OUT_HIGH); + if (IS_ERR(db->reset)) { + ret = PTR_ERR(db->reset); + return dev_err_probe(dev, ret, "no RESET GPIO\n"); + } + + ret = mipi_dbi_spi_init(spi, &db->dbi, NULL); + if (ret) + return dev_err_probe(dev, ret, "MIPI DBI init failed\n"); + + drm_panel_init(&db->panel, dev, &db7430_drm_funcs, + DRM_MODE_CONNECTOR_DPI); + + /* FIXME: if no external backlight, use internal backlight */ + ret = drm_panel_of_backlight(&db->panel); + if (ret) + return dev_err_probe(dev, ret, "failed to add backlight\n"); + + spi_set_drvdata(spi, db); + + drm_panel_add(&db->panel); + dev_dbg(dev, "added panel\n"); + + return 0; +} + +static int db7430_remove(struct spi_device *spi) +{ + struct db7430 *db = spi_get_drvdata(spi); + + drm_panel_remove(&db->panel); + return 0; +} + +/* + * The DB7430 display controller may be used in several display products, + * so list the different variants here and add per-variant data if needed. 
+ */ +static const struct of_device_id db7430_match[] = { + { .compatible = "samsung,lms397kf04", }, + {}, +}; +MODULE_DEVICE_TABLE(of, db7430_match); + +static struct spi_driver db7430_driver = { + .probe = db7430_probe, + .remove = db7430_remove, + .driver = { + .name = "db7430-panel", + .of_match_table = db7430_match, + }, +}; +module_spi_driver(db7430_driver); + +MODULE_AUTHOR("Linus Walleij <linus.walleij@linaro.org>"); +MODULE_DESCRIPTION("Samsung DB7430 panel driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/gpu/drm/panel/panel-samsung-s6e63j0x03.c b/drivers/gpu/drm/panel/panel-samsung-s6e63j0x03.c index b962c817fb30..ccc8ed6fe3ae 100644 --- a/drivers/gpu/drm/panel/panel-samsung-s6e63j0x03.c +++ b/drivers/gpu/drm/panel/panel-samsung-s6e63j0x03.c @@ -446,7 +446,7 @@ static int s6e63j0x03_probe(struct mipi_dsi_device *dsi) dsi->lanes = 1; dsi->format = MIPI_DSI_FMT_RGB888; - dsi->mode_flags = MIPI_DSI_MODE_EOT_PACKET; + dsi->mode_flags = MIPI_DSI_MODE_NO_EOT_PACKET; ctx->supplies[0].supply = "vdd3"; ctx->supplies[1].supply = "vci"; diff --git a/drivers/gpu/drm/panel/panel-samsung-s6e63m0-dsi.c b/drivers/gpu/drm/panel/panel-samsung-s6e63m0-dsi.c index 07a48f621289..e0b1a7e354f3 100644 --- a/drivers/gpu/drm/panel/panel-samsung-s6e63m0-dsi.c +++ b/drivers/gpu/drm/panel/panel-samsung-s6e63m0-dsi.c @@ -16,7 +16,8 @@ #define MCS_GLOBAL_PARAM 0xb0 #define S6E63M0_DSI_MAX_CHUNK 15 /* CMD + 15 bytes max */ -static int s6e63m0_dsi_dcs_read(struct device *dev, const u8 cmd, u8 *data) +static int s6e63m0_dsi_dcs_read(struct device *dev, void *trsp, + const u8 cmd, u8 *data) { struct mipi_dsi_device *dsi = to_mipi_dsi_device(dev); int ret; @@ -32,7 +33,8 @@ static int s6e63m0_dsi_dcs_read(struct device *dev, const u8 cmd, u8 *data) return 0; } -static int s6e63m0_dsi_dcs_write(struct device *dev, const u8 *data, size_t len) +static int s6e63m0_dsi_dcs_write(struct device *dev, void *trsp, + const u8 *data, size_t len) { struct mipi_dsi_device *dsi = to_mipi_dsi_device(dev); const u8 *seqp = data; @@ -99,8 +101,8 @@ static int s6e63m0_dsi_probe(struct mipi_dsi_device *dsi) dsi->mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_BURST; - ret = s6e63m0_probe(dev, s6e63m0_dsi_dcs_read, s6e63m0_dsi_dcs_write, - true); + ret = s6e63m0_probe(dev, NULL, s6e63m0_dsi_dcs_read, + s6e63m0_dsi_dcs_write, true); if (ret) return ret; diff --git a/drivers/gpu/drm/panel/panel-samsung-s6e63m0-spi.c b/drivers/gpu/drm/panel/panel-samsung-s6e63m0-spi.c index 326deb3177b6..3669cc3719ce 100644 --- a/drivers/gpu/drm/panel/panel-samsung-s6e63m0-spi.c +++ b/drivers/gpu/drm/panel/panel-samsung-s6e63m0-spi.c @@ -5,62 +5,38 @@ #include <linux/spi/spi.h> #include <linux/delay.h> +#include <drm/drm_mipi_dbi.h> #include <drm/drm_print.h> #include "panel-samsung-s6e63m0.h" -#define DATA_MASK 0x100 +static const u8 s6e63m0_dbi_read_commands[] = { + MCS_READ_ID1, + MCS_READ_ID2, + MCS_READ_ID3, + 0, /* sentinel */ +}; -static int s6e63m0_spi_dcs_read(struct device *dev, const u8 cmd, u8 *data) +static int s6e63m0_spi_dcs_read(struct device *dev, void *trsp, + const u8 cmd, u8 *data) { - struct spi_device *spi = to_spi_device(dev); - u16 buf[1]; - u16 rbuf[1]; + struct mipi_dbi *dbi = trsp; int ret; - /* SPI buffers are always in CPU order */ - buf[0] = (u16)cmd; - ret = spi_write_then_read(spi, buf, 2, rbuf, 2); - dev_dbg(dev, "READ CMD: %04x RET: %04x\n", buf[0], rbuf[0]); - if (!ret) - /* These high 8 bits of the 9 contains the readout */ - *data = (rbuf[0] & 0x1ff) >> 1; + ret = mipi_dbi_command_read(dbi, cmd, data); 
+ if (ret) + dev_err(dev, "error on DBI read command %02x\n", cmd); return ret; } -static int s6e63m0_spi_write_word(struct device *dev, u16 data) -{ - struct spi_device *spi = to_spi_device(dev); - - /* SPI buffers are always in CPU order */ - return spi_write(spi, &data, 2); -} - -static int s6e63m0_spi_dcs_write(struct device *dev, const u8 *data, size_t len) +static int s6e63m0_spi_dcs_write(struct device *dev, void *trsp, + const u8 *data, size_t len) { - int ret = 0; - - dev_dbg(dev, "SPI writing dcs seq: %*ph\n", (int)len, data); - - /* - * This sends 9 bits with the first bit (bit 8) set to 0 - * This indicates that this is a command. Anything after the - * command is data. - */ - ret = s6e63m0_spi_write_word(dev, *data); - - while (!ret && --len) { - ++data; - /* This sends 9 bits with the first bit (bit 8) set to 1 */ - ret = s6e63m0_spi_write_word(dev, *data | DATA_MASK); - } - - if (ret) { - dev_err(dev, "SPI error %d writing dcs seq: %*ph\n", ret, - (int)len, data); - } + struct mipi_dbi *dbi = trsp; + int ret; + ret = mipi_dbi_command_stackbuf(dbi, data[0], (data + 1), (len - 1)); usleep_range(300, 310); return ret; @@ -69,18 +45,21 @@ static int s6e63m0_spi_dcs_write(struct device *dev, const u8 *data, size_t len) static int s6e63m0_spi_probe(struct spi_device *spi) { struct device *dev = &spi->dev; + struct mipi_dbi *dbi; int ret; - spi->bits_per_word = 9; - /* Preserve e.g. SPI_3WIRE setting */ - spi->mode |= SPI_MODE_3; - ret = spi_setup(spi); - if (ret < 0) { - dev_err(dev, "spi setup failed.\n"); - return ret; - } - return s6e63m0_probe(dev, s6e63m0_spi_dcs_read, s6e63m0_spi_dcs_write, - false); + dbi = devm_kzalloc(dev, sizeof(*dbi), GFP_KERNEL); + if (!dbi) + return -ENOMEM; + + ret = mipi_dbi_spi_init(spi, dbi, NULL); + if (ret) + return dev_err_probe(dev, ret, "MIPI DBI init failed\n"); + /* Register our custom MCS read commands */ + dbi->read_commands = s6e63m0_dbi_read_commands; + + return s6e63m0_probe(dev, dbi, s6e63m0_spi_dcs_read, + s6e63m0_spi_dcs_write, false); } static int s6e63m0_spi_remove(struct spi_device *spi) diff --git a/drivers/gpu/drm/panel/panel-samsung-s6e63m0.c b/drivers/gpu/drm/panel/panel-samsung-s6e63m0.c index 603c5dfe8768..35d72ac663d6 100644 --- a/drivers/gpu/drm/panel/panel-samsung-s6e63m0.c +++ b/drivers/gpu/drm/panel/panel-samsung-s6e63m0.c @@ -22,31 +22,6 @@ #include "panel-samsung-s6e63m0.h" -/* Manufacturer Command Set */ -#define MCS_ELVSS_ON 0xb1 -#define MCS_TEMP_SWIRE 0xb2 -#define MCS_PENTILE_1 0xb3 -#define MCS_PENTILE_2 0xb4 -#define MCS_GAMMA_DELTA_Y_RED 0xb5 -#define MCS_GAMMA_DELTA_X_RED 0xb6 -#define MCS_GAMMA_DELTA_Y_GREEN 0xb7 -#define MCS_GAMMA_DELTA_X_GREEN 0xb8 -#define MCS_GAMMA_DELTA_Y_BLUE 0xb9 -#define MCS_GAMMA_DELTA_X_BLUE 0xba -#define MCS_MIECTL1 0xc0 -#define MCS_BCMODE 0xc1 -#define MCS_ERROR_CHECK 0xd5 -#define MCS_READ_ID1 0xda -#define MCS_READ_ID2 0xdb -#define MCS_READ_ID3 0xdc -#define MCS_LEVEL_2_KEY 0xf0 -#define MCS_MTP_KEY 0xf1 -#define MCS_DISCTL 0xf2 -#define MCS_SRCCTL 0xf6 -#define MCS_IFCTL 0xf7 -#define MCS_PANELCTL 0xf8 -#define MCS_PGAMMACTL 0xfa - #define S6E63M0_LCD_ID_VALUE_M2 0xA4 #define S6E63M0_LCD_ID_VALUE_SM2 0xB4 #define S6E63M0_LCD_ID_VALUE_SM2_1 0xB6 @@ -283,8 +258,9 @@ static u8 const s6e63m0_elvss_per_gamma[NUM_GAMMA_LEVELS] = { struct s6e63m0 { struct device *dev; - int (*dcs_read)(struct device *dev, const u8 cmd, u8 *val); - int (*dcs_write)(struct device *dev, const u8 *data, size_t len); + void *transport_data; + int (*dcs_read)(struct device *dev, void *trsp, const u8 
cmd, u8 *val); + int (*dcs_write)(struct device *dev, void *trsp, const u8 *data, size_t len); struct drm_panel panel; struct backlight_device *bl_dev; u8 lcd_type; @@ -340,7 +316,7 @@ static void s6e63m0_dcs_read(struct s6e63m0 *ctx, const u8 cmd, u8 *data) if (ctx->error < 0) return; - ctx->error = ctx->dcs_read(ctx->dev, cmd, data); + ctx->error = ctx->dcs_read(ctx->dev, ctx->transport_data, cmd, data); } static void s6e63m0_dcs_write(struct s6e63m0 *ctx, const u8 *data, size_t len) @@ -348,7 +324,7 @@ static void s6e63m0_dcs_write(struct s6e63m0 *ctx, const u8 *data, size_t len) if (ctx->error < 0 || len == 0) return; - ctx->error = ctx->dcs_write(ctx->dev, data, len); + ctx->error = ctx->dcs_write(ctx->dev, ctx->transport_data, data, len); } #define s6e63m0_dcs_write_seq_static(ctx, seq ...) \ @@ -713,9 +689,9 @@ static int s6e63m0_backlight_register(struct s6e63m0 *ctx, u32 max_brightness) return ret; } -int s6e63m0_probe(struct device *dev, - int (*dcs_read)(struct device *dev, const u8 cmd, u8 *val), - int (*dcs_write)(struct device *dev, const u8 *data, size_t len), +int s6e63m0_probe(struct device *dev, void *trsp, + int (*dcs_read)(struct device *dev, void *trsp, const u8 cmd, u8 *val), + int (*dcs_write)(struct device *dev, void *trsp, const u8 *data, size_t len), bool dsi_mode) { struct s6e63m0 *ctx; @@ -726,6 +702,7 @@ int s6e63m0_probe(struct device *dev, if (!ctx) return -ENOMEM; + ctx->transport_data = trsp; ctx->dsi_mode = dsi_mode; ctx->dcs_read = dcs_read; ctx->dcs_write = dcs_write; diff --git a/drivers/gpu/drm/panel/panel-samsung-s6e63m0.h b/drivers/gpu/drm/panel/panel-samsung-s6e63m0.h index c669fec91763..306605ed1117 100644 --- a/drivers/gpu/drm/panel/panel-samsung-s6e63m0.h +++ b/drivers/gpu/drm/panel/panel-samsung-s6e63m0.h @@ -3,9 +3,36 @@ #ifndef _PANEL_SAMSUNG_S6E63M0_H #define _PANEL_SAMSUNG_S6E63M0_H -int s6e63m0_probe(struct device *dev, - int (*dcs_read)(struct device *dev, const u8 cmd, u8 *val), - int (*dcs_write)(struct device *dev, const u8 *data, +/* Manufacturer Command Set */ +#define MCS_ELVSS_ON 0xb1 +#define MCS_TEMP_SWIRE 0xb2 +#define MCS_PENTILE_1 0xb3 +#define MCS_PENTILE_2 0xb4 +#define MCS_GAMMA_DELTA_Y_RED 0xb5 +#define MCS_GAMMA_DELTA_X_RED 0xb6 +#define MCS_GAMMA_DELTA_Y_GREEN 0xb7 +#define MCS_GAMMA_DELTA_X_GREEN 0xb8 +#define MCS_GAMMA_DELTA_Y_BLUE 0xb9 +#define MCS_GAMMA_DELTA_X_BLUE 0xba +#define MCS_MIECTL1 0xc0 +#define MCS_BCMODE 0xc1 +#define MCS_ERROR_CHECK 0xd5 +#define MCS_READ_ID1 0xda +#define MCS_READ_ID2 0xdb +#define MCS_READ_ID3 0xdc +#define MCS_LEVEL_2_KEY 0xf0 +#define MCS_MTP_KEY 0xf1 +#define MCS_DISCTL 0xf2 +#define MCS_SRCCTL 0xf6 +#define MCS_IFCTL 0xf7 +#define MCS_PANELCTL 0xf8 +#define MCS_PGAMMACTL 0xfa + +int s6e63m0_probe(struct device *dev, void *trsp, + int (*dcs_read)(struct device *dev, void *trsp, + const u8 cmd, u8 *val), + int (*dcs_write)(struct device *dev, void *trsp, + const u8 *data, size_t len), bool dsi_mode); int s6e63m0_remove(struct device *dev); diff --git a/drivers/gpu/drm/panel/panel-samsung-s6e8aa0.c b/drivers/gpu/drm/panel/panel-samsung-s6e8aa0.c index 527371120266..9b3599d6d2de 100644 --- a/drivers/gpu/drm/panel/panel-samsung-s6e8aa0.c +++ b/drivers/gpu/drm/panel/panel-samsung-s6e8aa0.c @@ -990,8 +990,8 @@ static int s6e8aa0_probe(struct mipi_dsi_device *dsi) dsi->lanes = 4; dsi->format = MIPI_DSI_FMT_RGB888; dsi->mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_BURST - | MIPI_DSI_MODE_VIDEO_HFP | MIPI_DSI_MODE_VIDEO_HBP - | MIPI_DSI_MODE_VIDEO_HSA | MIPI_DSI_MODE_EOT_PACKET + | 
MIPI_DSI_MODE_VIDEO_NO_HFP | MIPI_DSI_MODE_VIDEO_NO_HBP + | MIPI_DSI_MODE_VIDEO_NO_HSA | MIPI_DSI_MODE_NO_EOT_PACKET | MIPI_DSI_MODE_VSYNC_FLUSH | MIPI_DSI_MODE_VIDEO_AUTO_VERT; ret = s6e8aa0_parse_dt(ctx); diff --git a/drivers/gpu/drm/panel/panel-sharp-ls043t1le01.c b/drivers/gpu/drm/panel/panel-sharp-ls043t1le01.c index 16dbf0f353ed..b937e24dac8e 100644 --- a/drivers/gpu/drm/panel/panel-sharp-ls043t1le01.c +++ b/drivers/gpu/drm/panel/panel-sharp-ls043t1le01.c @@ -282,7 +282,7 @@ static int sharp_nt_panel_probe(struct mipi_dsi_device *dsi) dsi->mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_HSE | MIPI_DSI_CLOCK_NON_CONTINUOUS | - MIPI_DSI_MODE_EOT_PACKET; + MIPI_DSI_MODE_NO_EOT_PACKET; sharp_nt = devm_kzalloc(&dsi->dev, sizeof(*sharp_nt), GFP_KERNEL); if (!sharp_nt) diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c index 1b80290c2b53..9b6c4e6c38a1 100644 --- a/drivers/gpu/drm/panel/panel-simple.c +++ b/drivers/gpu/drm/panel/panel-simple.c @@ -36,6 +36,8 @@ #include <drm/drm_crtc.h> #include <drm/drm_device.h> +#include <drm/drm_dp_aux_bus.h> +#include <drm/drm_dp_helper.h> #include <drm/drm_mipi_dsi.h> #include <drm/drm_panel.h> @@ -185,6 +187,7 @@ struct panel_simple { struct regulator *supply; struct i2c_adapter *ddc; + struct drm_dp_aux *aux; struct gpio_desc *enable_gpio; struct gpio_desc *hpd_gpio; @@ -657,7 +660,8 @@ static void panel_simple_parse_panel_timing_node(struct device *dev, dev_err(dev, "Reject override mode: No display_timing found\n"); } -static int panel_simple_probe(struct device *dev, const struct panel_desc *desc) +static int panel_simple_probe(struct device *dev, const struct panel_desc *desc, + struct drm_dp_aux *aux) { struct panel_simple *panel; struct display_timing dt; @@ -673,6 +677,7 @@ static int panel_simple_probe(struct device *dev, const struct panel_desc *desc) panel->enabled = false; panel->prepared_time = 0; panel->desc = desc; + panel->aux = aux; panel->no_hpd = of_property_read_bool(dev->of_node, "no-hpd"); if (!panel->no_hpd) { @@ -707,6 +712,8 @@ static int panel_simple_probe(struct device *dev, const struct panel_desc *desc) if (!panel->ddc) return -EPROBE_DEFER; + } else if (aux) { + panel->ddc = &aux->ddc; } if (desc == &panel_dpi) { @@ -742,10 +749,8 @@ static int panel_simple_probe(struct device *dev, const struct panel_desc *desc) desc->bpc != 8); break; case DRM_MODE_CONNECTOR_eDP: - if (desc->bus_format == 0) - dev_warn(dev, "Specify missing bus_format\n"); - if (desc->bpc != 6 && desc->bpc != 8) - dev_warn(dev, "Expected bpc in {6,8} but got: %u\n", desc->bpc); + if (desc->bpc != 6 && desc->bpc != 8 && desc->bpc != 10) + dev_warn(dev, "Expected bpc in {6,8,10} but got: %u\n", desc->bpc); break; case DRM_MODE_CONNECTOR_DSI: if (desc->bpc != 6 && desc->bpc != 8) @@ -793,6 +798,15 @@ static int panel_simple_probe(struct device *dev, const struct panel_desc *desc) if (err) goto disable_pm_runtime; + if (!panel->base.backlight && panel->aux) { + pm_runtime_get_sync(dev); + err = drm_panel_dp_aux_backlight(&panel->base, panel->aux); + pm_runtime_mark_last_busy(dev); + pm_runtime_put_autosuspend(dev); + if (err) + goto disable_pm_runtime; + } + drm_panel_add(&panel->base); return 0; @@ -801,7 +815,7 @@ disable_pm_runtime: pm_runtime_dont_use_autosuspend(dev); pm_runtime_disable(dev); free_ddc: - if (panel->ddc) + if (panel->ddc && (!panel->aux || panel->ddc != &panel->aux->ddc)) put_device(&panel->ddc->dev); return err; @@ -817,7 +831,7 @@ static int panel_simple_remove(struct device *dev) 
pm_runtime_dont_use_autosuspend(dev); pm_runtime_disable(dev); - if (panel->ddc) + if (panel->ddc && (!panel->aux || panel->ddc != &panel->aux->ddc)) put_device(&panel->ddc->dev); return 0; @@ -1080,6 +1094,36 @@ static const struct panel_desc auo_b133xtn01 = { }, }; +static const struct drm_display_mode auo_b133han05_mode = { + .clock = 142600, + .hdisplay = 1920, + .hsync_start = 1920 + 58, + .hsync_end = 1920 + 58 + 42, + .htotal = 1920 + 58 + 42 + 60, + .vdisplay = 1080, + .vsync_start = 1080 + 3, + .vsync_end = 1080 + 3 + 5, + .vtotal = 1080 + 3 + 5 + 54, +}; + +static const struct panel_desc auo_b133han05 = { + .modes = &auo_b133han05_mode, + .num_modes = 1, + .bpc = 8, + .size = { + .width = 293, + .height = 165, + }, + .delay = { + .prepare = 100, + .enable = 20, + .unprepare = 50, + }, + .bus_format = MEDIA_BUS_FMT_RGB888_1X24, + .bus_flags = DRM_BUS_FLAG_DATA_MSB_TO_LSB, + .connector_type = DRM_MODE_CONNECTOR_eDP, +}; + static const struct drm_display_mode auo_b133htn01_mode = { .clock = 150660, .hdisplay = 1920, @@ -1107,6 +1151,36 @@ static const struct panel_desc auo_b133htn01 = { }, }; +static const struct drm_display_mode auo_b140han06_mode = { + .clock = 141000, + .hdisplay = 1920, + .hsync_start = 1920 + 16, + .hsync_end = 1920 + 16 + 16, + .htotal = 1920 + 16 + 16 + 152, + .vdisplay = 1080, + .vsync_start = 1080 + 3, + .vsync_end = 1080 + 3 + 14, + .vtotal = 1080 + 3 + 14 + 19, +}; + +static const struct panel_desc auo_b140han06 = { + .modes = &auo_b140han06_mode, + .num_modes = 1, + .bpc = 8, + .size = { + .width = 309, + .height = 174, + }, + .delay = { + .prepare = 100, + .enable = 20, + .unprepare = 50, + }, + .bus_format = MEDIA_BUS_FMT_RGB888_1X24, + .bus_flags = DRM_BUS_FLAG_DATA_MSB_TO_LSB, + .connector_type = DRM_MODE_CONNECTOR_eDP, +}; + static const struct display_timing auo_g070vvn01_timings = { .pixelclock = { 33300000, 34209000, 45000000 }, .hactive = { 800, 800, 800 }, @@ -1179,6 +1253,8 @@ static const struct panel_desc auo_g104sn02 = { .width = 211, .height = 158, }, + .bus_format = MEDIA_BUS_FMT_RGB888_1X7X4_SPWG, + .connector_type = DRM_MODE_CONNECTOR_LVDS, }; static const struct drm_display_mode auo_g121ean01_mode = { @@ -1929,6 +2005,32 @@ static const struct panel_desc edt_et035012dm6 = { .bus_flags = DRM_BUS_FLAG_DE_LOW | DRM_BUS_FLAG_PIXDATA_SAMPLE_POSEDGE, }; +static const struct drm_display_mode edt_etm0350g0dh6_mode = { + .clock = 6520, + .hdisplay = 320, + .hsync_start = 320 + 20, + .hsync_end = 320 + 20 + 68, + .htotal = 320 + 20 + 68, + .vdisplay = 240, + .vsync_start = 240 + 4, + .vsync_end = 240 + 4 + 18, + .vtotal = 240 + 4 + 18, + .flags = DRM_MODE_FLAG_NVSYNC | DRM_MODE_FLAG_NHSYNC, +}; + +static const struct panel_desc edt_etm0350g0dh6 = { + .modes = &edt_etm0350g0dh6_mode, + .num_modes = 1, + .bpc = 6, + .size = { + .width = 70, + .height = 53, + }, + .bus_format = MEDIA_BUS_FMT_RGB888_1X24, + .bus_flags = DRM_BUS_FLAG_DE_HIGH | DRM_BUS_FLAG_PIXDATA_DRIVE_NEGEDGE, + .connector_type = DRM_MODE_CONNECTOR_DPI, +}; + static const struct drm_display_mode edt_etm043080dh6gp_mode = { .clock = 10870, .hdisplay = 480, @@ -1980,6 +2082,9 @@ static const struct panel_desc edt_etm0430g0dh6 = { .width = 95, .height = 54, }, + .bus_format = MEDIA_BUS_FMT_RGB666_1X18, + .bus_flags = DRM_BUS_FLAG_DE_HIGH | DRM_BUS_FLAG_PIXDATA_SAMPLE_POSEDGE, + .connector_type = DRM_MODE_CONNECTOR_DPI, }; static const struct drm_display_mode edt_et057090dhu_mode = { @@ -2044,6 +2149,60 @@ static const struct panel_desc edt_etm0700g0bdh6 = { }, .bus_format = 
MEDIA_BUS_FMT_RGB666_1X18, .bus_flags = DRM_BUS_FLAG_DE_HIGH | DRM_BUS_FLAG_PIXDATA_DRIVE_POSEDGE, + .connector_type = DRM_MODE_CONNECTOR_DPI, +}; + +static const struct drm_display_mode edt_etmv570g2dhu_mode = { + .clock = 25175, + .hdisplay = 640, + .hsync_start = 640, + .hsync_end = 640 + 16, + .htotal = 640 + 16 + 30 + 114, + .vdisplay = 480, + .vsync_start = 480 + 10, + .vsync_end = 480 + 10 + 3, + .vtotal = 480 + 10 + 3 + 35, + .flags = DRM_MODE_FLAG_PVSYNC | DRM_MODE_FLAG_PHSYNC, +}; + +static const struct panel_desc edt_etmv570g2dhu = { + .modes = &edt_etmv570g2dhu_mode, + .num_modes = 1, + .bpc = 6, + .size = { + .width = 115, + .height = 86, + }, + .bus_format = MEDIA_BUS_FMT_RGB888_1X24, + .bus_flags = DRM_BUS_FLAG_DE_HIGH | DRM_BUS_FLAG_PIXDATA_DRIVE_NEGEDGE, + .connector_type = DRM_MODE_CONNECTOR_DPI, +}; + +static const struct display_timing eink_vb3300_kca_timing = { + .pixelclock = { 40000000, 40000000, 40000000 }, + .hactive = { 334, 334, 334 }, + .hfront_porch = { 1, 1, 1 }, + .hback_porch = { 1, 1, 1 }, + .hsync_len = { 1, 1, 1 }, + .vactive = { 1405, 1405, 1405 }, + .vfront_porch = { 1, 1, 1 }, + .vback_porch = { 1, 1, 1 }, + .vsync_len = { 1, 1, 1 }, + .flags = DISPLAY_FLAGS_HSYNC_LOW | DISPLAY_FLAGS_VSYNC_LOW | + DISPLAY_FLAGS_DE_HIGH | DISPLAY_FLAGS_PIXDATA_POSEDGE, +}; + +static const struct panel_desc eink_vb3300_kca = { + .timings = &eink_vb3300_kca_timing, + .num_timings = 1, + .bpc = 6, + .size = { + .width = 157, + .height = 209, + }, + .bus_format = MEDIA_BUS_FMT_RGB888_1X24, + .bus_flags = DRM_BUS_FLAG_DE_HIGH | DRM_BUS_FLAG_PIXDATA_DRIVE_POSEDGE, + .connector_type = DRM_MODE_CONNECTOR_DPI, }; static const struct display_timing evervision_vgg804821_timing = { @@ -2967,6 +3126,38 @@ static const struct panel_desc logictechno_lt170410_2whc = { .connector_type = DRM_MODE_CONNECTOR_LVDS, }; +static const struct drm_display_mode logictechno_lttd800480070_l6wh_rt_mode = { + .clock = 33000, + .hdisplay = 800, + .hsync_start = 800 + 154, + .hsync_end = 800 + 154 + 3, + .htotal = 800 + 154 + 3 + 43, + .vdisplay = 480, + .vsync_start = 480 + 47, + .vsync_end = 480 + 47 + 3, + .vtotal = 480 + 47 + 3 + 20, + .flags = DRM_MODE_FLAG_NVSYNC | DRM_MODE_FLAG_NHSYNC, +}; + +static const struct panel_desc logictechno_lttd800480070_l6wh_rt = { + .modes = &logictechno_lttd800480070_l6wh_rt_mode, + .num_modes = 1, + .bpc = 8, + .size = { + .width = 154, + .height = 86, + }, + .delay = { + .prepare = 45, + .enable = 100, + .disable = 100, + .unprepare = 45 + }, + .bus_format = MEDIA_BUS_FMT_RGB888_1X24, + .bus_flags = DRM_BUS_FLAG_PIXDATA_SAMPLE_NEGEDGE, + .connector_type = DRM_MODE_CONNECTOR_DPI, +}; + static const struct drm_display_mode mitsubishi_aa070mc01_mode = { .clock = 30400, .hdisplay = 800, @@ -3033,6 +3224,37 @@ static const struct panel_desc mitsubishi_aa070mc01 = { .bus_flags = DRM_BUS_FLAG_DE_HIGH, }; +static const struct display_timing multi_inno_mi1010ait_1cp_timing = { + .pixelclock = { 68900000, 70000000, 73400000 }, + .hactive = { 1280, 1280, 1280 }, + .hfront_porch = { 30, 60, 71 }, + .hback_porch = { 30, 60, 71 }, + .hsync_len = { 10, 10, 48 }, + .vactive = { 800, 800, 800 }, + .vfront_porch = { 5, 10, 10 }, + .vback_porch = { 5, 10, 10 }, + .vsync_len = { 5, 6, 13 }, + .flags = DISPLAY_FLAGS_HSYNC_LOW | DISPLAY_FLAGS_VSYNC_LOW | + DISPLAY_FLAGS_DE_HIGH, +}; + +static const struct panel_desc multi_inno_mi1010ait_1cp = { + .timings = &multi_inno_mi1010ait_1cp_timing, + .num_timings = 1, + .bpc = 8, + .size = { + .width = 217, + .height = 136, + }, + .delay = { 
+ .enable = 50, + .disable = 50, + }, + .bus_format = MEDIA_BUS_FMT_RGB888_1X7X4_SPWG, + .bus_flags = DRM_BUS_FLAG_DE_HIGH, + .connector_type = DRM_MODE_CONNECTOR_LVDS, +}; + static const struct display_timing nec_nl12880bc20_05_timing = { .pixelclock = { 67000000, 71000000, 75000000 }, .hactive = { 1280, 1280, 1280 }, @@ -3463,6 +3685,46 @@ static const struct panel_desc qd43003c0_40 = { .bus_format = MEDIA_BUS_FMT_RGB888_1X24, }; +static const struct drm_display_mode qishenglong_gopher2b_lcd_modes[] = { + { /* 60 Hz */ + .clock = 10800, + .hdisplay = 480, + .hsync_start = 480 + 77, + .hsync_end = 480 + 77 + 41, + .htotal = 480 + 77 + 41 + 2, + .vdisplay = 272, + .vsync_start = 272 + 16, + .vsync_end = 272 + 16 + 10, + .vtotal = 272 + 16 + 10 + 2, + .flags = DRM_MODE_FLAG_NVSYNC | DRM_MODE_FLAG_NHSYNC, + }, + { /* 50 Hz */ + .clock = 10800, + .hdisplay = 480, + .hsync_start = 480 + 17, + .hsync_end = 480 + 17 + 41, + .htotal = 480 + 17 + 41 + 2, + .vdisplay = 272, + .vsync_start = 272 + 116, + .vsync_end = 272 + 116 + 10, + .vtotal = 272 + 116 + 10 + 2, + .flags = DRM_MODE_FLAG_NVSYNC | DRM_MODE_FLAG_NHSYNC, + }, +}; + +static const struct panel_desc qishenglong_gopher2b_lcd = { + .modes = qishenglong_gopher2b_lcd_modes, + .num_modes = ARRAY_SIZE(qishenglong_gopher2b_lcd_modes), + .bpc = 8, + .size = { + .width = 95, + .height = 54, + }, + .bus_format = MEDIA_BUS_FMT_RGB888_1X24, + .bus_flags = DRM_BUS_FLAG_DE_HIGH | DRM_BUS_FLAG_PIXDATA_SAMPLE_NEGEDGE, + .connector_type = DRM_MODE_CONNECTOR_DPI, +}; + static const struct display_timing rocktech_rk070er9427_timing = { .pixelclock = { 26400000, 33300000, 46800000 }, .hactive = { 800, 800, 800 }, @@ -4234,9 +4496,15 @@ static const struct of_device_id platform_of_match[] = { .compatible = "auo,b116xw03", .data = &auo_b116xw03, }, { + .compatible = "auo,b133han05", + .data = &auo_b133han05, + }, { .compatible = "auo,b133htn01", .data = &auo_b133htn01, }, { + .compatible = "auo,b140han06", + .data = &auo_b140han06, + }, { .compatible = "auo,b133xtn01", .data = &auo_b133xtn01, }, { @@ -4330,6 +4598,9 @@ static const struct of_device_id platform_of_match[] = { .compatible = "edt,et035012dm6", .data = &edt_et035012dm6, }, { + .compatible = "edt,etm0350g0dh6", + .data = &edt_etm0350g0dh6, + }, { .compatible = "edt,etm043080dh6gp", .data = &edt_etm043080dh6gp, }, { @@ -4351,6 +4622,12 @@ static const struct of_device_id platform_of_match[] = { .compatible = "edt,etm0700g0edh6", .data = &edt_etm0700g0bdh6, }, { + .compatible = "edt,etmv570g2dhu", + .data = &edt_etmv570g2dhu, + }, { + .compatible = "eink,vb3300-kca", + .data = &eink_vb3300_kca, + }, { .compatible = "evervision,vgg804821", .data = &evervision_vgg804821, }, { @@ -4462,9 +4739,15 @@ static const struct of_device_id platform_of_match[] = { .compatible = "logictechno,lt170410-2whc", .data = &logictechno_lt170410_2whc, }, { + .compatible = "logictechno,lttd800480070-l6wh-rt", + .data = &logictechno_lttd800480070_l6wh_rt, + }, { .compatible = "mitsubishi,aa070mc01-ca1", .data = &mitsubishi_aa070mc01, }, { + .compatible = "multi-inno,mi1010ait-1cp", + .data = &multi_inno_mi1010ait_1cp, + }, { .compatible = "nec,nl12880bc20-05", .data = &nec_nl12880bc20_05, }, { @@ -4516,6 +4799,9 @@ static const struct of_device_id platform_of_match[] = { .compatible = "qiaodian,qd43003c0-40", .data = &qd43003c0_40, }, { + .compatible = "qishenglong,gopher2b-lcd", + .data = &qishenglong_gopher2b_lcd, + }, { .compatible = "rocktech,rk070er9427", .data = &rocktech_rk070er9427, }, { @@ -4632,7 +4918,7 @@ 
static int panel_simple_platform_probe(struct platform_device *pdev) if (!id) return -ENODEV; - return panel_simple_probe(&pdev->dev, id->data); + return panel_simple_probe(&pdev->dev, id->data, NULL); } static int panel_simple_platform_remove(struct platform_device *pdev) @@ -4867,7 +5153,7 @@ static const struct panel_desc_dsi osd101t2045_53ts = { }, .flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_BURST | MIPI_DSI_MODE_VIDEO_SYNC_PULSE | - MIPI_DSI_MODE_EOT_PACKET, + MIPI_DSI_MODE_NO_EOT_PACKET, .format = MIPI_DSI_FMT_RGB888, .lanes = 4, }; @@ -4912,7 +5198,7 @@ static int panel_simple_dsi_probe(struct mipi_dsi_device *dsi) desc = id->data; - err = panel_simple_probe(&dsi->dev, &desc->desc); + err = panel_simple_probe(&dsi->dev, &desc->desc, NULL); if (err < 0) return err; @@ -4957,6 +5243,38 @@ static struct mipi_dsi_driver panel_simple_dsi_driver = { .shutdown = panel_simple_dsi_shutdown, }; +static int panel_simple_dp_aux_ep_probe(struct dp_aux_ep_device *aux_ep) +{ + const struct of_device_id *id; + + id = of_match_node(platform_of_match, aux_ep->dev.of_node); + if (!id) + return -ENODEV; + + return panel_simple_probe(&aux_ep->dev, id->data, aux_ep->aux); +} + +static void panel_simple_dp_aux_ep_remove(struct dp_aux_ep_device *aux_ep) +{ + panel_simple_remove(&aux_ep->dev); +} + +static void panel_simple_dp_aux_ep_shutdown(struct dp_aux_ep_device *aux_ep) +{ + panel_simple_shutdown(&aux_ep->dev); +} + +static struct dp_aux_ep_driver panel_simple_dp_aux_ep_driver = { + .driver = { + .name = "panel-simple-dp-aux", + .of_match_table = platform_of_match, /* Same as platform one! */ + .pm = &panel_simple_pm_ops, + }, + .probe = panel_simple_dp_aux_ep_probe, + .remove = panel_simple_dp_aux_ep_remove, + .shutdown = panel_simple_dp_aux_ep_shutdown, +}; + static int __init panel_simple_init(void) { int err; @@ -4965,15 +5283,25 @@ static int __init panel_simple_init(void) if (err < 0) return err; + err = dp_aux_dp_driver_register(&panel_simple_dp_aux_ep_driver); + if (err < 0) + goto err_did_platform_register; + if (IS_ENABLED(CONFIG_DRM_MIPI_DSI)) { err = mipi_dsi_driver_register(&panel_simple_dsi_driver); - if (err < 0) { - platform_driver_unregister(&panel_simple_platform_driver); - return err; - } + if (err < 0) + goto err_did_aux_ep_register; } return 0; + +err_did_aux_ep_register: + dp_aux_dp_driver_unregister(&panel_simple_dp_aux_ep_driver); + +err_did_platform_register: + platform_driver_unregister(&panel_simple_platform_driver); + + return err; } module_init(panel_simple_init); @@ -4982,6 +5310,7 @@ static void __exit panel_simple_exit(void) if (IS_ENABLED(CONFIG_DRM_MIPI_DSI)) mipi_dsi_driver_unregister(&panel_simple_dsi_driver); + dp_aux_dp_driver_unregister(&panel_simple_dp_aux_ep_driver); platform_driver_unregister(&panel_simple_platform_driver); } module_exit(panel_simple_exit); diff --git a/drivers/gpu/drm/panel/panel-sony-acx424akp.c b/drivers/gpu/drm/panel/panel-sony-acx424akp.c index 95659a4d15e9..9536d56a94a5 100644 --- a/drivers/gpu/drm/panel/panel-sony-acx424akp.c +++ b/drivers/gpu/drm/panel/panel-sony-acx424akp.c @@ -40,7 +40,6 @@ struct acx424akp { struct drm_panel panel; struct device *dev; - struct backlight_device *bl; struct regulator *supply; struct gpio_desc *reset_gpio; bool video_mode; @@ -102,6 +101,18 @@ static int acx424akp_set_brightness(struct backlight_device *bl) u8 par; int ret; + if (backlight_is_blank(bl)) { + /* Disable backlight */ + par = 0x00; + ret = mipi_dsi_dcs_write(dsi, MIPI_DCS_WRITE_CONTROL_DISPLAY, + &par, 1); + if (ret) { + 
dev_err(acx->dev, "failed to disable display backlight (%d)\n", ret); + return ret; + } + return 0; + } + /* Calculate the PWM duty cycle in n/256's */ pwm_ratio = max(((duty_ns * 256) / period_ns) - 1, 1); pwm_div = max(1, @@ -172,6 +183,12 @@ static const struct backlight_ops acx424akp_bl_ops = { .update_status = acx424akp_set_brightness, }; +static const struct backlight_properties acx424akp_bl_props = { + .type = BACKLIGHT_RAW, + .brightness = 512, + .max_brightness = 1023, +}; + static int acx424akp_read_id(struct acx424akp *acx) { struct mipi_dsi_device *dsi = to_mipi_dsi_device(acx->dev); @@ -310,8 +327,6 @@ static int acx424akp_prepare(struct drm_panel *panel) } } - acx->bl->props.power = FB_BLANK_NORMAL; - return 0; err_power_off: @@ -323,18 +338,8 @@ static int acx424akp_unprepare(struct drm_panel *panel) { struct acx424akp *acx = panel_to_acx424akp(panel); struct mipi_dsi_device *dsi = to_mipi_dsi_device(acx->dev); - u8 par; int ret; - /* Disable backlight */ - par = 0x00; - ret = mipi_dsi_dcs_write(dsi, MIPI_DCS_WRITE_CONTROL_DISPLAY, - &par, 1); - if (ret) { - dev_err(acx->dev, "failed to disable display backlight (%d)\n", ret); - return ret; - } - ret = mipi_dsi_dcs_set_display_off(dsi); if (ret) { dev_err(acx->dev, "failed to turn display off (%d)\n", ret); @@ -350,36 +355,10 @@ static int acx424akp_unprepare(struct drm_panel *panel) msleep(85); acx424akp_power_off(acx); - acx->bl->props.power = FB_BLANK_POWERDOWN; - - return 0; -} - -static int acx424akp_enable(struct drm_panel *panel) -{ - struct acx424akp *acx = panel_to_acx424akp(panel); - - /* - * The backlight is on as long as the display is on - * so no use to call backlight_enable() here. - */ - acx->bl->props.power = FB_BLANK_UNBLANK; return 0; } -static int acx424akp_disable(struct drm_panel *panel) -{ - struct acx424akp *acx = panel_to_acx424akp(panel); - - /* - * The backlight is on as long as the display is on - * so no use to call backlight_disable() here. 
- */ - acx->bl->props.power = FB_BLANK_NORMAL; - - return 0; -} static int acx424akp_get_modes(struct drm_panel *panel, struct drm_connector *connector) @@ -409,10 +388,8 @@ static int acx424akp_get_modes(struct drm_panel *panel, } static const struct drm_panel_funcs acx424akp_drm_funcs = { - .disable = acx424akp_disable, .unprepare = acx424akp_unprepare, .prepare = acx424akp_prepare, - .enable = acx424akp_enable, .get_modes = acx424akp_get_modes, }; @@ -458,25 +435,18 @@ static int acx424akp_probe(struct mipi_dsi_device *dsi) /* This asserts RESET by default */ acx->reset_gpio = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_HIGH); - if (IS_ERR(acx->reset_gpio)) { - ret = PTR_ERR(acx->reset_gpio); - if (ret != -EPROBE_DEFER) - dev_err(dev, "failed to request GPIO (%d)\n", ret); - return ret; - } + if (IS_ERR(acx->reset_gpio)) + return dev_err_probe(dev, PTR_ERR(acx->reset_gpio), + "failed to request GPIO\n"); drm_panel_init(&acx->panel, dev, &acx424akp_drm_funcs, DRM_MODE_CONNECTOR_DSI); - acx->bl = devm_backlight_device_register(dev, "acx424akp", dev, acx, - &acx424akp_bl_ops, NULL); - if (IS_ERR(acx->bl)) { - dev_err(dev, "failed to register backlight device\n"); - return PTR_ERR(acx->bl); - } - acx->bl->props.max_brightness = 1023; - acx->bl->props.brightness = 512; - acx->bl->props.power = FB_BLANK_POWERDOWN; + acx->panel.backlight = devm_backlight_device_register(dev, "acx424akp", dev, acx, + &acx424akp_bl_ops, &acx424akp_bl_props); + if (IS_ERR(acx->panel.backlight)) + return dev_err_probe(dev, PTR_ERR(acx->panel.backlight), + "failed to register backlight device\n"); drm_panel_add(&acx->panel); diff --git a/drivers/gpu/drm/panel/panel-widechips-ws2401.c b/drivers/gpu/drm/panel/panel-widechips-ws2401.c new file mode 100644 index 000000000000..8bc976f54b80 --- /dev/null +++ b/drivers/gpu/drm/panel/panel-widechips-ws2401.c @@ -0,0 +1,441 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Panel driver for the WideChips WS2401 480x800 DPI RGB panel, used in + * the Samsung Mobile Display (SMD) LMS380KF01. + * Found in the Samsung Galaxy Ace 2 GT-I8160 mobile phone. + * Linus Walleij <linus.walleij@linaro.org> + * Inspired by code and know-how in the vendor driver by Gareth Phillips. 
+ */ +#include <drm/drm_mipi_dbi.h> +#include <drm/drm_modes.h> +#include <drm/drm_panel.h> + +#include <linux/backlight.h> +#include <linux/delay.h> +#include <linux/gpio/consumer.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/media-bus-format.h> +#include <linux/module.h> +#include <linux/regulator/consumer.h> +#include <linux/spi/spi.h> + +#include <video/mipi_display.h> + +#define WS2401_RESCTL 0xb8 /* Resolution select control */ +#define WS2401_PSMPS 0xbd /* SMPS positive control */ +#define WS2401_NSMPS 0xbe /* SMPS negative control */ +#define WS2401_SMPS 0xbf +#define WS2401_BCMODE 0xc1 /* Backlight control mode */ +#define WS2401_WRBLCTL 0xc3 /* Backlight control */ +#define WS2401_WRDISBV 0xc4 /* Write manual brightness */ +#define WS2401_WRCTRLD 0xc6 /* Write BL control */ +#define WS2401_WRMIE 0xc7 /* Write MIE mode */ +#define WS2401_READ_ID1 0xda /* Read panel ID 1 */ +#define WS2401_READ_ID2 0xdb /* Read panel ID 2 */ +#define WS2401_READ_ID3 0xdc /* Read panel ID 3 */ +#define WS2401_GAMMA_R1 0xe7 /* Gamma red 1 */ +#define WS2401_GAMMA_G1 0xe8 /* Gamma green 1 */ +#define WS2401_GAMMA_B1 0xe9 /* Gamma blue 1 */ +#define WS2401_GAMMA_R2 0xea /* Gamma red 2 */ +#define WS2401_GAMMA_G2 0xeb /* Gamma green 2 */ +#define WS2401_GAMMA_B2 0xec /* Gamma blue 2 */ +#define WS2401_PASSWD1 0xf0 /* Password command for level 2 */ +#define WS2401_DISCTL 0xf2 /* Display control */ +#define WS2401_PWRCTL 0xf3 /* Power control */ +#define WS2401_VCOMCTL 0xf4 /* VCOM control */ +#define WS2401_SRCCTL 0xf5 /* Source control */ +#define WS2401_PANELCTL 0xf6 /* Panel control */ + +static const u8 ws2401_dbi_read_commands[] = { + WS2401_READ_ID1, + WS2401_READ_ID2, + WS2401_READ_ID3, + 0, /* sentinel */ +}; + +/** + * struct ws2401 - state container for a panel controlled by the WS2401 + * controller + */ +struct ws2401 { + /** @dev: the container device */ + struct device *dev; + /** @dbi: the DBI bus abstraction handle */ + struct mipi_dbi dbi; + /** @panel: the DRM panel instance for this device */ + struct drm_panel panel; + /** @width: the width of this panel in mm */ + u32 width; + /** @height: the height of this panel in mm */ + u32 height; + /** @reset: reset GPIO line */ + struct gpio_desc *reset; + /** @regulators: VCCIO and VIO supply regulators */ + struct regulator_bulk_data regulators[2]; + /** @internal_bl: If using internal backlight */ + bool internal_bl; +}; + +static const struct drm_display_mode lms380kf01_480_800_mode = { + /* + * The vendor driver states that the "SMD panel" has a clock + * frequency of 49920000 Hz / 2 = 24960000 Hz. 
+ */ + .clock = 24960, + .hdisplay = 480, + .hsync_start = 480 + 8, + .hsync_end = 480 + 8 + 10, + .htotal = 480 + 8 + 10 + 8, + .vdisplay = 800, + .vsync_start = 800 + 8, + .vsync_end = 800 + 8 + 2, + .vtotal = 800 + 8 + 2 + 18, + .width_mm = 50, + .height_mm = 84, + .flags = DRM_MODE_FLAG_NVSYNC | DRM_MODE_FLAG_NHSYNC, +}; + +static inline struct ws2401 *to_ws2401(struct drm_panel *panel) +{ + return container_of(panel, struct ws2401, panel); +} + +static void ws2401_read_mtp_id(struct ws2401 *ws) +{ + struct mipi_dbi *dbi = &ws->dbi; + u8 id1, id2, id3; + int ret; + + ret = mipi_dbi_command_read(dbi, WS2401_READ_ID1, &id1); + if (ret) { + dev_err(ws->dev, "unable to read MTP ID 1\n"); + return; + } + ret = mipi_dbi_command_read(dbi, WS2401_READ_ID2, &id2); + if (ret) { + dev_err(ws->dev, "unable to read MTP ID 2\n"); + return; + } + ret = mipi_dbi_command_read(dbi, WS2401_READ_ID3, &id3); + if (ret) { + dev_err(ws->dev, "unable to read MTP ID 3\n"); + return; + } + dev_info(ws->dev, "MTP ID: %02x %02x %02x\n", id1, id2, id3); +} + +static int ws2401_power_on(struct ws2401 *ws) +{ + struct mipi_dbi *dbi = &ws->dbi; + int ret; + + /* Power up */ + ret = regulator_bulk_enable(ARRAY_SIZE(ws->regulators), + ws->regulators); + if (ret) { + dev_err(ws->dev, "failed to enable regulators: %d\n", ret); + return ret; + } + msleep(10); + + /* Assert reset >=1 ms */ + gpiod_set_value_cansleep(ws->reset, 1); + usleep_range(1000, 5000); + /* De-assert reset */ + gpiod_set_value_cansleep(ws->reset, 0); + /* Wait >= 10 ms */ + msleep(10); + dev_dbg(ws->dev, "de-asserted RESET\n"); + + /* + * Exit sleep mode and initialize display - some hammering is + * necessary. + */ + mipi_dbi_command(dbi, MIPI_DCS_EXIT_SLEEP_MODE); + mipi_dbi_command(dbi, MIPI_DCS_EXIT_SLEEP_MODE); + msleep(50); + + /* Magic to unlock level 2 control of the display */ + mipi_dbi_command(dbi, WS2401_PASSWD1, 0x5a, 0x5a); + /* Configure resolution to 480RGBx800 */ + mipi_dbi_command(dbi, WS2401_RESCTL, 0x12); + /* Set addressing mode Flip V(d0), Flip H(d1) RGB/BGR(d3) */ + mipi_dbi_command(dbi, MIPI_DCS_SET_ADDRESS_MODE, 0x01); + /* Set pixel format: 24 bpp */ + mipi_dbi_command(dbi, MIPI_DCS_SET_PIXEL_FORMAT, 0x70); + mipi_dbi_command(dbi, WS2401_SMPS, 0x00, 0x0f); + mipi_dbi_command(dbi, WS2401_PSMPS, 0x06, 0x03, /* DDVDH: 4.6v */ + 0x7e, 0x03, 0x12, 0x37); + mipi_dbi_command(dbi, WS2401_NSMPS, 0x06, 0x03, /* DDVDH: -4.6v */ + 0x7e, 0x02, 0x15, 0x37); + mipi_dbi_command(dbi, WS2401_SMPS, 0x02, 0x0f); + mipi_dbi_command(dbi, WS2401_PWRCTL, 0x10, 0xA9, 0x00, 0x01, 0x44, + 0xb4, /* VGH:16.1v, VGL:-13.8v */ + 0x50, /* GREFP:4.2v (default) */ + 0x50, /* GREFN:-4.2v (default) */ + 0x00, + 0x44); /* VOUTL:-10v (default) */ + mipi_dbi_command(dbi, WS2401_DISCTL, 0x01, 0x00, 0x00, 0x00, 0x14, + 0x16); + mipi_dbi_command(dbi, WS2401_VCOMCTL, 0x30, 0x53, 0x53); + mipi_dbi_command(dbi, WS2401_SRCCTL, 0x03, 0x0C, 0x00, 0x00, 0x00, + 0x01, /* 2 dot inversion */ + 0x01, 0x06, 0x03); + mipi_dbi_command(dbi, WS2401_PANELCTL, 0x14, 0x00, 0x80, 0x00); + mipi_dbi_command(dbi, WS2401_WRMIE, 0x01); + + /* Set up gamma, probably these are P-gamma and N-gamma for each color */ + mipi_dbi_command(dbi, WS2401_GAMMA_R1, 0x00, + 0x5b, 0x42, 0x41, 0x3f, 0x42, 0x3d, 0x38, 0x2e, + 0x2b, 0x2a, 0x27, 0x22, 0x27, 0x0f, 0x00, 0x00); + mipi_dbi_command(dbi, WS2401_GAMMA_R2, 0x00, + 0x5b, 0x42, 0x41, 0x3f, 0x42, 0x3d, 0x38, 0x2e, + 0x2b, 0x2a, 0x27, 0x22, 0x27, 0x0f, 0x00, 0x00); + mipi_dbi_command(dbi, WS2401_GAMMA_G1, 0x00, + 0x59, 0x40, 0x3f, 0x3e, 0x41, 0x3d, 0x39, 
0x2f, + 0x2c, 0x2b, 0x29, 0x25, 0x29, 0x19, 0x08, 0x00); + mipi_dbi_command(dbi, WS2401_GAMMA_G2, 0x00, + 0x59, 0x40, 0x3f, 0x3e, 0x41, 0x3d, 0x39, 0x2f, + 0x2c, 0x2b, 0x29, 0x25, 0x29, 0x19, 0x08, 0x00); + mipi_dbi_command(dbi, WS2401_GAMMA_B1, 0x00, + 0x57, 0x3b, 0x3a, 0x3b, 0x3f, 0x3b, 0x38, 0x27, + 0x38, 0x2a, 0x26, 0x22, 0x34, 0x0c, 0x09, 0x00); + mipi_dbi_command(dbi, WS2401_GAMMA_B2, 0x00, + 0x57, 0x3b, 0x3a, 0x3b, 0x3f, 0x3b, 0x38, 0x27, + 0x38, 0x2a, 0x26, 0x22, 0x34, 0x0c, 0x09, 0x00); + + if (ws->internal_bl) { + mipi_dbi_command(dbi, WS2401_WRCTRLD, 0x2c); + } else { + mipi_dbi_command(dbi, WS2401_WRCTRLD, 0x00); + /* + * When not using internal backlight we do not need any further + * L2 accesses to the panel so we close the door on our way out. + * Otherwise we need to leave the L2 door open. + */ + mipi_dbi_command(dbi, WS2401_PASSWD1, 0xa5, 0xa5); + } + + return 0; +} + +static int ws2401_power_off(struct ws2401 *ws) +{ + /* Go into RESET and disable regulators */ + gpiod_set_value_cansleep(ws->reset, 1); + return regulator_bulk_disable(ARRAY_SIZE(ws->regulators), + ws->regulators); +} + +static int ws2401_unprepare(struct drm_panel *panel) +{ + struct ws2401 *ws = to_ws2401(panel); + struct mipi_dbi *dbi = &ws->dbi; + + /* Make sure we disable backlight, if any */ + if (ws->internal_bl) + mipi_dbi_command(dbi, WS2401_WRCTRLD, 0x00); + mipi_dbi_command(dbi, MIPI_DCS_ENTER_SLEEP_MODE); + msleep(120); + return ws2401_power_off(to_ws2401(panel)); +} + +static int ws2401_disable(struct drm_panel *panel) +{ + struct ws2401 *ws = to_ws2401(panel); + struct mipi_dbi *dbi = &ws->dbi; + + mipi_dbi_command(dbi, MIPI_DCS_SET_DISPLAY_OFF); + msleep(25); + + return 0; +} + +static int ws2401_prepare(struct drm_panel *panel) +{ + return ws2401_power_on(to_ws2401(panel)); +} + +static int ws2401_enable(struct drm_panel *panel) +{ + struct ws2401 *ws = to_ws2401(panel); + struct mipi_dbi *dbi = &ws->dbi; + + mipi_dbi_command(dbi, MIPI_DCS_SET_DISPLAY_ON); + + return 0; +} + +/** + * ws2401_get_modes() - return the mode + * @panel: the panel to get the mode for + * @connector: reference to the central DRM connector control structure + */ +static int ws2401_get_modes(struct drm_panel *panel, + struct drm_connector *connector) +{ + struct ws2401 *ws = to_ws2401(panel); + struct drm_display_mode *mode; + static const u32 bus_format = MEDIA_BUS_FMT_RGB888_1X24; + + /* + * We just support the LMS380KF01 so far, if we implement more panels + * this mode, the following connector display_info settings and + * probably the custom DCS sequences needs to selected based on what + * the target panel needs. 
+ */ + mode = drm_mode_duplicate(connector->dev, &lms380kf01_480_800_mode); + if (!mode) { + dev_err(ws->dev, "failed to add mode\n"); + return -ENOMEM; + } + + connector->display_info.bpc = 8; + connector->display_info.width_mm = mode->width_mm; + connector->display_info.height_mm = mode->height_mm; + connector->display_info.bus_flags = + DRM_BUS_FLAG_PIXDATA_DRIVE_NEGEDGE; + drm_display_info_set_bus_formats(&connector->display_info, + &bus_format, 1); + + drm_mode_set_name(mode); + mode->type = DRM_MODE_TYPE_DRIVER | DRM_MODE_TYPE_PREFERRED; + + drm_mode_probed_add(connector, mode); + + return 1; +} + +static const struct drm_panel_funcs ws2401_drm_funcs = { + .disable = ws2401_disable, + .unprepare = ws2401_unprepare, + .prepare = ws2401_prepare, + .enable = ws2401_enable, + .get_modes = ws2401_get_modes, +}; + +static int ws2401_set_brightness(struct backlight_device *bl) +{ + struct ws2401 *ws = bl_get_data(bl); + struct mipi_dbi *dbi = &ws->dbi; + u8 brightness = backlight_get_brightness(bl); + + if (backlight_is_blank(bl)) { + mipi_dbi_command(dbi, WS2401_WRCTRLD, 0x00); + } else { + mipi_dbi_command(dbi, WS2401_WRCTRLD, 0x2c); + mipi_dbi_command(dbi, WS2401_WRDISBV, brightness); + } + + return 0; +} + +static const struct backlight_ops ws2401_bl_ops = { + .update_status = ws2401_set_brightness, +}; + +static const struct backlight_properties ws2401_bl_props = { + .type = BACKLIGHT_PLATFORM, + .brightness = 120, + .max_brightness = U8_MAX, +}; + +static int ws2401_probe(struct spi_device *spi) +{ + struct device *dev = &spi->dev; + struct ws2401 *ws; + int ret; + + ws = devm_kzalloc(dev, sizeof(*ws), GFP_KERNEL); + if (!ws) + return -ENOMEM; + ws->dev = dev; + + /* + * VCI is the analog voltage supply + * VCCIO is the digital I/O voltage supply + */ + ws->regulators[0].supply = "vci"; + ws->regulators[1].supply = "vccio"; + ret = devm_regulator_bulk_get(dev, + ARRAY_SIZE(ws->regulators), + ws->regulators); + if (ret) + return dev_err_probe(dev, ret, "failed to get regulators\n"); + + ws->reset = devm_gpiod_get(dev, "reset", GPIOD_OUT_HIGH); + if (IS_ERR(ws->reset)) { + ret = PTR_ERR(ws->reset); + return dev_err_probe(dev, ret, "no RESET GPIO\n"); + } + + ret = mipi_dbi_spi_init(spi, &ws->dbi, NULL); + if (ret) + return dev_err_probe(dev, ret, "MIPI DBI init failed\n"); + ws->dbi.read_commands = ws2401_dbi_read_commands; + + ws2401_power_on(ws); + ws2401_read_mtp_id(ws); + ws2401_power_off(ws); + + drm_panel_init(&ws->panel, dev, &ws2401_drm_funcs, + DRM_MODE_CONNECTOR_DPI); + + ret = drm_panel_of_backlight(&ws->panel); + if (ret) + return dev_err_probe(dev, ret, + "failed to get external backlight device\n"); + + if (!ws->panel.backlight) { + dev_dbg(dev, "no external backlight, using internal backlight\n"); + ws->panel.backlight = + devm_backlight_device_register(dev, "ws2401", dev, ws, + &ws2401_bl_ops, &ws2401_bl_props); + if (IS_ERR(ws->panel.backlight)) + return dev_err_probe(dev, PTR_ERR(ws->panel.backlight), + "failed to register backlight device\n"); + } else { + dev_dbg(dev, "using external backlight\n"); + } + + spi_set_drvdata(spi, ws); + + drm_panel_add(&ws->panel); + dev_dbg(dev, "added panel\n"); + + return 0; +} + +static int ws2401_remove(struct spi_device *spi) +{ + struct ws2401 *ws = spi_get_drvdata(spi); + + drm_panel_remove(&ws->panel); + return 0; +} + +/* + * Samsung LMS380KF01 is the one instance of this display controller that we + * know about, but if more are found, the controller can be parameterized + * here and used for other configurations. 
+ */ +static const struct of_device_id ws2401_match[] = { + { .compatible = "samsung,lms380kf01", }, + {}, +}; +MODULE_DEVICE_TABLE(of, ws2401_match); + +static struct spi_driver ws2401_driver = { + .probe = ws2401_probe, + .remove = ws2401_remove, + .driver = { + .name = "ws2401-panel", + .of_match_table = ws2401_match, + }, +}; +module_spi_driver(ws2401_driver); + +MODULE_AUTHOR("Linus Walleij <linus.walleij@linaro.org>"); +MODULE_DESCRIPTION("Samsung WS2401 panel driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/gpu/drm/panel/panel-xinpeng-xpp055c272.c b/drivers/gpu/drm/panel/panel-xinpeng-xpp055c272.c index 55172d63a922..d17aae8b71d7 100644 --- a/drivers/gpu/drm/panel/panel-xinpeng-xpp055c272.c +++ b/drivers/gpu/drm/panel/panel-xinpeng-xpp055c272.c @@ -311,7 +311,7 @@ static int xpp055c272_probe(struct mipi_dsi_device *dsi) dsi->lanes = 4; dsi->format = MIPI_DSI_FMT_RGB888; dsi->mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_BURST | - MIPI_DSI_MODE_LPM | MIPI_DSI_MODE_EOT_PACKET; + MIPI_DSI_MODE_LPM | MIPI_DSI_MODE_NO_EOT_PACKET; drm_panel_init(&ctx->panel, &dsi->dev, &xpp055c272_funcs, DRM_MODE_CONNECTOR_DSI); diff --git a/drivers/gpu/drm/panfrost/panfrost_devfreq.c b/drivers/gpu/drm/panfrost/panfrost_devfreq.c index 3644652f726f..194af7f607a6 100644 --- a/drivers/gpu/drm/panfrost/panfrost_devfreq.c +++ b/drivers/gpu/drm/panfrost/panfrost_devfreq.c @@ -106,7 +106,8 @@ int panfrost_devfreq_init(struct panfrost_device *pfdev) if (ret) { /* Continue if the optional regulator is missing */ if (ret != -ENODEV) { - DRM_DEV_ERROR(dev, "Couldn't set OPP regulators\n"); + if (ret != -EPROBE_DEFER) + DRM_DEV_ERROR(dev, "Couldn't set OPP regulators\n"); return ret; } } diff --git a/drivers/gpu/drm/panfrost/panfrost_device.c b/drivers/gpu/drm/panfrost/panfrost_device.c index 125ed973feaa..bd9b7be63b0f 100644 --- a/drivers/gpu/drm/panfrost/panfrost_device.c +++ b/drivers/gpu/drm/panfrost/panfrost_device.c @@ -54,7 +54,8 @@ static int panfrost_clk_init(struct panfrost_device *pfdev) if (IS_ERR(pfdev->bus_clock)) { dev_err(pfdev->dev, "get bus_clock failed %ld\n", PTR_ERR(pfdev->bus_clock)); - return PTR_ERR(pfdev->bus_clock); + err = PTR_ERR(pfdev->bus_clock); + goto disable_clock; } if (pfdev->bus_clock) { @@ -291,55 +292,100 @@ void panfrost_device_fini(struct panfrost_device *pfdev) panfrost_clk_fini(pfdev); } -const char *panfrost_exception_name(struct panfrost_device *pfdev, u32 exception_code) -{ - switch (exception_code) { - /* Non-Fault Status code */ - case 0x00: return "NOT_STARTED/IDLE/OK"; - case 0x01: return "DONE"; - case 0x02: return "INTERRUPTED"; - case 0x03: return "STOPPED"; - case 0x04: return "TERMINATED"; - case 0x08: return "ACTIVE"; - /* Job exceptions */ - case 0x40: return "JOB_CONFIG_FAULT"; - case 0x41: return "JOB_POWER_FAULT"; - case 0x42: return "JOB_READ_FAULT"; - case 0x43: return "JOB_WRITE_FAULT"; - case 0x44: return "JOB_AFFINITY_FAULT"; - case 0x48: return "JOB_BUS_FAULT"; - case 0x50: return "INSTR_INVALID_PC"; - case 0x51: return "INSTR_INVALID_ENC"; - case 0x52: return "INSTR_TYPE_MISMATCH"; - case 0x53: return "INSTR_OPERAND_FAULT"; - case 0x54: return "INSTR_TLS_FAULT"; - case 0x55: return "INSTR_BARRIER_FAULT"; - case 0x56: return "INSTR_ALIGN_FAULT"; - case 0x58: return "DATA_INVALID_FAULT"; - case 0x59: return "TILE_RANGE_FAULT"; - case 0x5A: return "ADDR_RANGE_FAULT"; - case 0x60: return "OUT_OF_MEMORY"; - /* GPU exceptions */ - case 0x80: return "DELAYED_BUS_FAULT"; - case 0x88: return "SHAREABILITY_FAULT"; - /* MMU exceptions */ - case 
0xC1: return "TRANSLATION_FAULT_LEVEL1"; - case 0xC2: return "TRANSLATION_FAULT_LEVEL2"; - case 0xC3: return "TRANSLATION_FAULT_LEVEL3"; - case 0xC4: return "TRANSLATION_FAULT_LEVEL4"; - case 0xC8: return "PERMISSION_FAULT"; - case 0xC9 ... 0xCF: return "PERMISSION_FAULT"; - case 0xD1: return "TRANSTAB_BUS_FAULT_LEVEL1"; - case 0xD2: return "TRANSTAB_BUS_FAULT_LEVEL2"; - case 0xD3: return "TRANSTAB_BUS_FAULT_LEVEL3"; - case 0xD4: return "TRANSTAB_BUS_FAULT_LEVEL4"; - case 0xD8: return "ACCESS_FLAG"; - case 0xD9 ... 0xDF: return "ACCESS_FLAG"; - case 0xE0 ... 0xE7: return "ADDRESS_SIZE_FAULT"; - case 0xE8 ... 0xEF: return "MEMORY_ATTRIBUTES_FAULT"; +#define PANFROST_EXCEPTION(id) \ + [DRM_PANFROST_EXCEPTION_ ## id] = { \ + .name = #id, \ } - return "UNKNOWN"; +struct panfrost_exception_info { + const char *name; +}; + +static const struct panfrost_exception_info panfrost_exception_infos[] = { + PANFROST_EXCEPTION(OK), + PANFROST_EXCEPTION(DONE), + PANFROST_EXCEPTION(INTERRUPTED), + PANFROST_EXCEPTION(STOPPED), + PANFROST_EXCEPTION(TERMINATED), + PANFROST_EXCEPTION(KABOOM), + PANFROST_EXCEPTION(EUREKA), + PANFROST_EXCEPTION(ACTIVE), + PANFROST_EXCEPTION(JOB_CONFIG_FAULT), + PANFROST_EXCEPTION(JOB_POWER_FAULT), + PANFROST_EXCEPTION(JOB_READ_FAULT), + PANFROST_EXCEPTION(JOB_WRITE_FAULT), + PANFROST_EXCEPTION(JOB_AFFINITY_FAULT), + PANFROST_EXCEPTION(JOB_BUS_FAULT), + PANFROST_EXCEPTION(INSTR_INVALID_PC), + PANFROST_EXCEPTION(INSTR_INVALID_ENC), + PANFROST_EXCEPTION(INSTR_TYPE_MISMATCH), + PANFROST_EXCEPTION(INSTR_OPERAND_FAULT), + PANFROST_EXCEPTION(INSTR_TLS_FAULT), + PANFROST_EXCEPTION(INSTR_BARRIER_FAULT), + PANFROST_EXCEPTION(INSTR_ALIGN_FAULT), + PANFROST_EXCEPTION(DATA_INVALID_FAULT), + PANFROST_EXCEPTION(TILE_RANGE_FAULT), + PANFROST_EXCEPTION(ADDR_RANGE_FAULT), + PANFROST_EXCEPTION(IMPRECISE_FAULT), + PANFROST_EXCEPTION(OOM), + PANFROST_EXCEPTION(OOM_AFBC), + PANFROST_EXCEPTION(UNKNOWN), + PANFROST_EXCEPTION(DELAYED_BUS_FAULT), + PANFROST_EXCEPTION(GPU_SHAREABILITY_FAULT), + PANFROST_EXCEPTION(SYS_SHAREABILITY_FAULT), + PANFROST_EXCEPTION(GPU_CACHEABILITY_FAULT), + PANFROST_EXCEPTION(TRANSLATION_FAULT_0), + PANFROST_EXCEPTION(TRANSLATION_FAULT_1), + PANFROST_EXCEPTION(TRANSLATION_FAULT_2), + PANFROST_EXCEPTION(TRANSLATION_FAULT_3), + PANFROST_EXCEPTION(TRANSLATION_FAULT_4), + PANFROST_EXCEPTION(TRANSLATION_FAULT_IDENTITY), + PANFROST_EXCEPTION(PERM_FAULT_0), + PANFROST_EXCEPTION(PERM_FAULT_1), + PANFROST_EXCEPTION(PERM_FAULT_2), + PANFROST_EXCEPTION(PERM_FAULT_3), + PANFROST_EXCEPTION(TRANSTAB_BUS_FAULT_0), + PANFROST_EXCEPTION(TRANSTAB_BUS_FAULT_1), + PANFROST_EXCEPTION(TRANSTAB_BUS_FAULT_2), + PANFROST_EXCEPTION(TRANSTAB_BUS_FAULT_3), + PANFROST_EXCEPTION(ACCESS_FLAG_0), + PANFROST_EXCEPTION(ACCESS_FLAG_1), + PANFROST_EXCEPTION(ACCESS_FLAG_2), + PANFROST_EXCEPTION(ACCESS_FLAG_3), + PANFROST_EXCEPTION(ADDR_SIZE_FAULT_IN0), + PANFROST_EXCEPTION(ADDR_SIZE_FAULT_IN1), + PANFROST_EXCEPTION(ADDR_SIZE_FAULT_IN2), + PANFROST_EXCEPTION(ADDR_SIZE_FAULT_IN3), + PANFROST_EXCEPTION(ADDR_SIZE_FAULT_OUT0), + PANFROST_EXCEPTION(ADDR_SIZE_FAULT_OUT1), + PANFROST_EXCEPTION(ADDR_SIZE_FAULT_OUT2), + PANFROST_EXCEPTION(ADDR_SIZE_FAULT_OUT3), + PANFROST_EXCEPTION(MEM_ATTR_FAULT_0), + PANFROST_EXCEPTION(MEM_ATTR_FAULT_1), + PANFROST_EXCEPTION(MEM_ATTR_FAULT_2), + PANFROST_EXCEPTION(MEM_ATTR_FAULT_3), + PANFROST_EXCEPTION(MEM_ATTR_NONCACHE_0), + PANFROST_EXCEPTION(MEM_ATTR_NONCACHE_1), + PANFROST_EXCEPTION(MEM_ATTR_NONCACHE_2), + PANFROST_EXCEPTION(MEM_ATTR_NONCACHE_3), +}; + +const char 
*panfrost_exception_name(u32 exception_code) +{ + if (WARN_ON(exception_code >= ARRAY_SIZE(panfrost_exception_infos) || + !panfrost_exception_infos[exception_code].name)) + return "Unknown exception type"; + + return panfrost_exception_infos[exception_code].name; +} + +bool panfrost_exception_needs_reset(const struct panfrost_device *pfdev, + u32 exception_code) +{ + /* Right now, none of the GPU we support need a reset, but this + * might change. + */ + return false; } void panfrost_device_reset(struct panfrost_device *pfdev) diff --git a/drivers/gpu/drm/panfrost/panfrost_device.h b/drivers/gpu/drm/panfrost/panfrost_device.h index f614e98771e4..8b25278f34c8 100644 --- a/drivers/gpu/drm/panfrost/panfrost_device.h +++ b/drivers/gpu/drm/panfrost/panfrost_device.h @@ -97,11 +97,12 @@ struct panfrost_device { spinlock_t as_lock; unsigned long as_in_use_mask; unsigned long as_alloc_mask; + unsigned long as_faulty_mask; struct list_head as_lru_list; struct panfrost_job_slot *js; - struct panfrost_job *jobs[NUM_JOB_SLOTS]; + struct panfrost_job *jobs[NUM_JOB_SLOTS][2]; struct list_head scheduled_jobs; struct panfrost_perfcnt *perfcnt; @@ -109,6 +110,7 @@ struct panfrost_device { struct mutex sched_lock; struct { + struct workqueue_struct *wq; struct work_struct work; atomic_t pending; } reset; @@ -121,8 +123,12 @@ struct panfrost_device { }; struct panfrost_mmu { + struct panfrost_device *pfdev; + struct kref refcount; struct io_pgtable_cfg pgtbl_cfg; struct io_pgtable_ops *pgtbl_ops; + struct drm_mm mm; + spinlock_t mm_lock; int as; atomic_t as_count; struct list_head list; @@ -133,9 +139,7 @@ struct panfrost_file_priv { struct drm_sched_entity sched_entity[NUM_JOB_SLOTS]; - struct panfrost_mmu mmu; - struct drm_mm mm; - spinlock_t mm_lock; + struct panfrost_mmu *mmu; }; static inline struct panfrost_device *to_panfrost_device(struct drm_device *ddev) @@ -171,6 +175,91 @@ void panfrost_device_reset(struct panfrost_device *pfdev); int panfrost_device_resume(struct device *dev); int panfrost_device_suspend(struct device *dev); -const char *panfrost_exception_name(struct panfrost_device *pfdev, u32 exception_code); +enum drm_panfrost_exception_type { + DRM_PANFROST_EXCEPTION_OK = 0x00, + DRM_PANFROST_EXCEPTION_DONE = 0x01, + DRM_PANFROST_EXCEPTION_INTERRUPTED = 0x02, + DRM_PANFROST_EXCEPTION_STOPPED = 0x03, + DRM_PANFROST_EXCEPTION_TERMINATED = 0x04, + DRM_PANFROST_EXCEPTION_KABOOM = 0x05, + DRM_PANFROST_EXCEPTION_EUREKA = 0x06, + DRM_PANFROST_EXCEPTION_ACTIVE = 0x08, + DRM_PANFROST_EXCEPTION_MAX_NON_FAULT = 0x3f, + DRM_PANFROST_EXCEPTION_JOB_CONFIG_FAULT = 0x40, + DRM_PANFROST_EXCEPTION_JOB_POWER_FAULT = 0x41, + DRM_PANFROST_EXCEPTION_JOB_READ_FAULT = 0x42, + DRM_PANFROST_EXCEPTION_JOB_WRITE_FAULT = 0x43, + DRM_PANFROST_EXCEPTION_JOB_AFFINITY_FAULT = 0x44, + DRM_PANFROST_EXCEPTION_JOB_BUS_FAULT = 0x48, + DRM_PANFROST_EXCEPTION_INSTR_INVALID_PC = 0x50, + DRM_PANFROST_EXCEPTION_INSTR_INVALID_ENC = 0x51, + DRM_PANFROST_EXCEPTION_INSTR_TYPE_MISMATCH = 0x52, + DRM_PANFROST_EXCEPTION_INSTR_OPERAND_FAULT = 0x53, + DRM_PANFROST_EXCEPTION_INSTR_TLS_FAULT = 0x54, + DRM_PANFROST_EXCEPTION_INSTR_BARRIER_FAULT = 0x55, + DRM_PANFROST_EXCEPTION_INSTR_ALIGN_FAULT = 0x56, + DRM_PANFROST_EXCEPTION_DATA_INVALID_FAULT = 0x58, + DRM_PANFROST_EXCEPTION_TILE_RANGE_FAULT = 0x59, + DRM_PANFROST_EXCEPTION_ADDR_RANGE_FAULT = 0x5a, + DRM_PANFROST_EXCEPTION_IMPRECISE_FAULT = 0x5b, + DRM_PANFROST_EXCEPTION_OOM = 0x60, + DRM_PANFROST_EXCEPTION_OOM_AFBC = 0x61, + DRM_PANFROST_EXCEPTION_UNKNOWN = 0x7f, + 
DRM_PANFROST_EXCEPTION_DELAYED_BUS_FAULT = 0x80, + DRM_PANFROST_EXCEPTION_GPU_SHAREABILITY_FAULT = 0x88, + DRM_PANFROST_EXCEPTION_SYS_SHAREABILITY_FAULT = 0x89, + DRM_PANFROST_EXCEPTION_GPU_CACHEABILITY_FAULT = 0x8a, + DRM_PANFROST_EXCEPTION_TRANSLATION_FAULT_0 = 0xc0, + DRM_PANFROST_EXCEPTION_TRANSLATION_FAULT_1 = 0xc1, + DRM_PANFROST_EXCEPTION_TRANSLATION_FAULT_2 = 0xc2, + DRM_PANFROST_EXCEPTION_TRANSLATION_FAULT_3 = 0xc3, + DRM_PANFROST_EXCEPTION_TRANSLATION_FAULT_4 = 0xc4, + DRM_PANFROST_EXCEPTION_TRANSLATION_FAULT_IDENTITY = 0xc7, + DRM_PANFROST_EXCEPTION_PERM_FAULT_0 = 0xc8, + DRM_PANFROST_EXCEPTION_PERM_FAULT_1 = 0xc9, + DRM_PANFROST_EXCEPTION_PERM_FAULT_2 = 0xca, + DRM_PANFROST_EXCEPTION_PERM_FAULT_3 = 0xcb, + DRM_PANFROST_EXCEPTION_TRANSTAB_BUS_FAULT_0 = 0xd0, + DRM_PANFROST_EXCEPTION_TRANSTAB_BUS_FAULT_1 = 0xd1, + DRM_PANFROST_EXCEPTION_TRANSTAB_BUS_FAULT_2 = 0xd2, + DRM_PANFROST_EXCEPTION_TRANSTAB_BUS_FAULT_3 = 0xd3, + DRM_PANFROST_EXCEPTION_ACCESS_FLAG_0 = 0xd8, + DRM_PANFROST_EXCEPTION_ACCESS_FLAG_1 = 0xd9, + DRM_PANFROST_EXCEPTION_ACCESS_FLAG_2 = 0xda, + DRM_PANFROST_EXCEPTION_ACCESS_FLAG_3 = 0xdb, + DRM_PANFROST_EXCEPTION_ADDR_SIZE_FAULT_IN0 = 0xe0, + DRM_PANFROST_EXCEPTION_ADDR_SIZE_FAULT_IN1 = 0xe1, + DRM_PANFROST_EXCEPTION_ADDR_SIZE_FAULT_IN2 = 0xe2, + DRM_PANFROST_EXCEPTION_ADDR_SIZE_FAULT_IN3 = 0xe3, + DRM_PANFROST_EXCEPTION_ADDR_SIZE_FAULT_OUT0 = 0xe4, + DRM_PANFROST_EXCEPTION_ADDR_SIZE_FAULT_OUT1 = 0xe5, + DRM_PANFROST_EXCEPTION_ADDR_SIZE_FAULT_OUT2 = 0xe6, + DRM_PANFROST_EXCEPTION_ADDR_SIZE_FAULT_OUT3 = 0xe7, + DRM_PANFROST_EXCEPTION_MEM_ATTR_FAULT_0 = 0xe8, + DRM_PANFROST_EXCEPTION_MEM_ATTR_FAULT_1 = 0xe9, + DRM_PANFROST_EXCEPTION_MEM_ATTR_FAULT_2 = 0xea, + DRM_PANFROST_EXCEPTION_MEM_ATTR_FAULT_3 = 0xeb, + DRM_PANFROST_EXCEPTION_MEM_ATTR_NONCACHE_0 = 0xec, + DRM_PANFROST_EXCEPTION_MEM_ATTR_NONCACHE_1 = 0xed, + DRM_PANFROST_EXCEPTION_MEM_ATTR_NONCACHE_2 = 0xee, + DRM_PANFROST_EXCEPTION_MEM_ATTR_NONCACHE_3 = 0xef, +}; + +static inline bool +panfrost_exception_is_fault(u32 exception_code) +{ + return exception_code > DRM_PANFROST_EXCEPTION_MAX_NON_FAULT; +} + +const char *panfrost_exception_name(u32 exception_code); +bool panfrost_exception_needs_reset(const struct panfrost_device *pfdev, + u32 exception_code); + +static inline void +panfrost_device_schedule_reset(struct panfrost_device *pfdev) +{ + atomic_set(&pfdev->reset.pending, 1); + queue_work(pfdev->reset.wq, &pfdev->reset.work); +} #endif diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c index 075ec0ef746c..1ffaef5ec5ff 100644 --- a/drivers/gpu/drm/panfrost/panfrost_drv.c +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c @@ -138,12 +138,6 @@ panfrost_lookup_bos(struct drm_device *dev, if (!job->bo_count) return 0; - job->implicit_fences = kvmalloc_array(job->bo_count, - sizeof(struct dma_fence *), - GFP_KERNEL | __GFP_ZERO); - if (!job->implicit_fences) - return -ENOMEM; - ret = drm_gem_objects_lookup(file_priv, (void __user *)(uintptr_t)args->bo_handles, job->bo_count, &job->bos); @@ -174,7 +168,7 @@ panfrost_lookup_bos(struct drm_device *dev, } /** - * panfrost_copy_in_sync() - Sets up job->in_fences[] with the sync objects + * panfrost_copy_in_sync() - Sets up job->deps with the sync objects * referenced by the job. 
* @dev: DRM device * @file_priv: DRM file for this fd @@ -194,22 +188,14 @@ panfrost_copy_in_sync(struct drm_device *dev, { u32 *handles; int ret = 0; - int i; + int i, in_fence_count; - job->in_fence_count = args->in_sync_count; + in_fence_count = args->in_sync_count; - if (!job->in_fence_count) + if (!in_fence_count) return 0; - job->in_fences = kvmalloc_array(job->in_fence_count, - sizeof(struct dma_fence *), - GFP_KERNEL | __GFP_ZERO); - if (!job->in_fences) { - DRM_DEBUG("Failed to allocate job in fences\n"); - return -ENOMEM; - } - - handles = kvmalloc_array(job->in_fence_count, sizeof(u32), GFP_KERNEL); + handles = kvmalloc_array(in_fence_count, sizeof(u32), GFP_KERNEL); if (!handles) { ret = -ENOMEM; DRM_DEBUG("Failed to allocate incoming syncobj handles\n"); @@ -218,16 +204,23 @@ panfrost_copy_in_sync(struct drm_device *dev, if (copy_from_user(handles, (void __user *)(uintptr_t)args->in_syncs, - job->in_fence_count * sizeof(u32))) { + in_fence_count * sizeof(u32))) { ret = -EFAULT; DRM_DEBUG("Failed to copy in syncobj handles\n"); goto fail; } - for (i = 0; i < job->in_fence_count; i++) { + for (i = 0; i < in_fence_count; i++) { + struct dma_fence *fence; + ret = drm_syncobj_find_fence(file_priv, handles[i], 0, 0, - &job->in_fences[i]); - if (ret == -EINVAL) + &fence); + if (ret) + goto fail; + + ret = drm_gem_fence_array_add(&job->deps, fence); + + if (ret) goto fail; } @@ -265,6 +258,8 @@ static int panfrost_ioctl_submit(struct drm_device *dev, void *data, kref_init(&job->refcount); + xa_init_flags(&job->deps, XA_FLAGS_ALLOC); + job->pfdev = pfdev; job->jc = args->jc; job->requirements = args->requirements; @@ -417,7 +412,7 @@ static int panfrost_ioctl_madvise(struct drm_device *dev, void *data, * anyway, so let's not bother. */ if (!list_is_singular(&bo->mappings.list) || - WARN_ON_ONCE(first->mmu != &priv->mmu)) { + WARN_ON_ONCE(first->mmu != priv->mmu)) { ret = -EINVAL; goto out_unlock_mappings; } @@ -449,32 +444,6 @@ int panfrost_unstable_ioctl_check(void) return 0; } -#define PFN_4G (SZ_4G >> PAGE_SHIFT) -#define PFN_4G_MASK (PFN_4G - 1) -#define PFN_16M (SZ_16M >> PAGE_SHIFT) - -static void panfrost_drm_mm_color_adjust(const struct drm_mm_node *node, - unsigned long color, - u64 *start, u64 *end) -{ - /* Executable buffers can't start or end on a 4GB boundary */ - if (!(color & PANFROST_BO_NOEXEC)) { - u64 next_seg; - - if ((*start & PFN_4G_MASK) == 0) - (*start)++; - - if ((*end & PFN_4G_MASK) == 0) - (*end)--; - - next_seg = ALIGN(*start, PFN_4G); - if (next_seg - *start <= PFN_16M) - *start = next_seg + 1; - - *end = min(*end, ALIGN(*start, PFN_4G) - 1); - } -} - static int panfrost_open(struct drm_device *dev, struct drm_file *file) { @@ -489,15 +458,11 @@ panfrost_open(struct drm_device *dev, struct drm_file *file) panfrost_priv->pfdev = pfdev; file->driver_priv = panfrost_priv; - spin_lock_init(&panfrost_priv->mm_lock); - - /* 4G enough for now. 
can be 48-bit */ - drm_mm_init(&panfrost_priv->mm, SZ_32M >> PAGE_SHIFT, (SZ_4G - SZ_32M) >> PAGE_SHIFT); - panfrost_priv->mm.color_adjust = panfrost_drm_mm_color_adjust; - - ret = panfrost_mmu_pgtable_alloc(panfrost_priv); - if (ret) - goto err_pgtable; + panfrost_priv->mmu = panfrost_mmu_ctx_create(pfdev); + if (IS_ERR(panfrost_priv->mmu)) { + ret = PTR_ERR(panfrost_priv->mmu); + goto err_free; + } ret = panfrost_job_open(panfrost_priv); if (ret) @@ -506,9 +471,8 @@ panfrost_open(struct drm_device *dev, struct drm_file *file) return 0; err_job: - panfrost_mmu_pgtable_free(panfrost_priv); -err_pgtable: - drm_mm_takedown(&panfrost_priv->mm); + panfrost_mmu_ctx_put(panfrost_priv->mmu); +err_free: kfree(panfrost_priv); return ret; } @@ -521,8 +485,7 @@ panfrost_postclose(struct drm_device *dev, struct drm_file *file) panfrost_perfcnt_close(file); panfrost_job_close(panfrost_priv); - panfrost_mmu_pgtable_free(panfrost_priv); - drm_mm_takedown(&panfrost_priv->mm); + panfrost_mmu_ctx_put(panfrost_priv->mmu); kfree(panfrost_priv); } diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.c b/drivers/gpu/drm/panfrost/panfrost_gem.c index 3e0723bc36bd..23377481f4e3 100644 --- a/drivers/gpu/drm/panfrost/panfrost_gem.c +++ b/drivers/gpu/drm/panfrost/panfrost_gem.c @@ -60,7 +60,7 @@ panfrost_gem_mapping_get(struct panfrost_gem_object *bo, mutex_lock(&bo->mappings.lock); list_for_each_entry(iter, &bo->mappings.list, node) { - if (iter->mmu == &priv->mmu) { + if (iter->mmu == priv->mmu) { kref_get(&iter->refcount); mapping = iter; break; @@ -74,16 +74,13 @@ panfrost_gem_mapping_get(struct panfrost_gem_object *bo, static void panfrost_gem_teardown_mapping(struct panfrost_gem_mapping *mapping) { - struct panfrost_file_priv *priv; - if (mapping->active) panfrost_mmu_unmap(mapping); - priv = container_of(mapping->mmu, struct panfrost_file_priv, mmu); - spin_lock(&priv->mm_lock); + spin_lock(&mapping->mmu->mm_lock); if (drm_mm_node_allocated(&mapping->mmnode)) drm_mm_remove_node(&mapping->mmnode); - spin_unlock(&priv->mm_lock); + spin_unlock(&mapping->mmu->mm_lock); } static void panfrost_gem_mapping_release(struct kref *kref) @@ -94,6 +91,7 @@ static void panfrost_gem_mapping_release(struct kref *kref) panfrost_gem_teardown_mapping(mapping); drm_gem_object_put(&mapping->obj->base.base); + panfrost_mmu_ctx_put(mapping->mmu); kfree(mapping); } @@ -143,11 +141,11 @@ int panfrost_gem_open(struct drm_gem_object *obj, struct drm_file *file_priv) else align = size >= SZ_2M ? 
SZ_2M >> PAGE_SHIFT : 0; - mapping->mmu = &priv->mmu; - spin_lock(&priv->mm_lock); - ret = drm_mm_insert_node_generic(&priv->mm, &mapping->mmnode, + mapping->mmu = panfrost_mmu_ctx_get(priv->mmu); + spin_lock(&mapping->mmu->mm_lock); + ret = drm_mm_insert_node_generic(&mapping->mmu->mm, &mapping->mmnode, size >> PAGE_SHIFT, align, color, 0); - spin_unlock(&priv->mm_lock); + spin_unlock(&mapping->mmu->mm_lock); if (ret) goto err; @@ -176,7 +174,7 @@ void panfrost_gem_close(struct drm_gem_object *obj, struct drm_file *file_priv) mutex_lock(&bo->mappings.lock); list_for_each_entry(iter, &bo->mappings.list, node) { - if (iter->mmu == &priv->mmu) { + if (iter->mmu == priv->mmu) { mapping = iter; list_del(&iter->node); break; diff --git a/drivers/gpu/drm/panfrost/panfrost_gpu.c b/drivers/gpu/drm/panfrost/panfrost_gpu.c index 0e70e27fd8c3..bbe628b306ee 100644 --- a/drivers/gpu/drm/panfrost/panfrost_gpu.c +++ b/drivers/gpu/drm/panfrost/panfrost_gpu.c @@ -33,7 +33,7 @@ static irqreturn_t panfrost_gpu_irq_handler(int irq, void *data) address |= gpu_read(pfdev, GPU_FAULT_ADDRESS_LO); dev_warn(pfdev->dev, "GPU Fault 0x%08x (%s) at 0x%016llx\n", - fault_status & 0xFF, panfrost_exception_name(pfdev, fault_status), + fault_status, panfrost_exception_name(fault_status & 0xFF), address); if (state & GPU_IRQ_MULTIPLE_FAULT) diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c index 2df3e999a38d..71a72fb50e6b 100644 --- a/drivers/gpu/drm/panfrost/panfrost_job.c +++ b/drivers/gpu/drm/panfrost/panfrost_job.c @@ -4,6 +4,7 @@ #include <linux/delay.h> #include <linux/interrupt.h> #include <linux/io.h> +#include <linux/iopoll.h> #include <linux/platform_device.h> #include <linux/pm_runtime.h> #include <linux/dma-resv.h> @@ -25,17 +26,8 @@ #define job_write(dev, reg, data) writel(data, dev->iomem + (reg)) #define job_read(dev, reg) readl(dev->iomem + (reg)) -enum panfrost_queue_status { - PANFROST_QUEUE_STATUS_ACTIVE, - PANFROST_QUEUE_STATUS_STOPPED, - PANFROST_QUEUE_STATUS_STARTING, - PANFROST_QUEUE_STATUS_FAULT_PENDING, -}; - struct panfrost_queue_state { struct drm_gpu_scheduler sched; - atomic_t status; - struct mutex lock; u64 fence_context; u64 emit_seqno; }; @@ -43,6 +35,7 @@ struct panfrost_queue_state { struct panfrost_job_slot { struct panfrost_queue_state queue[NUM_JOB_SLOTS]; spinlock_t job_lock; + int irq; }; static struct panfrost_job * @@ -148,9 +141,52 @@ static void panfrost_job_write_affinity(struct panfrost_device *pfdev, job_write(pfdev, JS_AFFINITY_NEXT_HI(js), affinity >> 32); } +static u32 +panfrost_get_job_chain_flag(const struct panfrost_job *job) +{ + struct panfrost_fence *f = to_panfrost_fence(job->done_fence); + + if (!panfrost_has_hw_feature(job->pfdev, HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) + return 0; + + return (f->seqno & 1) ? 
JS_CONFIG_JOB_CHAIN_FLAG : 0; +} + +static struct panfrost_job * +panfrost_dequeue_job(struct panfrost_device *pfdev, int slot) +{ + struct panfrost_job *job = pfdev->jobs[slot][0]; + + WARN_ON(!job); + pfdev->jobs[slot][0] = pfdev->jobs[slot][1]; + pfdev->jobs[slot][1] = NULL; + + return job; +} + +static unsigned int +panfrost_enqueue_job(struct panfrost_device *pfdev, int slot, + struct panfrost_job *job) +{ + if (WARN_ON(!job)) + return 0; + + if (!pfdev->jobs[slot][0]) { + pfdev->jobs[slot][0] = job; + return 0; + } + + WARN_ON(pfdev->jobs[slot][1]); + pfdev->jobs[slot][1] = job; + WARN_ON(panfrost_get_job_chain_flag(job) == + panfrost_get_job_chain_flag(pfdev->jobs[slot][0])); + return 1; +} + static void panfrost_job_hw_submit(struct panfrost_job *job, int js) { struct panfrost_device *pfdev = job->pfdev; + unsigned int subslot; u32 cfg; u64 jc_head = job->jc; int ret; @@ -165,7 +201,7 @@ static void panfrost_job_hw_submit(struct panfrost_job *job, int js) return; } - cfg = panfrost_mmu_as_get(pfdev, &job->file_priv->mmu); + cfg = panfrost_mmu_as_get(pfdev, job->file_priv->mmu); job_write(pfdev, JS_HEAD_NEXT_LO(js), jc_head & 0xFFFFFFFF); job_write(pfdev, JS_HEAD_NEXT_HI(js), jc_head >> 32); @@ -176,7 +212,8 @@ static void panfrost_job_hw_submit(struct panfrost_job *job, int js) * start */ cfg |= JS_CONFIG_THREAD_PRI(8) | JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE | - JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE; + JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE | + panfrost_get_job_chain_flag(job); if (panfrost_has_hw_feature(pfdev, HW_FEATURE_FLUSH_REDUCTION)) cfg |= JS_CONFIG_ENABLE_FLUSH_REDUCTION; @@ -190,20 +227,33 @@ static void panfrost_job_hw_submit(struct panfrost_job *job, int js) job_write(pfdev, JS_FLUSH_ID_NEXT(js), job->flush_id); /* GO ! */ - dev_dbg(pfdev->dev, "JS: Submitting atom %p to js[%d] with head=0x%llx", - job, js, jc_head); - job_write(pfdev, JS_COMMAND_NEXT(js), JS_COMMAND_START); + spin_lock(&pfdev->js->job_lock); + subslot = panfrost_enqueue_job(pfdev, js, job); + /* Don't queue the job if a reset is in progress */ + if (!atomic_read(&pfdev->reset.pending)) { + job_write(pfdev, JS_COMMAND_NEXT(js), JS_COMMAND_START); + dev_dbg(pfdev->dev, + "JS: Submitting atom %p to js[%d][%d] with head=0x%llx AS %d", + job, js, subslot, jc_head, cfg & 0xf); + } + spin_unlock(&pfdev->js->job_lock); } -static void panfrost_acquire_object_fences(struct drm_gem_object **bos, - int bo_count, - struct dma_fence **implicit_fences) +static int panfrost_acquire_object_fences(struct drm_gem_object **bos, + int bo_count, + struct xarray *deps) { - int i; + int i, ret; - for (i = 0; i < bo_count; i++) - implicit_fences[i] = dma_resv_get_excl_unlocked(bos[i]->resv); + for (i = 0; i < bo_count; i++) { + /* panfrost always uses write mode in its current uapi */ + ret = drm_gem_fence_array_add_implicit(deps, bos[i], true); + if (ret) + return ret; + } + + return 0; } static void panfrost_attach_object_fences(struct drm_gem_object **bos, @@ -224,14 +274,13 @@ int panfrost_job_push(struct panfrost_job *job) struct ww_acquire_ctx acquire_ctx; int ret = 0; - mutex_lock(&pfdev->sched_lock); ret = drm_gem_lock_reservations(job->bos, job->bo_count, &acquire_ctx); - if (ret) { - mutex_unlock(&pfdev->sched_lock); + if (ret) return ret; - } + + mutex_lock(&pfdev->sched_lock); ret = drm_sched_job_init(&job->base, entity, NULL); if (ret) { @@ -241,10 +290,14 @@ int panfrost_job_push(struct panfrost_job *job) job->render_done_fence = dma_fence_get(&job->base.s_fence->finished); - kref_get(&job->refcount); /* put by 
scheduler job completion */ + ret = panfrost_acquire_object_fences(job->bos, job->bo_count, + &job->deps); + if (ret) { + mutex_unlock(&pfdev->sched_lock); + goto unlock; + } - panfrost_acquire_object_fences(job->bos, job->bo_count, - job->implicit_fences); + kref_get(&job->refcount); /* put by scheduler job completion */ drm_sched_entity_push_job(&job->base, entity); @@ -263,18 +316,15 @@ static void panfrost_job_cleanup(struct kref *ref) { struct panfrost_job *job = container_of(ref, struct panfrost_job, refcount); + struct dma_fence *fence; + unsigned long index; unsigned int i; - if (job->in_fences) { - for (i = 0; i < job->in_fence_count; i++) - dma_fence_put(job->in_fences[i]); - kvfree(job->in_fences); - } - if (job->implicit_fences) { - for (i = 0; i < job->bo_count; i++) - dma_fence_put(job->implicit_fences[i]); - kvfree(job->implicit_fences); + xa_for_each(&job->deps, index, fence) { + dma_fence_put(fence); } + xa_destroy(&job->deps); + dma_fence_put(job->done_fence); dma_fence_put(job->render_done_fence); @@ -317,26 +367,9 @@ static struct dma_fence *panfrost_job_dependency(struct drm_sched_job *sched_job struct drm_sched_entity *s_entity) { struct panfrost_job *job = to_panfrost_job(sched_job); - struct dma_fence *fence; - unsigned int i; - /* Explicit fences */ - for (i = 0; i < job->in_fence_count; i++) { - if (job->in_fences[i]) { - fence = job->in_fences[i]; - job->in_fences[i] = NULL; - return fence; - } - } - - /* Implicit fences, max. one per BO */ - for (i = 0; i < job->bo_count; i++) { - if (job->implicit_fences[i]) { - fence = job->implicit_fences[i]; - job->implicit_fences[i] = NULL; - return fence; - } - } + if (!xa_empty(&job->deps)) + return xa_erase(&job->deps, job->last_dep++); return NULL; } @@ -351,11 +384,15 @@ static struct dma_fence *panfrost_job_run(struct drm_sched_job *sched_job) if (unlikely(job->base.s_fence->finished.error)) return NULL; - pfdev->jobs[slot] = job; + /* Nothing to execute: can happen if the job has finished while + * we were resetting the GPU. 
+ */ + if (!job->jc) + return NULL; fence = panfrost_fence_create(pfdev, slot); if (IS_ERR(fence)) - return NULL; + return fence; if (job->done_fence) dma_fence_put(job->done_fence); @@ -379,57 +416,314 @@ void panfrost_job_enable_interrupts(struct panfrost_device *pfdev) job_write(pfdev, JOB_INT_MASK, irq_mask); } -static bool panfrost_scheduler_stop(struct panfrost_queue_state *queue, - struct drm_sched_job *bad) +static void panfrost_job_handle_err(struct panfrost_device *pfdev, + struct panfrost_job *job, + unsigned int js) { - enum panfrost_queue_status old_status; - bool stopped = false; + u32 js_status = job_read(pfdev, JS_STATUS(js)); + const char *exception_name = panfrost_exception_name(js_status); + bool signal_fence = true; + + if (!panfrost_exception_is_fault(js_status)) { + dev_dbg(pfdev->dev, "js event, js=%d, status=%s, head=0x%x, tail=0x%x", + js, exception_name, + job_read(pfdev, JS_HEAD_LO(js)), + job_read(pfdev, JS_TAIL_LO(js))); + } else { + dev_err(pfdev->dev, "js fault, js=%d, status=%s, head=0x%x, tail=0x%x", + js, exception_name, + job_read(pfdev, JS_HEAD_LO(js)), + job_read(pfdev, JS_TAIL_LO(js))); + } - mutex_lock(&queue->lock); - old_status = atomic_xchg(&queue->status, - PANFROST_QUEUE_STATUS_STOPPED); - if (old_status == PANFROST_QUEUE_STATUS_STOPPED) - goto out; + if (js_status == DRM_PANFROST_EXCEPTION_STOPPED) { + /* Update the job head so we can resume */ + job->jc = job_read(pfdev, JS_TAIL_LO(js)) | + ((u64)job_read(pfdev, JS_TAIL_HI(js)) << 32); + + /* The job will be resumed, don't signal the fence */ + signal_fence = false; + } else if (js_status == DRM_PANFROST_EXCEPTION_TERMINATED) { + /* Job has been hard-stopped, flag it as canceled */ + dma_fence_set_error(job->done_fence, -ECANCELED); + job->jc = 0; + } else if (panfrost_exception_is_fault(js_status)) { + /* We might want to provide finer-grained error code based on + * the exception type, but unconditionally setting to EINVAL + * is good enough for now. + */ + dma_fence_set_error(job->done_fence, -EINVAL); + job->jc = 0; + } - WARN_ON(old_status != PANFROST_QUEUE_STATUS_ACTIVE); - drm_sched_stop(&queue->sched, bad); - if (bad) - drm_sched_increase_karma(bad); + panfrost_mmu_as_put(pfdev, job->file_priv->mmu); + panfrost_devfreq_record_idle(&pfdev->pfdevfreq); - stopped = true; + if (signal_fence) + dma_fence_signal_locked(job->done_fence); - /* - * Set the timeout to max so the timer doesn't get started - * when we return from the timeout handler (restored in - * panfrost_scheduler_start()). + pm_runtime_put_autosuspend(pfdev->dev); + + if (panfrost_exception_needs_reset(pfdev, js_status)) { + atomic_set(&pfdev->reset.pending, 1); + drm_sched_fault(&pfdev->js->queue[js].sched); + } +} + +static void panfrost_job_handle_done(struct panfrost_device *pfdev, + struct panfrost_job *job) +{ + /* Set ->jc to 0 to avoid re-submitting an already finished job (can + * happen when we receive the DONE interrupt while doing a GPU reset). + */ + job->jc = 0; + panfrost_mmu_as_put(pfdev, job->file_priv->mmu); + panfrost_devfreq_record_idle(&pfdev->pfdevfreq); + + dma_fence_signal_locked(job->done_fence); + pm_runtime_put_autosuspend(pfdev->dev); +} + +static void panfrost_job_handle_irq(struct panfrost_device *pfdev, u32 status) +{ + struct panfrost_job *done[NUM_JOB_SLOTS][2] = {}; + struct panfrost_job *failed[NUM_JOB_SLOTS] = {}; + u32 js_state = 0, js_events = 0; + unsigned int i, j; + + /* First we collect all failed/done jobs. 
*/ + while (status) { + u32 js_state_mask = 0; + + for (j = 0; j < NUM_JOB_SLOTS; j++) { + if (status & MK_JS_MASK(j)) + js_state_mask |= MK_JS_MASK(j); + + if (status & JOB_INT_MASK_DONE(j)) { + if (done[j][0]) + done[j][1] = panfrost_dequeue_job(pfdev, j); + else + done[j][0] = panfrost_dequeue_job(pfdev, j); + } + + if (status & JOB_INT_MASK_ERR(j)) { + /* Cancel the next submission. Will be submitted + * after we're done handling this failure if + * there's no reset pending. + */ + job_write(pfdev, JS_COMMAND_NEXT(j), JS_COMMAND_NOP); + failed[j] = panfrost_dequeue_job(pfdev, j); + } + } + + /* JS_STATE is sampled when JOB_INT_CLEAR is written. + * For each BIT(slot) or BIT(slot + 16) bit written to + * JOB_INT_CLEAR, the corresponding bits in JS_STATE + * (BIT(slot) and BIT(slot + 16)) are updated, but this + * is racy. If we only have one job done at the time we + * read JOB_INT_RAWSTAT but the second job fails before we + * clear the status, we end up with a status containing + * only the DONE bit and consider both jobs as DONE since + * JS_STATE reports both NEXT and CURRENT as inactive. + * To prevent that, let's repeat this clear+read steps + * until status is 0. + */ + job_write(pfdev, JOB_INT_CLEAR, status); + js_state &= ~js_state_mask; + js_state |= job_read(pfdev, JOB_INT_JS_STATE) & js_state_mask; + js_events |= status; + status = job_read(pfdev, JOB_INT_RAWSTAT); + } + + /* Then we handle the dequeued jobs. */ + for (j = 0; j < NUM_JOB_SLOTS; j++) { + if (!(js_events & MK_JS_MASK(j))) + continue; + + if (failed[j]) { + panfrost_job_handle_err(pfdev, failed[j], j); + } else if (pfdev->jobs[j][0] && !(js_state & MK_JS_MASK(j))) { + /* When the current job doesn't fail, the JM dequeues + * the next job without waiting for an ACK, this means + * we can have 2 jobs dequeued and only catch the + * interrupt when the second one is done. If both slots + * are inactive, but one job remains in pfdev->jobs[j], + * consider it done. Of course that doesn't apply if a + * failure happened since we cancelled execution of the + * job in _NEXT (see above). + */ + if (WARN_ON(!done[j][0])) + done[j][0] = panfrost_dequeue_job(pfdev, j); + else + done[j][1] = panfrost_dequeue_job(pfdev, j); + } + + for (i = 0; i < ARRAY_SIZE(done[0]) && done[j][i]; i++) + panfrost_job_handle_done(pfdev, done[j][i]); + } + + /* And finally we requeue jobs that were waiting in the second slot + * and have been stopped if we detected a failure on the first slot. 
*/ - queue->sched.timeout = MAX_SCHEDULE_TIMEOUT; + for (j = 0; j < NUM_JOB_SLOTS; j++) { + if (!(js_events & MK_JS_MASK(j))) + continue; + + if (!failed[j] || !pfdev->jobs[j][0]) + continue; + + if (pfdev->jobs[j][0]->jc == 0) { + /* The job was cancelled, signal the fence now */ + struct panfrost_job *canceled = panfrost_dequeue_job(pfdev, j); + + dma_fence_set_error(canceled->done_fence, -ECANCELED); + panfrost_job_handle_done(pfdev, canceled); + } else if (!atomic_read(&pfdev->reset.pending)) { + /* Requeue the job we removed if no reset is pending */ + job_write(pfdev, JS_COMMAND_NEXT(j), JS_COMMAND_START); + } + } +} -out: - mutex_unlock(&queue->lock); +static void panfrost_job_handle_irqs(struct panfrost_device *pfdev) +{ + u32 status = job_read(pfdev, JOB_INT_RAWSTAT); + + while (status) { + pm_runtime_mark_last_busy(pfdev->dev); - return stopped; + spin_lock(&pfdev->js->job_lock); + panfrost_job_handle_irq(pfdev, status); + spin_unlock(&pfdev->js->job_lock); + status = job_read(pfdev, JOB_INT_RAWSTAT); + } } -static void panfrost_scheduler_start(struct panfrost_queue_state *queue) +static u32 panfrost_active_slots(struct panfrost_device *pfdev, + u32 *js_state_mask, u32 js_state) { - enum panfrost_queue_status old_status; + u32 rawstat; - mutex_lock(&queue->lock); - old_status = atomic_xchg(&queue->status, - PANFROST_QUEUE_STATUS_STARTING); - WARN_ON(old_status != PANFROST_QUEUE_STATUS_STOPPED); + if (!(js_state & *js_state_mask)) + return 0; - /* Restore the original timeout before starting the scheduler. */ - queue->sched.timeout = msecs_to_jiffies(JOB_TIMEOUT_MS); - drm_sched_resubmit_jobs(&queue->sched); - drm_sched_start(&queue->sched, true); - old_status = atomic_xchg(&queue->status, - PANFROST_QUEUE_STATUS_ACTIVE); - if (old_status == PANFROST_QUEUE_STATUS_FAULT_PENDING) - drm_sched_fault(&queue->sched); + rawstat = job_read(pfdev, JOB_INT_RAWSTAT); + if (rawstat) { + unsigned int i; + + for (i = 0; i < NUM_JOB_SLOTS; i++) { + if (rawstat & MK_JS_MASK(i)) + *js_state_mask &= ~MK_JS_MASK(i); + } + } - mutex_unlock(&queue->lock); + return js_state & *js_state_mask; +} + +static void +panfrost_reset(struct panfrost_device *pfdev, + struct drm_sched_job *bad) +{ + u32 js_state, js_state_mask = 0xffffffff; + unsigned int i, j; + bool cookie; + int ret; + + if (!atomic_read(&pfdev->reset.pending)) + return; + + /* Stop the schedulers. + * + * FIXME: We temporarily get out of the dma_fence_signalling section + * because the cleanup path generate lockdep splats when taking locks + * to release job resources. We should rework the code to follow this + * pattern: + * + * try_lock + * if (locked) + * release + * else + * schedule_work_to_release_later + */ + for (i = 0; i < NUM_JOB_SLOTS; i++) + drm_sched_stop(&pfdev->js->queue[i].sched, bad); + + cookie = dma_fence_begin_signalling(); + + if (bad) + drm_sched_increase_karma(bad); + + /* Mask job interrupts and synchronize to make sure we won't be + * interrupted during our reset. + */ + job_write(pfdev, JOB_INT_MASK, 0); + synchronize_irq(pfdev->js->irq); + + for (i = 0; i < NUM_JOB_SLOTS; i++) { + /* Cancel the next job and soft-stop the running job. 
*/ + job_write(pfdev, JS_COMMAND_NEXT(i), JS_COMMAND_NOP); + job_write(pfdev, JS_COMMAND(i), JS_COMMAND_SOFT_STOP); + } + + /* Wait at most 10ms for soft-stops to complete */ + ret = readl_poll_timeout(pfdev->iomem + JOB_INT_JS_STATE, js_state, + !panfrost_active_slots(pfdev, &js_state_mask, js_state), + 10, 10000); + + if (ret) + dev_err(pfdev->dev, "Soft-stop failed\n"); + + /* Handle the remaining interrupts before we reset. */ + panfrost_job_handle_irqs(pfdev); + + /* Remaining interrupts have been handled, but we might still have + * stuck jobs. Let's make sure the PM counters stay balanced by + * manually calling pm_runtime_put_noidle() and + * panfrost_devfreq_record_idle() for each stuck job. + */ + spin_lock(&pfdev->js->job_lock); + for (i = 0; i < NUM_JOB_SLOTS; i++) { + for (j = 0; j < ARRAY_SIZE(pfdev->jobs[0]) && pfdev->jobs[i][j]; j++) { + pm_runtime_put_noidle(pfdev->dev); + panfrost_devfreq_record_idle(&pfdev->pfdevfreq); + } + } + memset(pfdev->jobs, 0, sizeof(pfdev->jobs)); + spin_unlock(&pfdev->js->job_lock); + + /* Proceed with reset now. */ + panfrost_device_reset(pfdev); + + /* panfrost_device_reset() unmasks job interrupts, but we want to + * keep them masked a bit longer. + */ + job_write(pfdev, JOB_INT_MASK, 0); + + /* GPU has been reset, we can clear the reset pending bit. */ + atomic_set(&pfdev->reset.pending, 0); + + /* Now resubmit jobs that were previously queued but didn't have a + * chance to finish. + * FIXME: We temporarily get out of the DMA fence signalling section + * while resubmitting jobs because the job submission logic will + * allocate memory with the GFP_KERNEL flag which can trigger memory + * reclaim and exposes a lock ordering issue. + */ + dma_fence_end_signalling(cookie); + for (i = 0; i < NUM_JOB_SLOTS; i++) + drm_sched_resubmit_jobs(&pfdev->js->queue[i].sched); + cookie = dma_fence_begin_signalling(); + + /* Restart the schedulers */ + for (i = 0; i < NUM_JOB_SLOTS; i++) + drm_sched_start(&pfdev->js->queue[i].sched, true); + + /* Re-enable job interrupts now that everything has been restarted. */ + job_write(pfdev, JOB_INT_MASK, + GENMASK(16 + NUM_JOB_SLOTS - 1, 16) | + GENMASK(NUM_JOB_SLOTS - 1, 0)); + + dma_fence_end_signalling(cookie); } static enum drm_gpu_sched_stat panfrost_job_timedout(struct drm_sched_job @@ -454,17 +748,20 @@ static enum drm_gpu_sched_stat panfrost_job_timedout(struct drm_sched_job job_read(pfdev, JS_TAIL_LO(js)), sched_job); - /* Scheduler is already stopped, nothing to do. */ - if (!panfrost_scheduler_stop(&pfdev->js->queue[js], sched_job)) - return DRM_GPU_SCHED_STAT_NOMINAL; - - /* Schedule a reset if there's no reset in progress. 
*/ - if (!atomic_xchg(&pfdev->reset.pending, 1)) - schedule_work(&pfdev->reset.work); + atomic_set(&pfdev->reset.pending, 1); + panfrost_reset(pfdev, sched_job); return DRM_GPU_SCHED_STAT_NOMINAL; } +static void panfrost_reset_work(struct work_struct *work) +{ + struct panfrost_device *pfdev; + + pfdev = container_of(work, struct panfrost_device, reset.work); + panfrost_reset(pfdev, NULL); +} + static const struct drm_sched_backend_ops panfrost_sched_ops = { .dependency = panfrost_job_dependency, .run_job = panfrost_job_run, @@ -472,161 +769,75 @@ static const struct drm_sched_backend_ops panfrost_sched_ops = { .free_job = panfrost_job_free }; -static irqreturn_t panfrost_job_irq_handler(int irq, void *data) +static irqreturn_t panfrost_job_irq_handler_thread(int irq, void *data) { struct panfrost_device *pfdev = data; - u32 status = job_read(pfdev, JOB_INT_STAT); - int j; - - dev_dbg(pfdev->dev, "jobslot irq status=%x\n", status); - - if (!status) - return IRQ_NONE; - - pm_runtime_mark_last_busy(pfdev->dev); - - for (j = 0; status; j++) { - u32 mask = MK_JS_MASK(j); - - if (!(status & mask)) - continue; - - job_write(pfdev, JOB_INT_CLEAR, mask); - - if (status & JOB_INT_MASK_ERR(j)) { - enum panfrost_queue_status old_status; - - job_write(pfdev, JS_COMMAND_NEXT(j), JS_COMMAND_NOP); - - dev_err(pfdev->dev, "js fault, js=%d, status=%s, head=0x%x, tail=0x%x", - j, - panfrost_exception_name(pfdev, job_read(pfdev, JS_STATUS(j))), - job_read(pfdev, JS_HEAD_LO(j)), - job_read(pfdev, JS_TAIL_LO(j))); - - /* - * When the queue is being restarted we don't report - * faults directly to avoid races between the timeout - * and reset handlers. panfrost_scheduler_start() will - * call drm_sched_fault() after the queue has been - * started if status == FAULT_PENDING. - */ - old_status = atomic_cmpxchg(&pfdev->js->queue[j].status, - PANFROST_QUEUE_STATUS_STARTING, - PANFROST_QUEUE_STATUS_FAULT_PENDING); - if (old_status == PANFROST_QUEUE_STATUS_ACTIVE) - drm_sched_fault(&pfdev->js->queue[j].sched); - } - - if (status & JOB_INT_MASK_DONE(j)) { - struct panfrost_job *job; - - spin_lock(&pfdev->js->job_lock); - job = pfdev->jobs[j]; - /* Only NULL if job timeout occurred */ - if (job) { - pfdev->jobs[j] = NULL; - - panfrost_mmu_as_put(pfdev, &job->file_priv->mmu); - panfrost_devfreq_record_idle(&pfdev->pfdevfreq); - - dma_fence_signal_locked(job->done_fence); - pm_runtime_put_autosuspend(pfdev->dev); - } - spin_unlock(&pfdev->js->job_lock); - } - - status &= ~mask; - } + panfrost_job_handle_irqs(pfdev); + job_write(pfdev, JOB_INT_MASK, + GENMASK(16 + NUM_JOB_SLOTS - 1, 16) | + GENMASK(NUM_JOB_SLOTS - 1, 0)); return IRQ_HANDLED; } -static void panfrost_reset(struct work_struct *work) +static irqreturn_t panfrost_job_irq_handler(int irq, void *data) { - struct panfrost_device *pfdev = container_of(work, - struct panfrost_device, - reset.work); - unsigned long flags; - unsigned int i; - bool cookie; - - cookie = dma_fence_begin_signalling(); - for (i = 0; i < NUM_JOB_SLOTS; i++) { - /* - * We want pending timeouts to be handled before we attempt - * to stop the scheduler. If we don't do that and the timeout - * handler is in flight, it might have removed the bad job - * from the list, and we'll lose this job if the reset handler - * enters the critical section in panfrost_scheduler_stop() - * before the timeout handler. 
- * - * Timeout is set to MAX_SCHEDULE_TIMEOUT - 1 because we need - * something big enough to make sure the timer will not expire - * before we manage to stop the scheduler, but we can't use - * MAX_SCHEDULE_TIMEOUT because drm_sched_get_cleanup_job() - * considers that as 'timer is not running' and will dequeue - * the job without making sure the timeout handler is not - * running. - */ - pfdev->js->queue[i].sched.timeout = MAX_SCHEDULE_TIMEOUT - 1; - cancel_delayed_work_sync(&pfdev->js->queue[i].sched.work_tdr); - panfrost_scheduler_stop(&pfdev->js->queue[i], NULL); - } - - /* All timers have been stopped, we can safely reset the pending state. */ - atomic_set(&pfdev->reset.pending, 0); - - spin_lock_irqsave(&pfdev->js->job_lock, flags); - for (i = 0; i < NUM_JOB_SLOTS; i++) { - if (pfdev->jobs[i]) { - pm_runtime_put_noidle(pfdev->dev); - panfrost_devfreq_record_idle(&pfdev->pfdevfreq); - pfdev->jobs[i] = NULL; - } - } - spin_unlock_irqrestore(&pfdev->js->job_lock, flags); - - panfrost_device_reset(pfdev); + struct panfrost_device *pfdev = data; + u32 status = job_read(pfdev, JOB_INT_STAT); - for (i = 0; i < NUM_JOB_SLOTS; i++) - panfrost_scheduler_start(&pfdev->js->queue[i]); + if (!status) + return IRQ_NONE; - dma_fence_end_signalling(cookie); + job_write(pfdev, JOB_INT_MASK, 0); + return IRQ_WAKE_THREAD; } int panfrost_job_init(struct panfrost_device *pfdev) { struct panfrost_job_slot *js; - int ret, j, irq; + unsigned int nentries = 2; + int ret, j; - INIT_WORK(&pfdev->reset.work, panfrost_reset); + /* All GPUs have two entries per queue, but without jobchain + * disambiguation stopping the right job in the close path is tricky, + * so let's just advertise one entry in that case. + */ + if (!panfrost_has_hw_feature(pfdev, HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) + nentries = 1; pfdev->js = js = devm_kzalloc(pfdev->dev, sizeof(*js), GFP_KERNEL); if (!js) return -ENOMEM; + INIT_WORK(&pfdev->reset.work, panfrost_reset_work); spin_lock_init(&js->job_lock); - irq = platform_get_irq_byname(to_platform_device(pfdev->dev), "job"); - if (irq <= 0) + js->irq = platform_get_irq_byname(to_platform_device(pfdev->dev), "job"); + if (js->irq <= 0) return -ENODEV; - ret = devm_request_irq(pfdev->dev, irq, panfrost_job_irq_handler, - IRQF_SHARED, KBUILD_MODNAME "-job", pfdev); + ret = devm_request_threaded_irq(pfdev->dev, js->irq, + panfrost_job_irq_handler, + panfrost_job_irq_handler_thread, + IRQF_SHARED, KBUILD_MODNAME "-job", + pfdev); if (ret) { dev_err(pfdev->dev, "failed to request job irq"); return ret; } - for (j = 0; j < NUM_JOB_SLOTS; j++) { - mutex_init(&js->queue[j].lock); + pfdev->reset.wq = alloc_ordered_workqueue("panfrost-reset", 0); + if (!pfdev->reset.wq) + return -ENOMEM; + for (j = 0; j < NUM_JOB_SLOTS; j++) { js->queue[j].fence_context = dma_fence_context_alloc(1); ret = drm_sched_init(&js->queue[j].sched, &panfrost_sched_ops, - 1, 0, msecs_to_jiffies(JOB_TIMEOUT_MS), + nentries, 0, + msecs_to_jiffies(JOB_TIMEOUT_MS), + pfdev->reset.wq, NULL, "pan_js"); if (ret) { dev_err(pfdev->dev, "Failed to create scheduler: %d.", ret); @@ -642,6 +853,7 @@ err_sched: for (j--; j >= 0; j--) drm_sched_fini(&js->queue[j].sched); + destroy_workqueue(pfdev->reset.wq); return ret; } @@ -654,9 +866,10 @@ void panfrost_job_fini(struct panfrost_device *pfdev) for (j = 0; j < NUM_JOB_SLOTS; j++) { drm_sched_fini(&js->queue[j].sched); - mutex_destroy(&js->queue[j].lock); } + cancel_work_sync(&pfdev->reset.work); + destroy_workqueue(pfdev->reset.wq); } int panfrost_job_open(struct panfrost_file_priv 
*panfrost_priv) @@ -679,10 +892,46 @@ int panfrost_job_open(struct panfrost_file_priv *panfrost_priv) void panfrost_job_close(struct panfrost_file_priv *panfrost_priv) { + struct panfrost_device *pfdev = panfrost_priv->pfdev; int i; for (i = 0; i < NUM_JOB_SLOTS; i++) drm_sched_entity_destroy(&panfrost_priv->sched_entity[i]); + + /* Kill in-flight jobs */ + spin_lock(&pfdev->js->job_lock); + for (i = 0; i < NUM_JOB_SLOTS; i++) { + struct drm_sched_entity *entity = &panfrost_priv->sched_entity[i]; + int j; + + for (j = ARRAY_SIZE(pfdev->jobs[0]) - 1; j >= 0; j--) { + struct panfrost_job *job = pfdev->jobs[i][j]; + u32 cmd; + + if (!job || job->base.entity != entity) + continue; + + if (j == 1) { + /* Try to cancel the job before it starts */ + job_write(pfdev, JS_COMMAND_NEXT(i), JS_COMMAND_NOP); + /* Reset the job head so it doesn't get restarted if + * the job in the first slot failed. + */ + job->jc = 0; + } + + if (panfrost_has_hw_feature(pfdev, HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) { + cmd = panfrost_get_job_chain_flag(job) ? + JS_COMMAND_HARD_STOP_1 : + JS_COMMAND_HARD_STOP_0; + } else { + cmd = JS_COMMAND_HARD_STOP; + } + + job_write(pfdev, JS_COMMAND(i), cmd); + } + } + spin_unlock(&pfdev->js->job_lock); } int panfrost_job_is_idle(struct panfrost_device *pfdev) diff --git a/drivers/gpu/drm/panfrost/panfrost_job.h b/drivers/gpu/drm/panfrost/panfrost_job.h index bbd3ba97ff67..82306a03b57e 100644 --- a/drivers/gpu/drm/panfrost/panfrost_job.h +++ b/drivers/gpu/drm/panfrost/panfrost_job.h @@ -19,9 +19,9 @@ struct panfrost_job { struct panfrost_device *pfdev; struct panfrost_file_priv *file_priv; - /* Optional fences userspace can pass in for the job to depend on. */ - struct dma_fence **in_fences; - u32 in_fence_count; + /* Contains both explicit and implicit fences */ + struct xarray deps; + unsigned long last_dep; /* Fence to be signaled by IRQ handler when the job is complete. */ struct dma_fence *done_fence; @@ -30,8 +30,6 @@ struct panfrost_job { __u32 requirements; __u32 flush_id; - /* Exclusive fences we have taken from the BOs to wait for */ - struct dma_fence **implicit_fences; struct panfrost_gem_mapping **mappings; struct drm_gem_object **bos; u32 bo_count; diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c b/drivers/gpu/drm/panfrost/panfrost_mmu.c index 0581186ebfb3..0da5b3100ab1 100644 --- a/drivers/gpu/drm/panfrost/panfrost_mmu.c +++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c @@ -1,5 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright 2019 Linaro, Ltd, Rob Herring <robh@kernel.org> */ + +#include <drm/panfrost_drm.h> + #include <linux/atomic.h> #include <linux/bitfield.h> #include <linux/delay.h> @@ -31,10 +34,13 @@ static int wait_ready(struct panfrost_device *pfdev, u32 as_nr) /* Wait for the MMU status to indicate there is no active command, in * case one is pending. 
*/ ret = readl_relaxed_poll_timeout_atomic(pfdev->iomem + AS_STATUS(as_nr), - val, !(val & AS_STATUS_AS_ACTIVE), 10, 1000); + val, !(val & AS_STATUS_AS_ACTIVE), 10, 100000); - if (ret) + if (ret) { + /* The GPU hung, let's trigger a reset */ + panfrost_device_schedule_reset(pfdev); dev_err(pfdev->dev, "AS_ACTIVE bit stuck\n"); + } return ret; } @@ -151,6 +157,7 @@ u32 panfrost_mmu_as_get(struct panfrost_device *pfdev, struct panfrost_mmu *mmu) as = mmu->as; if (as >= 0) { int en = atomic_inc_return(&mmu->as_count); + u32 mask = BIT(as) | BIT(16 + as); /* * AS can be retained by active jobs or a perfcnt context, @@ -159,6 +166,18 @@ u32 panfrost_mmu_as_get(struct panfrost_device *pfdev, struct panfrost_mmu *mmu) WARN_ON(en >= (NUM_JOB_SLOTS + 1)); list_move(&mmu->list, &pfdev->as_lru_list); + + if (pfdev->as_faulty_mask & mask) { + /* Unhandled pagefault on this AS, the MMU was + * disabled. We need to re-enable the MMU after + * clearing+unmasking the AS interrupts. + */ + mmu_write(pfdev, MMU_INT_CLEAR, mask); + mmu_write(pfdev, MMU_INT_MASK, ~pfdev->as_faulty_mask); + pfdev->as_faulty_mask &= ~mask; + panfrost_mmu_enable(pfdev, mmu); + } + goto out; } @@ -208,6 +227,7 @@ void panfrost_mmu_reset(struct panfrost_device *pfdev) spin_lock(&pfdev->as_lock); pfdev->as_alloc_mask = 0; + pfdev->as_faulty_mask = 0; list_for_each_entry_safe(mmu, mmu_tmp, &pfdev->as_lru_list, list) { mmu->as = -1; @@ -337,7 +357,7 @@ static void mmu_tlb_inv_context_s1(void *cookie) static void mmu_tlb_sync_context(void *cookie) { - //struct panfrost_device *pfdev = cookie; + //struct panfrost_mmu *mmu = cookie; // TODO: Wait 1000 GPU cycles for HW_ISSUE_6367/T60X } @@ -352,57 +372,10 @@ static const struct iommu_flush_ops mmu_tlb_ops = { .tlb_flush_walk = mmu_tlb_flush_walk, }; -int panfrost_mmu_pgtable_alloc(struct panfrost_file_priv *priv) -{ - struct panfrost_mmu *mmu = &priv->mmu; - struct panfrost_device *pfdev = priv->pfdev; - - INIT_LIST_HEAD(&mmu->list); - mmu->as = -1; - - mmu->pgtbl_cfg = (struct io_pgtable_cfg) { - .pgsize_bitmap = SZ_4K | SZ_2M, - .ias = FIELD_GET(0xff, pfdev->features.mmu_features), - .oas = FIELD_GET(0xff00, pfdev->features.mmu_features), - .coherent_walk = pfdev->coherent, - .tlb = &mmu_tlb_ops, - .iommu_dev = pfdev->dev, - }; - - mmu->pgtbl_ops = alloc_io_pgtable_ops(ARM_MALI_LPAE, &mmu->pgtbl_cfg, - priv); - if (!mmu->pgtbl_ops) - return -EINVAL; - - return 0; -} - -void panfrost_mmu_pgtable_free(struct panfrost_file_priv *priv) -{ - struct panfrost_device *pfdev = priv->pfdev; - struct panfrost_mmu *mmu = &priv->mmu; - - spin_lock(&pfdev->as_lock); - if (mmu->as >= 0) { - pm_runtime_get_noresume(pfdev->dev); - if (pm_runtime_active(pfdev->dev)) - panfrost_mmu_disable(pfdev, mmu->as); - pm_runtime_put_autosuspend(pfdev->dev); - - clear_bit(mmu->as, &pfdev->as_alloc_mask); - clear_bit(mmu->as, &pfdev->as_in_use_mask); - list_del(&mmu->list); - } - spin_unlock(&pfdev->as_lock); - - free_io_pgtable_ops(mmu->pgtbl_ops); -} - static struct panfrost_gem_mapping * addr_to_mapping(struct panfrost_device *pfdev, int as, u64 addr) { struct panfrost_gem_mapping *mapping = NULL; - struct panfrost_file_priv *priv; struct drm_mm_node *node; u64 offset = addr >> PAGE_SHIFT; struct panfrost_mmu *mmu; @@ -415,11 +388,10 @@ addr_to_mapping(struct panfrost_device *pfdev, int as, u64 addr) goto out; found_mmu: - priv = container_of(mmu, struct panfrost_file_priv, mmu); - spin_lock(&priv->mm_lock); + spin_lock(&mmu->mm_lock); - drm_mm_for_each_node(node, &priv->mm) { + drm_mm_for_each_node(node, 
&mmu->mm) { if (offset >= node->start && offset < (node->start + node->size)) { mapping = drm_mm_node_to_panfrost_mapping(node); @@ -429,7 +401,7 @@ found_mmu: } } - spin_unlock(&priv->mm_lock); + spin_unlock(&mmu->mm_lock); out: spin_unlock(&pfdev->as_lock); return mapping; @@ -542,6 +514,107 @@ err_bo: return ret; } +static void panfrost_mmu_release_ctx(struct kref *kref) +{ + struct panfrost_mmu *mmu = container_of(kref, struct panfrost_mmu, + refcount); + struct panfrost_device *pfdev = mmu->pfdev; + + spin_lock(&pfdev->as_lock); + if (mmu->as >= 0) { + pm_runtime_get_noresume(pfdev->dev); + if (pm_runtime_active(pfdev->dev)) + panfrost_mmu_disable(pfdev, mmu->as); + pm_runtime_put_autosuspend(pfdev->dev); + + clear_bit(mmu->as, &pfdev->as_alloc_mask); + clear_bit(mmu->as, &pfdev->as_in_use_mask); + list_del(&mmu->list); + } + spin_unlock(&pfdev->as_lock); + + free_io_pgtable_ops(mmu->pgtbl_ops); + drm_mm_takedown(&mmu->mm); + kfree(mmu); +} + +void panfrost_mmu_ctx_put(struct panfrost_mmu *mmu) +{ + kref_put(&mmu->refcount, panfrost_mmu_release_ctx); +} + +struct panfrost_mmu *panfrost_mmu_ctx_get(struct panfrost_mmu *mmu) +{ + kref_get(&mmu->refcount); + + return mmu; +} + +#define PFN_4G (SZ_4G >> PAGE_SHIFT) +#define PFN_4G_MASK (PFN_4G - 1) +#define PFN_16M (SZ_16M >> PAGE_SHIFT) + +static void panfrost_drm_mm_color_adjust(const struct drm_mm_node *node, + unsigned long color, + u64 *start, u64 *end) +{ + /* Executable buffers can't start or end on a 4GB boundary */ + if (!(color & PANFROST_BO_NOEXEC)) { + u64 next_seg; + + if ((*start & PFN_4G_MASK) == 0) + (*start)++; + + if ((*end & PFN_4G_MASK) == 0) + (*end)--; + + next_seg = ALIGN(*start, PFN_4G); + if (next_seg - *start <= PFN_16M) + *start = next_seg + 1; + + *end = min(*end, ALIGN(*start, PFN_4G) - 1); + } +} + +struct panfrost_mmu *panfrost_mmu_ctx_create(struct panfrost_device *pfdev) +{ + struct panfrost_mmu *mmu; + + mmu = kzalloc(sizeof(*mmu), GFP_KERNEL); + if (!mmu) + return ERR_PTR(-ENOMEM); + + mmu->pfdev = pfdev; + spin_lock_init(&mmu->mm_lock); + + /* 4G enough for now. can be 48-bit */ + drm_mm_init(&mmu->mm, SZ_32M >> PAGE_SHIFT, (SZ_4G - SZ_32M) >> PAGE_SHIFT); + mmu->mm.color_adjust = panfrost_drm_mm_color_adjust; + + INIT_LIST_HEAD(&mmu->list); + mmu->as = -1; + + mmu->pgtbl_cfg = (struct io_pgtable_cfg) { + .pgsize_bitmap = SZ_4K | SZ_2M, + .ias = FIELD_GET(0xff, pfdev->features.mmu_features), + .oas = FIELD_GET(0xff00, pfdev->features.mmu_features), + .coherent_walk = pfdev->coherent, + .tlb = &mmu_tlb_ops, + .iommu_dev = pfdev->dev, + }; + + mmu->pgtbl_ops = alloc_io_pgtable_ops(ARM_MALI_LPAE, &mmu->pgtbl_cfg, + mmu); + if (!mmu->pgtbl_ops) { + kfree(mmu); + return ERR_PTR(-EINVAL); + } + + kref_init(&mmu->refcount); + + return mmu; +} + static const char *access_type_name(struct panfrost_device *pfdev, u32 fault_status) { @@ -605,7 +678,7 @@ static irqreturn_t panfrost_mmu_irq_handler_thread(int irq, void *data) if ((status & mask) == BIT(as) && (exception_type & 0xF8) == 0xC0) ret = panfrost_mmu_map_fault_addr(pfdev, as, addr); - if (ret) + if (ret) { /* terminal fault, print info about the fault */ dev_err(pfdev->dev, "Unhandled Page fault in AS%d at VA 0x%016llX\n" @@ -619,18 +692,32 @@ static irqreturn_t panfrost_mmu_irq_handler_thread(int irq, void *data) "TODO", fault_status, (fault_status & (1 << 10) ? 
"DECODER FAULT" : "SLAVE FAULT"), - exception_type, panfrost_exception_name(pfdev, exception_type), + exception_type, panfrost_exception_name(exception_type), access_type, access_type_name(pfdev, fault_status), source_id); + spin_lock(&pfdev->as_lock); + /* Ignore MMU interrupts on this AS until it's been + * re-enabled. + */ + pfdev->as_faulty_mask |= mask; + + /* Disable the MMU to kill jobs on this AS. */ + panfrost_mmu_disable(pfdev, as); + spin_unlock(&pfdev->as_lock); + } + status &= ~mask; /* If we received new MMU interrupts, process them before returning. */ if (!status) - status = mmu_read(pfdev, MMU_INT_RAWSTAT); + status = mmu_read(pfdev, MMU_INT_RAWSTAT) & ~pfdev->as_faulty_mask; } - mmu_write(pfdev, MMU_INT_MASK, ~0); + spin_lock(&pfdev->as_lock); + mmu_write(pfdev, MMU_INT_MASK, ~pfdev->as_faulty_mask); + spin_unlock(&pfdev->as_lock); + return IRQ_HANDLED; }; diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.h b/drivers/gpu/drm/panfrost/panfrost_mmu.h index 44fc2edf63ce..cc2a0d307feb 100644 --- a/drivers/gpu/drm/panfrost/panfrost_mmu.h +++ b/drivers/gpu/drm/panfrost/panfrost_mmu.h @@ -18,7 +18,8 @@ void panfrost_mmu_reset(struct panfrost_device *pfdev); u32 panfrost_mmu_as_get(struct panfrost_device *pfdev, struct panfrost_mmu *mmu); void panfrost_mmu_as_put(struct panfrost_device *pfdev, struct panfrost_mmu *mmu); -int panfrost_mmu_pgtable_alloc(struct panfrost_file_priv *priv); -void panfrost_mmu_pgtable_free(struct panfrost_file_priv *priv); +struct panfrost_mmu *panfrost_mmu_ctx_get(struct panfrost_mmu *mmu); +void panfrost_mmu_ctx_put(struct panfrost_mmu *mmu); +struct panfrost_mmu *panfrost_mmu_ctx_create(struct panfrost_device *pfdev); #endif diff --git a/drivers/gpu/drm/panfrost/panfrost_regs.h b/drivers/gpu/drm/panfrost/panfrost_regs.h index dc9df5457f1c..1940ff86e49a 100644 --- a/drivers/gpu/drm/panfrost/panfrost_regs.h +++ b/drivers/gpu/drm/panfrost/panfrost_regs.h @@ -262,9 +262,6 @@ #define JS_COMMAND_SOFT_STOP_1 0x06 /* Execute SOFT_STOP if JOB_CHAIN_FLAG is 1 */ #define JS_COMMAND_HARD_STOP_1 0x07 /* Execute HARD_STOP if JOB_CHAIN_FLAG is 1 */ -#define JS_STATUS_EVENT_ACTIVE 0x08 - - /* MMU regs */ #define MMU_INT_RAWSTAT 0x2000 #define MMU_INT_CLEAR 0x2004 diff --git a/drivers/gpu/drm/pl111/pl111_display.c b/drivers/gpu/drm/pl111/pl111_display.c index 6fd7f13f1aca..443e3b932322 100644 --- a/drivers/gpu/drm/pl111/pl111_display.c +++ b/drivers/gpu/drm/pl111/pl111_display.c @@ -11,7 +11,6 @@ #include <linux/clk.h> #include <linux/delay.h> -#include <linux/version.h> #include <linux/dma-buf.h> #include <linux/of_graph.h> @@ -440,7 +439,6 @@ static struct drm_simple_display_pipe_funcs pl111_display_funcs = { .enable = pl111_display_enable, .disable = pl111_display_disable, .update = pl111_display_update, - .prepare_fb = drm_gem_simple_display_pipe_prepare_fb, }; static int pl111_clk_div_choose_div(struct clk_hw *hw, unsigned long rate, diff --git a/drivers/gpu/drm/pl111/pl111_drv.c b/drivers/gpu/drm/pl111/pl111_drv.c index fa0a737e9dea..520301b405f1 100644 --- a/drivers/gpu/drm/pl111/pl111_drv.c +++ b/drivers/gpu/drm/pl111/pl111_drv.c @@ -44,7 +44,6 @@ #include <linux/of_reserved_mem.h> #include <linux/shmem_fs.h> #include <linux/slab.h> -#include <linux/version.h> #include <drm/drm_atomic_helper.h> #include <drm/drm_bridge.h> diff --git a/drivers/gpu/drm/qxl/qxl_drv.c b/drivers/gpu/drm/qxl/qxl_drv.c index 854e6c5a563f..fc47b0deb021 100644 --- a/drivers/gpu/drm/qxl/qxl_drv.c +++ b/drivers/gpu/drm/qxl/qxl_drv.c @@ -95,7 +95,7 @@ qxl_pci_probe(struct pci_dev *pdev, 
const struct pci_device_id *ent) if (ret) return ret; - ret = drm_aperture_remove_conflicting_pci_framebuffers(pdev, "qxl"); + ret = drm_aperture_remove_conflicting_pci_framebuffers(pdev, &qxl_driver); if (ret) goto disable_pci; @@ -281,10 +281,8 @@ static struct drm_driver qxl_driver = { .prime_handle_to_fd = drm_gem_prime_handle_to_fd, .prime_fd_to_handle = drm_gem_prime_fd_to_handle, .gem_prime_import_sg_table = qxl_gem_prime_import_sg_table, - .gem_prime_mmap = qxl_gem_prime_mmap, .fops = &qxl_fops, .ioctls = qxl_ioctls, - .irq_handler = qxl_irq_handler, .name = DRIVER_NAME, .desc = DRIVER_DESC, .date = DRIVER_DATE, diff --git a/drivers/gpu/drm/qxl/qxl_drv.h b/drivers/gpu/drm/qxl/qxl_drv.h index dd6abee55f56..359266d9e860 100644 --- a/drivers/gpu/drm/qxl/qxl_drv.h +++ b/drivers/gpu/drm/qxl/qxl_drv.h @@ -434,12 +434,9 @@ struct drm_gem_object *qxl_gem_prime_import_sg_table( int qxl_gem_prime_vmap(struct drm_gem_object *obj, struct dma_buf_map *map); void qxl_gem_prime_vunmap(struct drm_gem_object *obj, struct dma_buf_map *map); -int qxl_gem_prime_mmap(struct drm_gem_object *obj, - struct vm_area_struct *vma); /* qxl_irq.c */ int qxl_irq_init(struct qxl_device *qdev); -irqreturn_t qxl_irq_handler(int irq, void *arg); void qxl_debugfs_add_files(struct qxl_device *qdev, struct drm_info_list *files, diff --git a/drivers/gpu/drm/qxl/qxl_irq.c b/drivers/gpu/drm/qxl/qxl_irq.c index d312322cacd1..665278ee3b6d 100644 --- a/drivers/gpu/drm/qxl/qxl_irq.c +++ b/drivers/gpu/drm/qxl/qxl_irq.c @@ -25,11 +25,11 @@ #include <linux/pci.h> -#include <drm/drm_irq.h> +#include <drm/drm_drv.h> #include "qxl_drv.h" -irqreturn_t qxl_irq_handler(int irq, void *arg) +static irqreturn_t qxl_irq_handler(int irq, void *arg) { struct drm_device *dev = (struct drm_device *) arg; struct qxl_device *qdev = to_qxl(dev); @@ -81,7 +81,8 @@ static void qxl_client_monitors_config_work_func(struct work_struct *work) int qxl_irq_init(struct qxl_device *qdev) { - struct pci_dev *pdev = to_pci_dev(qdev->ddev.dev); + struct drm_device *ddev = &qdev->ddev; + struct pci_dev *pdev = to_pci_dev(ddev->dev); int ret; init_waitqueue_head(&qdev->display_event); @@ -95,7 +96,7 @@ int qxl_irq_init(struct qxl_device *qdev) atomic_set(&qdev->irq_received_cursor, 0); atomic_set(&qdev->irq_received_io_cmd, 0); qdev->irq_received_error = 0; - ret = drm_irq_install(&qdev->ddev, pdev->irq); + ret = request_irq(pdev->irq, qxl_irq_handler, IRQF_SHARED, ddev->driver->name, ddev); qdev->ram_header->int_mask = QXL_INTERRUPT_MASK; if (unlikely(ret != 0)) { DRM_ERROR("Failed installing irq: %d\n", ret); diff --git a/drivers/gpu/drm/qxl/qxl_prime.c b/drivers/gpu/drm/qxl/qxl_prime.c index 0628d1cc91fe..4a10cb0a413b 100644 --- a/drivers/gpu/drm/qxl/qxl_prime.c +++ b/drivers/gpu/drm/qxl/qxl_prime.c @@ -73,9 +73,3 @@ void qxl_gem_prime_vunmap(struct drm_gem_object *obj, qxl_bo_vunmap(bo); } - -int qxl_gem_prime_mmap(struct drm_gem_object *obj, - struct vm_area_struct *area) -{ - return -ENOSYS; -} diff --git a/drivers/gpu/drm/r128/r128_cce.c b/drivers/gpu/drm/r128/r128_cce.c index 2a2933c16308..c04d84a69dd2 100644 --- a/drivers/gpu/drm/r128/r128_cce.c +++ b/drivers/gpu/drm/r128/r128_cce.c @@ -39,7 +39,6 @@ #include <drm/drm_device.h> #include <drm/drm_file.h> -#include <drm/drm_irq.h> #include <drm/drm_legacy.h> #include <drm/drm_print.h> #include <drm/r128_drm.h> @@ -603,7 +602,7 @@ int r128_do_cleanup_cce(struct drm_device *dev) * is freed, it's too late. 
*/ if (dev->irq_enabled) - drm_irq_uninstall(dev); + drm_legacy_irq_uninstall(dev); if (dev->dev_private) { drm_r128_private_t *dev_priv = dev->dev_private; diff --git a/drivers/gpu/drm/radeon/atombios.h b/drivers/gpu/drm/radeon/atombios.h index 4b86e8b45009..83e8b8547f9b 100644 --- a/drivers/gpu/drm/radeon/atombios.h +++ b/drivers/gpu/drm/radeon/atombios.h @@ -2802,8 +2802,8 @@ ucMaxNBVoltageHigh: Voltage regulator dependent PWM value. High 8 bits of t ucMinNBVoltageHigh: Voltage regulator dependent PWM value. High 8 bits of the value for the min voltage.Set this one to 0x00 if VC without PWM or no VC at all. -usInterNBVoltageLow: Voltage regulator dependent PWM value. The value makes the the voltage >=Min NB voltage but <=InterNBVoltageHigh. Set this to 0x0000 if VC without PWM or no VC at all. -usInterNBVoltageHigh: Voltage regulator dependent PWM value. The value makes the the voltage >=InterNBVoltageLow but <=Max NB voltage.Set this to 0x0000 if VC without PWM or no VC at all. +usInterNBVoltageLow: Voltage regulator dependent PWM value. The value makes the voltage >=Min NB voltage but <=InterNBVoltageHigh. Set this to 0x0000 if VC without PWM or no VC at all. +usInterNBVoltageHigh: Voltage regulator dependent PWM value. The value makes the voltage >=InterNBVoltageLow but <=Max NB voltage.Set this to 0x0000 if VC without PWM or no VC at all. */ diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index 36a888e1b179..eeb590d2dec2 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -28,6 +28,7 @@ #include <drm/drm_vblank.h> #include <drm/radeon_drm.h> +#include <drm/drm_fourcc.h> #include "atom.h" #include "avivod.h" @@ -1414,10 +1415,15 @@ void evergreen_page_flip(struct radeon_device *rdev, int crtc_id, u64 crtc_base, bool async) { struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc_id]; + struct drm_framebuffer *fb = radeon_crtc->base.primary->fb; - /* update the scanout addresses */ + /* flip at hsync for async, default is vsync */ WREG32(EVERGREEN_GRPH_FLIP_CONTROL + radeon_crtc->crtc_offset, async ? 
EVERGREEN_GRPH_SURFACE_UPDATE_H_RETRACE_EN : 0); + /* update pitch */ + WREG32(EVERGREEN_GRPH_PITCH + radeon_crtc->crtc_offset, + fb->pitches[0] / fb->format->cpp[0]); + /* update the scanout addresses */ WREG32(EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS_HIGH + radeon_crtc->crtc_offset, upper_32_bits(crtc_base)); WREG32(EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS + radeon_crtc->crtc_offset, diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index ba724198b72e..2dd85ba1faa2 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -162,6 +162,8 @@ void r100_wait_for_vblank(struct radeon_device *rdev, int crtc) void r100_page_flip(struct radeon_device *rdev, int crtc_id, u64 crtc_base, bool async) { struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc_id]; + uint32_t crtc_pitch, pitch_pixels; + struct drm_framebuffer *fb = radeon_crtc->base.primary->fb; u32 tmp = ((u32)crtc_base) | RADEON_CRTC_OFFSET__OFFSET_LOCK; int i; @@ -169,6 +171,13 @@ void r100_page_flip(struct radeon_device *rdev, int crtc_id, u64 crtc_base, bool /* update the scanout addresses */ WREG32(RADEON_CRTC_OFFSET + radeon_crtc->crtc_offset, tmp); + /* update pitch */ + pitch_pixels = fb->pitches[0] / fb->format->cpp[0]; + crtc_pitch = DIV_ROUND_UP(pitch_pixels * fb->format->cpp[0] * 8, + fb->format->cpp[0] * 8 * 8); + crtc_pitch |= crtc_pitch << 16; + WREG32(RADEON_CRTC_PITCH + radeon_crtc->crtc_offset, crtc_pitch); + /* Wait for update_pending to go high. */ for (i = 0; i < rdev->usec_timeout; i++) { if (RREG32(RADEON_CRTC_OFFSET + radeon_crtc->crtc_offset) & RADEON_CRTC_OFFSET__GUI_TRIG_OFFSET) diff --git a/drivers/gpu/drm/radeon/r300_reg.h b/drivers/gpu/drm/radeon/r300_reg.h index 00c0d2ba22d3..60d5413bafa1 100644 --- a/drivers/gpu/drm/radeon/r300_reg.h +++ b/drivers/gpu/drm/radeon/r300_reg.h @@ -353,7 +353,7 @@ # define R300_PVS_CNTL_1_PROGRAM_START_SHIFT 0 # define R300_PVS_CNTL_1_POS_END_SHIFT 10 # define R300_PVS_CNTL_1_PROGRAM_END_SHIFT 20 -/* Addresses are relative the the vertex program parameters area. */ +/* Addresses are relative the vertex program parameters area. */ #define R300_VAP_PVS_CNTL_2 0x22D4 # define R300_PVS_CNTL_2_PARAM_OFFSET_SHIFT 0 # define R300_PVS_CNTL_2_PARAM_COUNT_SHIFT 16 diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index 46eea01950cb..4f0fbf667431 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -406,7 +406,7 @@ void radeon_doorbell_free(struct radeon_device *rdev, u32 doorbell) /* * radeon_wb_*() - * Writeback is the the method by which the the GPU updates special pages + * Writeback is the method by which the GPU updates special pages * in memory with the status of certain GPU events (fences, ring pointers, * etc.). 
*/ @@ -785,7 +785,7 @@ int radeon_dummy_page_init(struct radeon_device *rdev) if (rdev->dummy_page.page == NULL) return -ENOMEM; rdev->dummy_page.addr = dma_map_page(&rdev->pdev->dev, rdev->dummy_page.page, - 0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); + 0, PAGE_SIZE, DMA_BIDIRECTIONAL); if (dma_mapping_error(&rdev->pdev->dev, rdev->dummy_page.addr)) { dev_err(&rdev->pdev->dev, "Failed to DMA MAP the dummy page\n"); __free_page(rdev->dummy_page.page); @@ -808,8 +808,8 @@ void radeon_dummy_page_fini(struct radeon_device *rdev) { if (rdev->dummy_page.page == NULL) return; - pci_unmap_page(rdev->pdev, rdev->dummy_page.addr, - PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); + dma_unmap_page(&rdev->pdev->dev, rdev->dummy_page.addr, PAGE_SIZE, + DMA_BIDIRECTIONAL); __free_page(rdev->dummy_page.page); rdev->dummy_page.page = NULL; } @@ -1067,15 +1067,16 @@ void radeon_combios_fini(struct radeon_device *rdev) /** * radeon_vga_set_decode - enable/disable vga decode * - * @cookie: radeon_device pointer + * @pdev: PCI device * @state: enable/disable vga decode * * Enable/disable vga decode (all asics). * Returns VGA resource flags. */ -static unsigned int radeon_vga_set_decode(void *cookie, bool state) +static unsigned int radeon_vga_set_decode(struct pci_dev *pdev, bool state) { - struct radeon_device *rdev = cookie; + struct drm_device *dev = pci_get_drvdata(pdev); + struct radeon_device *rdev = dev->dev_private; radeon_vga_set_state(rdev, state); if (state) return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM | @@ -1434,7 +1435,7 @@ int radeon_device_init(struct radeon_device *rdev, /* if we have > 1 VGA cards, then disable the radeon VGA resources */ /* this will fail for cards that aren't VGA class devices, just * ignore it */ - vga_client_register(rdev->pdev, rdev, NULL, radeon_vga_set_decode); + vga_client_register(rdev->pdev, radeon_vga_set_decode); if (rdev->flags & RADEON_IS_PX) runtime = true; @@ -1530,7 +1531,7 @@ void radeon_device_fini(struct radeon_device *rdev) vga_switcheroo_unregister_client(rdev->pdev); if (rdev->flags & RADEON_IS_PX) vga_switcheroo_fini_domain_pm_ops(rdev->dev); - vga_client_register(rdev->pdev, NULL, NULL, NULL); + vga_client_unregister(rdev->pdev); if (rdev->rio_mem) pci_iounmap(rdev->pdev, rdev->rio_mem); rdev->rio_mem = NULL; diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index 5c23b77cb81a..b74cebca1f89 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -330,7 +330,7 @@ static int radeon_pci_probe(struct pci_dev *pdev, return -EPROBE_DEFER; /* Get rid of things like offb */ - ret = drm_aperture_remove_conflicting_pci_framebuffers(pdev, "radeondrmfb"); + ret = drm_aperture_remove_conflicting_pci_framebuffers(pdev, &kms_driver); if (ret) return ret; @@ -607,10 +607,6 @@ static const struct drm_driver kms_driver = { .postclose = radeon_driver_postclose_kms, .lastclose = radeon_driver_lastclose_kms, .unload = radeon_driver_unload_kms, - .irq_preinstall = radeon_driver_irq_preinstall_kms, - .irq_postinstall = radeon_driver_irq_postinstall_kms, - .irq_uninstall = radeon_driver_irq_uninstall_kms, - .irq_handler = radeon_driver_irq_handler_kms, .ioctls = radeon_ioctls_kms, .num_ioctls = ARRAY_SIZE(radeon_ioctls_kms), .dumb_create = radeon_mode_dumb_create, diff --git a/drivers/gpu/drm/radeon/radeon_fb.c b/drivers/gpu/drm/radeon/radeon_fb.c index 0b206b052972..ca382fbf7a86 100644 --- a/drivers/gpu/drm/radeon/radeon_fb.c +++ b/drivers/gpu/drm/radeon/radeon_fb.c @@ -54,6 +54,7 @@ radeonfb_open(struct fb_info 
*info, int user) struct radeon_fbdev *rfbdev = info->par; struct radeon_device *rdev = rfbdev->rdev; int ret = pm_runtime_get_sync(rdev->ddev->dev); + if (ret < 0 && ret != -EACCES) { pm_runtime_mark_last_busy(rdev->ddev->dev); pm_runtime_put_autosuspend(rdev->ddev->dev); @@ -167,6 +168,7 @@ static int radeonfb_create_pinned_object(struct radeon_fbdev *rfbdev, break; case 2: tiling_flags |= RADEON_TILING_SWAP_16BIT; + break; default: break; } @@ -196,9 +198,8 @@ static int radeonfb_create_pinned_object(struct radeon_fbdev *rfbdev, radeon_bo_check_tiling(rbo, 0, 0); ret = radeon_bo_kmap(rbo, NULL); radeon_bo_unreserve(rbo); - if (ret) { + if (ret) goto out_unref; - } *gobj_p = gobj; return 0; @@ -294,9 +295,6 @@ static int radeonfb_create(struct drm_fb_helper *helper, return 0; out: - if (rbo) { - - } if (fb && ret) { drm_gem_object_put(gobj); drm_framebuffer_unregister_private(fb); diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c index b2ce642ca4fa..e9c47ec28ade 100644 --- a/drivers/gpu/drm/radeon/radeon_fence.c +++ b/drivers/gpu/drm/radeon/radeon_fence.c @@ -50,7 +50,7 @@ * for GPU/CPU synchronization. When the fence is written, * it is expected that all buffers associated with that fence * are no longer in use by the associated ring on the GPU and - * that the the relevant GPU caches have been flushed. Whether + * that the relevant GPU caches have been flushed. Whether * we use a scratch register or memory location depends on the asic * and whether writeback is enabled. */ @@ -288,7 +288,7 @@ static void radeon_fence_check_lockup(struct work_struct *work) return; } - if (fence_drv->delayed_irq && rdev->ddev->irq_enabled) { + if (fence_drv->delayed_irq && rdev->irq.installed) { unsigned long irqflags; fence_drv->delayed_irq = false; diff --git a/drivers/gpu/drm/radeon/radeon_irq_kms.c b/drivers/gpu/drm/radeon/radeon_irq_kms.c index 84d0b1a3355f..3907785d0798 100644 --- a/drivers/gpu/drm/radeon/radeon_irq_kms.c +++ b/drivers/gpu/drm/radeon/radeon_irq_kms.c @@ -31,7 +31,7 @@ #include <drm/drm_crtc_helper.h> #include <drm/drm_device.h> -#include <drm/drm_irq.h> +#include <drm/drm_drv.h> #include <drm/drm_probe_helper.h> #include <drm/drm_vblank.h> #include <drm/radeon_drm.h> @@ -51,7 +51,7 @@ * radeon_irq_process is a macro that points to the per-asic * irq handler callback. */ -irqreturn_t radeon_driver_irq_handler_kms(int irq, void *arg) +static irqreturn_t radeon_driver_irq_handler_kms(int irq, void *arg) { struct drm_device *dev = (struct drm_device *) arg; struct radeon_device *rdev = dev->dev_private; @@ -118,7 +118,7 @@ static void radeon_dp_work_func(struct work_struct *work) * Gets the hw ready to enable irqs (all asics). * This function disables all interrupt sources on the GPU. */ -void radeon_driver_irq_preinstall_kms(struct drm_device *dev) +static void radeon_driver_irq_preinstall_kms(struct drm_device *dev) { struct radeon_device *rdev = dev->dev_private; unsigned long irqflags; @@ -150,7 +150,7 @@ void radeon_driver_irq_preinstall_kms(struct drm_device *dev) * Handles stuff to be done after enabling irqs (all asics). * Returns 0 on success. */ -int radeon_driver_irq_postinstall_kms(struct drm_device *dev) +static int radeon_driver_irq_postinstall_kms(struct drm_device *dev) { struct radeon_device *rdev = dev->dev_private; @@ -169,7 +169,7 @@ int radeon_driver_irq_postinstall_kms(struct drm_device *dev) * * This function disables all interrupt sources on the GPU (all asics). 
*/ -void radeon_driver_irq_uninstall_kms(struct drm_device *dev) +static void radeon_driver_irq_uninstall_kms(struct drm_device *dev) { struct radeon_device *rdev = dev->dev_private; unsigned long irqflags; @@ -194,6 +194,36 @@ void radeon_driver_irq_uninstall_kms(struct drm_device *dev) spin_unlock_irqrestore(&rdev->irq.lock, irqflags); } +static int radeon_irq_install(struct radeon_device *rdev, int irq) +{ + struct drm_device *dev = rdev->ddev; + int ret; + + if (irq == IRQ_NOTCONNECTED) + return -ENOTCONN; + + radeon_driver_irq_preinstall_kms(dev); + + /* PCI devices require shared interrupts. */ + ret = request_irq(irq, radeon_driver_irq_handler_kms, + IRQF_SHARED, dev->driver->name, dev); + if (ret) + return ret; + + radeon_driver_irq_postinstall_kms(dev); + + return 0; +} + +static void radeon_irq_uninstall(struct radeon_device *rdev) +{ + struct drm_device *dev = rdev->ddev; + struct pci_dev *pdev = to_pci_dev(dev->dev); + + radeon_driver_irq_uninstall_kms(dev); + free_irq(pdev->irq, dev); +} + /** * radeon_msi_ok - asic specific msi checks * @@ -314,7 +344,7 @@ int radeon_irq_kms_init(struct radeon_device *rdev) INIT_WORK(&rdev->audio_work, r600_audio_update_hdmi); rdev->irq.installed = true; - r = drm_irq_install(rdev->ddev, rdev->pdev->irq); + r = radeon_irq_install(rdev, rdev->pdev->irq); if (r) { rdev->irq.installed = false; flush_delayed_work(&rdev->hotplug_work); @@ -335,7 +365,7 @@ int radeon_irq_kms_init(struct radeon_device *rdev) void radeon_irq_kms_fini(struct radeon_device *rdev) { if (rdev->irq.installed) { - drm_irq_uninstall(rdev->ddev); + radeon_irq_uninstall(rdev); rdev->irq.installed = false; if (rdev->msi_enabled) pci_disable_msi(rdev->pdev); @@ -357,7 +387,7 @@ void radeon_irq_kms_sw_irq_get(struct radeon_device *rdev, int ring) { unsigned long irqflags; - if (!rdev->ddev->irq_enabled) + if (!rdev->irq.installed) return; if (atomic_inc_return(&rdev->irq.ring_int[ring]) == 1) { @@ -396,7 +426,7 @@ void radeon_irq_kms_sw_irq_put(struct radeon_device *rdev, int ring) { unsigned long irqflags; - if (!rdev->ddev->irq_enabled) + if (!rdev->irq.installed) return; if (atomic_dec_and_test(&rdev->irq.ring_int[ring])) { @@ -422,7 +452,7 @@ void radeon_irq_kms_pflip_irq_get(struct radeon_device *rdev, int crtc) if (crtc < 0 || crtc >= rdev->num_crtc) return; - if (!rdev->ddev->irq_enabled) + if (!rdev->irq.installed) return; if (atomic_inc_return(&rdev->irq.pflip[crtc]) == 1) { @@ -448,7 +478,7 @@ void radeon_irq_kms_pflip_irq_put(struct radeon_device *rdev, int crtc) if (crtc < 0 || crtc >= rdev->num_crtc) return; - if (!rdev->ddev->irq_enabled) + if (!rdev->irq.installed) return; if (atomic_dec_and_test(&rdev->irq.pflip[crtc])) { @@ -470,7 +500,7 @@ void radeon_irq_kms_enable_afmt(struct radeon_device *rdev, int block) { unsigned long irqflags; - if (!rdev->ddev->irq_enabled) + if (!rdev->irq.installed) return; spin_lock_irqsave(&rdev->irq.lock, irqflags); @@ -492,7 +522,7 @@ void radeon_irq_kms_disable_afmt(struct radeon_device *rdev, int block) { unsigned long irqflags; - if (!rdev->ddev->irq_enabled) + if (!rdev->irq.installed) return; spin_lock_irqsave(&rdev->irq.lock, irqflags); @@ -514,7 +544,7 @@ void radeon_irq_kms_enable_hpd(struct radeon_device *rdev, unsigned hpd_mask) unsigned long irqflags; int i; - if (!rdev->ddev->irq_enabled) + if (!rdev->irq.installed) return; spin_lock_irqsave(&rdev->irq.lock, irqflags); @@ -537,7 +567,7 @@ void radeon_irq_kms_disable_hpd(struct radeon_device *rdev, unsigned hpd_mask) unsigned long irqflags; int i; - if 
(!rdev->ddev->irq_enabled) + if (!rdev->irq.installed) return; spin_lock_irqsave(&rdev->irq.lock, irqflags); diff --git a/drivers/gpu/drm/radeon/radeon_kms.h b/drivers/gpu/drm/radeon/radeon_kms.h index 9b97bf38acd4..36e73cea9215 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.h +++ b/drivers/gpu/drm/radeon/radeon_kms.h @@ -31,9 +31,5 @@ u32 radeon_get_vblank_counter_kms(struct drm_crtc *crtc); int radeon_enable_vblank_kms(struct drm_crtc *crtc); void radeon_disable_vblank_kms(struct drm_crtc *crtc); -irqreturn_t radeon_driver_irq_handler_kms(int irq, void *arg); -void radeon_driver_irq_preinstall_kms(struct drm_device *dev); -int radeon_driver_irq_postinstall_kms(struct drm_device *dev); -void radeon_driver_irq_uninstall_kms(struct drm_device *dev); #endif /* __RADEON_KMS_H__ */ diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c index 36a38adaaea9..bb53016f3138 100644 --- a/drivers/gpu/drm/radeon/radeon_vm.c +++ b/drivers/gpu/drm/radeon/radeon_vm.c @@ -41,7 +41,7 @@ * (uncached system pages). * Each VM has an ID associated with it and there is a page table * associated with each VMID. When execting a command buffer, - * the kernel tells the the ring what VMID to use for that command + * the kernel tells the ring what VMID to use for that command * buffer. VMIDs are allocated dynamically as commands are submitted. * The userspace drivers maintain their own address space and the kernel * sets up their pages tables accordingly when they submit their diff --git a/drivers/gpu/drm/radeon/rs600.c b/drivers/gpu/drm/radeon/rs600.c index b2d22e25eee1..b87dd551e939 100644 --- a/drivers/gpu/drm/radeon/rs600.c +++ b/drivers/gpu/drm/radeon/rs600.c @@ -41,6 +41,7 @@ #include <drm/drm_device.h> #include <drm/drm_vblank.h> +#include <drm/drm_fourcc.h> #include "atom.h" #include "radeon.h" @@ -118,6 +119,7 @@ void avivo_wait_for_vblank(struct radeon_device *rdev, int crtc) void rs600_page_flip(struct radeon_device *rdev, int crtc_id, u64 crtc_base, bool async) { struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc_id]; + struct drm_framebuffer *fb = radeon_crtc->base.primary->fb; u32 tmp = RREG32(AVIVO_D1GRPH_UPDATE + radeon_crtc->crtc_offset); int i; @@ -125,9 +127,13 @@ void rs600_page_flip(struct radeon_device *rdev, int crtc_id, u64 crtc_base, boo tmp |= AVIVO_D1GRPH_UPDATE_LOCK; WREG32(AVIVO_D1GRPH_UPDATE + radeon_crtc->crtc_offset, tmp); - /* update the scanout addresses */ + /* flip at hsync for async, default is vsync */ WREG32(AVIVO_D1GRPH_FLIP_CONTROL + radeon_crtc->crtc_offset, async ? 
AVIVO_D1GRPH_SURFACE_UPDATE_H_RETRACE_EN : 0); + /* update pitch */ + WREG32(AVIVO_D1GRPH_PITCH + radeon_crtc->crtc_offset, + fb->pitches[0] / fb->format->cpp[0]); + /* update the scanout addresses */ WREG32(AVIVO_D1GRPH_SECONDARY_SURFACE_ADDRESS + radeon_crtc->crtc_offset, (u32)crtc_base); WREG32(AVIVO_D1GRPH_PRIMARY_SURFACE_ADDRESS + radeon_crtc->crtc_offset, diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c index 74499307285b..e592e57be1bb 100644 --- a/drivers/gpu/drm/radeon/rv770.c +++ b/drivers/gpu/drm/radeon/rv770.c @@ -32,6 +32,7 @@ #include <drm/drm_device.h> #include <drm/radeon_drm.h> +#include <drm/drm_fourcc.h> #include "atom.h" #include "avivod.h" @@ -809,6 +810,7 @@ u32 rv770_get_xclk(struct radeon_device *rdev) void rv770_page_flip(struct radeon_device *rdev, int crtc_id, u64 crtc_base, bool async) { struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc_id]; + struct drm_framebuffer *fb = radeon_crtc->base.primary->fb; u32 tmp = RREG32(AVIVO_D1GRPH_UPDATE + radeon_crtc->crtc_offset); int i; @@ -816,9 +818,13 @@ void rv770_page_flip(struct radeon_device *rdev, int crtc_id, u64 crtc_base, boo tmp |= AVIVO_D1GRPH_UPDATE_LOCK; WREG32(AVIVO_D1GRPH_UPDATE + radeon_crtc->crtc_offset, tmp); - /* update the scanout addresses */ + /* flip at hsync for async, default is vsync */ WREG32(AVIVO_D1GRPH_FLIP_CONTROL + radeon_crtc->crtc_offset, async ? AVIVO_D1GRPH_SURFACE_UPDATE_H_RETRACE_EN : 0); + /* update pitch */ + WREG32(AVIVO_D1GRPH_PITCH + radeon_crtc->crtc_offset, + fb->pitches[0] / fb->format->cpp[0]); + /* update the scanout addresses */ if (radeon_crtc->crtc_id) { WREG32(D2GRPH_SECONDARY_SURFACE_ADDRESS_HIGH, upper_32_bits(crtc_base)); WREG32(D2GRPH_PRIMARY_SURFACE_ADDRESS_HIGH, upper_32_bits(crtc_base)); diff --git a/drivers/gpu/drm/rcar-du/rcar_du_drv.c b/drivers/gpu/drm/rcar-du/rcar_du_drv.c index bfbff90588cb..4ac26d08ebb4 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_drv.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_drv.c @@ -553,14 +553,20 @@ static int rcar_du_remove(struct platform_device *pdev) struct drm_device *ddev = &rcdu->ddev; drm_dev_unregister(ddev); + drm_atomic_helper_shutdown(ddev); drm_kms_helper_poll_fini(ddev); - drm_dev_put(ddev); - return 0; } +static void rcar_du_shutdown(struct platform_device *pdev) +{ + struct rcar_du_device *rcdu = platform_get_drvdata(pdev); + + drm_atomic_helper_shutdown(&rcdu->ddev); +} + static int rcar_du_probe(struct platform_device *pdev) { struct rcar_du_device *rcdu; @@ -593,8 +599,6 @@ static int rcar_du_probe(struct platform_device *pdev) goto error; } - rcdu->ddev.irq_enabled = 1; - /* * Register the DRM device with the core and the connectors with * sysfs. 
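The rcar-du hunks above and below wire drm_atomic_helper_shutdown() into both driver removal and a new system-shutdown hook, so every CRTC is disabled before the machine powers off. A minimal sketch of that pattern for a generic platform DRM driver follows; the foo_* names are hypothetical and not part of this patch, and it assumes probe stored the device with platform_set_drvdata():

#include <drm/drm_atomic_helper.h>
#include <drm/drm_drv.h>
#include <linux/platform_device.h>

struct foo_device {
	struct drm_device ddev;
};

static void foo_shutdown(struct platform_device *pdev)
{
	struct foo_device *foo = platform_get_drvdata(pdev);

	/* Commit a state with every CRTC disabled so scanout stops cleanly. */
	drm_atomic_helper_shutdown(&foo->ddev);
}

static int foo_remove(struct platform_device *pdev)
{
	struct foo_device *foo = platform_get_drvdata(pdev);

	/* Unregister first, then tear the hardware state down. */
	drm_dev_unregister(&foo->ddev);
	drm_atomic_helper_shutdown(&foo->ddev);
	return 0;
}

/* Hooked up alongside .probe/.remove in the struct platform_driver:
 *	.shutdown = foo_shutdown,
 */
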
@@ -617,6 +621,7 @@ error: static struct platform_driver rcar_du_platform_driver = { .probe = rcar_du_probe, .remove = rcar_du_remove, + .shutdown = rcar_du_shutdown, .driver = { .name = "rcar-du", .pm = &rcar_du_pm_ops, diff --git a/drivers/gpu/drm/rcar-du/rcar_du_encoder.c b/drivers/gpu/drm/rcar-du/rcar_du_encoder.c index ca3761772211..0daa8bba50f5 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_encoder.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_encoder.c @@ -11,6 +11,7 @@ #include <linux/slab.h> #include <drm/drm_bridge.h> +#include <drm/drm_bridge_connector.h> #include <drm/drm_crtc.h> #include <drm/drm_managed.h> #include <drm/drm_modeset_helper_vtables.h> @@ -53,7 +54,9 @@ int rcar_du_encoder_init(struct rcar_du_device *rcdu, struct device_node *enc_node) { struct rcar_du_encoder *renc; + struct drm_connector *connector; struct drm_bridge *bridge; + int ret; /* * Locate the DRM bridge from the DT node. For the DPAD outputs, if the @@ -103,9 +106,22 @@ int rcar_du_encoder_init(struct rcar_du_device *rcdu, renc->output = output; - /* - * Attach the bridge to the encoder. The bridge will create the - * connector. - */ - return drm_bridge_attach(&renc->base, bridge, NULL, 0); + /* Attach the bridge to the encoder. */ + ret = drm_bridge_attach(&renc->base, bridge, NULL, + DRM_BRIDGE_ATTACH_NO_CONNECTOR); + if (ret) { + dev_err(rcdu->dev, "failed to attach bridge for output %u\n", + output); + return ret; + } + + /* Create the connector for the chain of bridges. */ + connector = drm_bridge_connector_init(&rcdu->ddev, &renc->base); + if (IS_ERR(connector)) { + dev_err(rcdu->dev, + "failed to created connector for output %u\n", output); + return PTR_ERR(connector); + } + + return drm_connector_attach_encoder(connector, &renc->base); } diff --git a/drivers/gpu/drm/rcar-du/rcar_dw_hdmi.c b/drivers/gpu/drm/rcar-du/rcar_dw_hdmi.c index 7b8ec8310699..18ed14911b98 100644 --- a/drivers/gpu/drm/rcar-du/rcar_dw_hdmi.c +++ b/drivers/gpu/drm/rcar-du/rcar_dw_hdmi.c @@ -75,6 +75,7 @@ static int rcar_hdmi_phy_configure(struct dw_hdmi *hdmi, void *data, } static const struct dw_hdmi_plat_data rcar_dw_hdmi_plat_data = { + .output_port = 1, .mode_valid = rcar_hdmi_mode_valid, .configure_phy = rcar_hdmi_phy_configure, }; diff --git a/drivers/gpu/drm/rcar-du/rcar_lvds.c b/drivers/gpu/drm/rcar-du/rcar_lvds.c index 70dbbe44bb23..d061b8de748f 100644 --- a/drivers/gpu/drm/rcar-du/rcar_lvds.c +++ b/drivers/gpu/drm/rcar-du/rcar_lvds.c @@ -63,7 +63,6 @@ struct rcar_lvds { struct drm_bridge bridge; struct drm_bridge *next_bridge; - struct drm_connector connector; struct drm_panel *panel; void __iomem *mmio; @@ -80,74 +79,12 @@ struct rcar_lvds { #define bridge_to_rcar_lvds(b) \ container_of(b, struct rcar_lvds, bridge) -#define connector_to_rcar_lvds(c) \ - container_of(c, struct rcar_lvds, connector) - static void rcar_lvds_write(struct rcar_lvds *lvds, u32 reg, u32 data) { iowrite32(data, lvds->mmio + reg); } /* ----------------------------------------------------------------------------- - * Connector & Panel - */ - -static int rcar_lvds_connector_get_modes(struct drm_connector *connector) -{ - struct rcar_lvds *lvds = connector_to_rcar_lvds(connector); - - return drm_panel_get_modes(lvds->panel, connector); -} - -static int rcar_lvds_connector_atomic_check(struct drm_connector *connector, - struct drm_atomic_state *state) -{ - struct rcar_lvds *lvds = connector_to_rcar_lvds(connector); - const struct drm_display_mode *panel_mode; - struct drm_connector_state *conn_state; - struct drm_crtc_state *crtc_state; - - 
conn_state = drm_atomic_get_new_connector_state(state, connector); - if (!conn_state->crtc) - return 0; - - if (list_empty(&connector->modes)) { - dev_dbg(lvds->dev, "connector: empty modes list\n"); - return -EINVAL; - } - - panel_mode = list_first_entry(&connector->modes, - struct drm_display_mode, head); - - /* We're not allowed to modify the resolution. */ - crtc_state = drm_atomic_get_crtc_state(state, conn_state->crtc); - if (IS_ERR(crtc_state)) - return PTR_ERR(crtc_state); - - if (crtc_state->mode.hdisplay != panel_mode->hdisplay || - crtc_state->mode.vdisplay != panel_mode->vdisplay) - return -EINVAL; - - /* The flat panel mode is fixed, just copy it to the adjusted mode. */ - drm_mode_copy(&crtc_state->adjusted_mode, panel_mode); - - return 0; -} - -static const struct drm_connector_helper_funcs rcar_lvds_conn_helper_funcs = { - .get_modes = rcar_lvds_connector_get_modes, - .atomic_check = rcar_lvds_connector_atomic_check, -}; - -static const struct drm_connector_funcs rcar_lvds_conn_funcs = { - .reset = drm_atomic_helper_connector_reset, - .fill_modes = drm_helper_probe_single_connector_modes, - .destroy = drm_connector_cleanup, - .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, - .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, -}; - -/* ----------------------------------------------------------------------------- * PLL Setup */ @@ -583,11 +520,6 @@ static void __rcar_lvds_atomic_enable(struct drm_bridge *bridge, /* Turn the output on. */ lvdcr0 |= LVDCR0_LVRES; rcar_lvds_write(lvds, LVDCR0, lvdcr0); - - if (lvds->panel) { - drm_panel_prepare(lvds->panel); - drm_panel_enable(lvds->panel); - } } static void rcar_lvds_atomic_enable(struct drm_bridge *bridge, @@ -609,11 +541,6 @@ static void rcar_lvds_atomic_disable(struct drm_bridge *bridge, { struct rcar_lvds *lvds = bridge_to_rcar_lvds(bridge); - if (lvds->panel) { - drm_panel_disable(lvds->panel); - drm_panel_unprepare(lvds->panel); - } - rcar_lvds_write(lvds, LVDCR0, 0); rcar_lvds_write(lvds, LVDCR1, 0); rcar_lvds_write(lvds, LVDPLLCR, 0); @@ -648,45 +575,13 @@ static int rcar_lvds_attach(struct drm_bridge *bridge, enum drm_bridge_attach_flags flags) { struct rcar_lvds *lvds = bridge_to_rcar_lvds(bridge); - struct drm_connector *connector = &lvds->connector; - struct drm_encoder *encoder = bridge->encoder; - int ret; - - /* If we have a next bridge just attach it. */ - if (lvds->next_bridge) - return drm_bridge_attach(bridge->encoder, lvds->next_bridge, - bridge, flags); - - if (flags & DRM_BRIDGE_ATTACH_NO_CONNECTOR) { - DRM_ERROR("Fix bridge driver to make connector optional!"); - return -EINVAL; - } - - /* Otherwise if we have a panel, create a connector. 
*/ - if (!lvds->panel) - return 0; - - ret = drm_connector_init(bridge->dev, connector, &rcar_lvds_conn_funcs, - DRM_MODE_CONNECTOR_LVDS); - if (ret < 0) - return ret; - - drm_connector_helper_add(connector, &rcar_lvds_conn_helper_funcs); - - ret = drm_connector_attach_encoder(connector, encoder); - if (ret < 0) - return ret; - - return 0; -} -static void rcar_lvds_detach(struct drm_bridge *bridge) -{ + return drm_bridge_attach(bridge->encoder, lvds->next_bridge, bridge, + flags); } static const struct drm_bridge_funcs rcar_lvds_bridge_ops = { .attach = rcar_lvds_attach, - .detach = rcar_lvds_detach, .atomic_duplicate_state = drm_atomic_helper_bridge_duplicate_state, .atomic_destroy_state = drm_atomic_helper_bridge_destroy_state, .atomic_reset = drm_atomic_helper_bridge_reset, @@ -759,7 +654,7 @@ static int rcar_lvds_parse_dt_companion(struct rcar_lvds *lvds) * that we are expected to generate even pixels from the primary * encoder, and odd pixels from the companion encoder. */ - if (lvds->next_bridge && lvds->next_bridge->timings && + if (lvds->next_bridge->timings && lvds->next_bridge->timings->dual_link) lvds->link_type = RCAR_LVDS_DUAL_LINK_EVEN_ODD_PIXELS; else @@ -811,6 +706,15 @@ static int rcar_lvds_parse_dt(struct rcar_lvds *lvds) if (ret) goto done; + if (lvds->panel) { + lvds->next_bridge = devm_drm_panel_bridge_add(lvds->dev, + lvds->panel); + if (IS_ERR_OR_NULL(lvds->next_bridge)) { + ret = -EINVAL; + goto done; + } + } + if (lvds->info->quirks & RCAR_LVDS_QUIRK_DUAL_LINK) ret = rcar_lvds_parse_dt_companion(lvds); @@ -839,9 +743,8 @@ static struct clk *rcar_lvds_get_clock(struct rcar_lvds *lvds, const char *name, if (PTR_ERR(clk) == -ENOENT && optional) return NULL; - if (PTR_ERR(clk) != -EPROBE_DEFER) - dev_err(lvds->dev, "failed to get %s clock\n", - name ? name : "module"); + dev_err_probe(lvds->dev, PTR_ERR(clk), "failed to get %s clock\n", + name ? name : "module"); return clk; } @@ -919,7 +822,6 @@ static int rcar_lvds_probe(struct platform_device *pdev) if (ret < 0) return ret; - lvds->bridge.driver_private = lvds; lvds->bridge.funcs = &rcar_lvds_bridge_ops; lvds->bridge.of_node = pdev->dev.of_node; diff --git a/drivers/gpu/drm/rockchip/Kconfig b/drivers/gpu/drm/rockchip/Kconfig index cb25c0e8fc9b..558f1b58bd69 100644 --- a/drivers/gpu/drm/rockchip/Kconfig +++ b/drivers/gpu/drm/rockchip/Kconfig @@ -10,6 +10,8 @@ config DRM_ROCKCHIP select DRM_DW_HDMI if ROCKCHIP_DW_HDMI select DRM_DW_MIPI_DSI if ROCKCHIP_DW_MIPI_DSI select DRM_RGB if ROCKCHIP_RGB + select GENERIC_PHY if ROCKCHIP_DW_MIPI_DSI + select GENERIC_PHY_MIPI_DPHY if ROCKCHIP_DW_MIPI_DSI select SND_SOC_HDMI_CODEC if ROCKCHIP_CDN_DP && SND_SOC help Choose this option if you have a Rockchip soc chipset. 
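The rcar_lvds rework above drops the driver's hand-rolled connector and instead wraps a directly attached panel with the panel-bridge helper, so the connector is created once, at the end of the bridge chain, by drm_bridge_connector_init() (see the rcar_du_encoder.c hunk earlier). A rough sketch of that pattern, with hypothetical foo_* names and an assumed output on DT port 1, endpoint 0:

#include <drm/drm_bridge.h>
#include <drm/drm_of.h>
#include <drm/drm_panel.h>
#include <linux/err.h>

/* Resolve the downstream device; a bare panel is converted into a bridge. */
static int foo_find_next_bridge(struct device *dev, struct drm_bridge **next)
{
	struct drm_panel *panel;
	struct drm_bridge *bridge;
	int ret;

	ret = drm_of_find_panel_or_bridge(dev->of_node, 1, 0, &panel, &bridge);
	if (ret)
		return ret;

	if (panel) {
		bridge = devm_drm_panel_bridge_add(dev, panel);
		if (IS_ERR(bridge))
			return PTR_ERR(bridge);
	}

	*next = bridge;
	return 0;
}

/*
 * The bridge's .attach callback then only chains the next bridge and passes
 * DRM_BRIDGE_ATTACH_NO_CONNECTOR along, leaving connector creation to
 * drm_bridge_connector_init() in the display driver.
 */
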
diff --git a/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c b/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c index ec7729d18cb8..a2262bee5aa4 100644 --- a/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c +++ b/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c @@ -14,6 +14,7 @@ #include <linux/of_device.h> #include <linux/phy/phy.h> #include <linux/pm_runtime.h> +#include <linux/phy/phy.h> #include <linux/regmap.h> #include <video/mipi_display.h> @@ -125,7 +126,9 @@ #define BANDGAP_AND_BIAS_CONTROL 0x20 #define TERMINATION_RESISTER_CONTROL 0x21 #define AFE_BIAS_BANDGAP_ANALOG_PROGRAMMABILITY 0x22 +#define HS_RX_CONTROL_OF_LANE_CLK 0x34 #define HS_RX_CONTROL_OF_LANE_0 0x44 +#define HS_RX_CONTROL_OF_LANE_1 0x54 #define HS_TX_CLOCK_LANE_REQUEST_STATE_TIME_CONTROL 0x60 #define HS_TX_CLOCK_LANE_PREPARE_STATE_TIME_CONTROL 0x61 #define HS_TX_CLOCK_LANE_HS_ZERO_STATE_TIME_CONTROL 0x62 @@ -137,6 +140,9 @@ #define HS_TX_DATA_LANE_HS_ZERO_STATE_TIME_CONTROL 0x72 #define HS_TX_DATA_LANE_TRAIL_STATE_TIME_CONTROL 0x73 #define HS_TX_DATA_LANE_EXIT_STATE_TIME_CONTROL 0x74 +#define HS_RX_DATA_LANE_THS_SETTLE_CONTROL 0x75 +#define HS_RX_CONTROL_OF_LANE_2 0x84 +#define HS_RX_CONTROL_OF_LANE_3 0x94 #define DW_MIPI_NEEDS_PHY_CFG_CLK BIT(0) #define DW_MIPI_NEEDS_GRF_CLK BIT(1) @@ -171,12 +177,20 @@ #define RK3399_TXRX_MASTERSLAVEZ BIT(7) #define RK3399_TXRX_ENABLECLK BIT(6) #define RK3399_TXRX_BASEDIR BIT(5) +#define RK3399_TXRX_SRC_SEL_ISP0 BIT(4) +#define RK3399_TXRX_TURNREQUEST GENMASK(3, 0) #define HIWORD_UPDATE(val, mask) (val | (mask) << 16) #define to_dsi(nm) container_of(nm, struct dw_mipi_dsi_rockchip, nm) enum { + DW_DSI_USAGE_IDLE, + DW_DSI_USAGE_DSI, + DW_DSI_USAGE_PHY, +}; + +enum { BANDGAP_97_07, BANDGAP_98_05, BANDGAP_99_02, @@ -213,6 +227,10 @@ struct rockchip_dw_dsi_chip_data { u32 lanecfg2_grf_reg; u32 lanecfg2; + int (*dphy_rx_init)(struct phy *phy); + int (*dphy_rx_power_on)(struct phy *phy); + int (*dphy_rx_power_off)(struct phy *phy); + unsigned int flags; unsigned int max_data_lanes; }; @@ -223,6 +241,7 @@ struct dw_mipi_dsi_rockchip { void __iomem *base; struct regmap *grf_regmap; + struct clk *pclk; struct clk *pllref_clk; struct clk *grf_clk; struct clk *phy_cfg_clk; @@ -235,6 +254,12 @@ struct dw_mipi_dsi_rockchip { struct phy *phy; union phy_configure_opts phy_opts; + /* being a phy for other mipi hosts */ + unsigned int usage_mode; + struct mutex usage_mutex; + struct phy *dphy; + struct phy_configure_opts_mipi_dphy dphy_config; + unsigned int lane_mbps; /* per lane */ u16 input_div; u16 feedback_div; @@ -978,6 +1003,17 @@ static int dw_mipi_dsi_rockchip_host_attach(void *priv_data, struct device *second; int ret; + mutex_lock(&dsi->usage_mutex); + + if (dsi->usage_mode != DW_DSI_USAGE_IDLE) { + DRM_DEV_ERROR(dsi->dev, "dsi controller already in use\n"); + mutex_unlock(&dsi->usage_mutex); + return -EBUSY; + } + + dsi->usage_mode = DW_DSI_USAGE_DSI; + mutex_unlock(&dsi->usage_mutex); + ret = component_add(dsi->dev, &dw_mipi_dsi_rockchip_ops); if (ret) { DRM_DEV_ERROR(dsi->dev, "Failed to register component: %d\n", @@ -1013,6 +1049,10 @@ static int dw_mipi_dsi_rockchip_host_detach(void *priv_data, component_del(dsi->dev, &dw_mipi_dsi_rockchip_ops); + mutex_lock(&dsi->usage_mutex); + dsi->usage_mode = DW_DSI_USAGE_IDLE; + mutex_unlock(&dsi->usage_mutex); + return 0; } @@ -1021,11 +1061,227 @@ static const struct dw_mipi_dsi_host_ops dw_mipi_dsi_rockchip_host_ops = { .detach = dw_mipi_dsi_rockchip_host_detach, }; +static int dw_mipi_dsi_rockchip_dphy_bind(struct device *dev, + struct 
device *master, + void *data) +{ + /* + * Nothing to do when used as a dphy. + * Just make the rest of Rockchip-DRM happy + * by being here. + */ + + return 0; +} + +static void dw_mipi_dsi_rockchip_dphy_unbind(struct device *dev, + struct device *master, + void *data) +{ + /* Nothing to do when used as a dphy. */ +} + +static const struct component_ops dw_mipi_dsi_rockchip_dphy_ops = { + .bind = dw_mipi_dsi_rockchip_dphy_bind, + .unbind = dw_mipi_dsi_rockchip_dphy_unbind, +}; + +static int dw_mipi_dsi_dphy_init(struct phy *phy) +{ + struct dw_mipi_dsi_rockchip *dsi = phy_get_drvdata(phy); + int ret; + + mutex_lock(&dsi->usage_mutex); + + if (dsi->usage_mode != DW_DSI_USAGE_IDLE) { + DRM_DEV_ERROR(dsi->dev, "dsi controller already in use\n"); + mutex_unlock(&dsi->usage_mutex); + return -EBUSY; + } + + dsi->usage_mode = DW_DSI_USAGE_PHY; + mutex_unlock(&dsi->usage_mutex); + + ret = component_add(dsi->dev, &dw_mipi_dsi_rockchip_dphy_ops); + if (ret < 0) + goto err_graph; + + if (dsi->cdata->dphy_rx_init) { + ret = clk_prepare_enable(dsi->pclk); + if (ret < 0) + goto err_init; + + ret = clk_prepare_enable(dsi->grf_clk); + if (ret) { + clk_disable_unprepare(dsi->pclk); + goto err_init; + } + + ret = dsi->cdata->dphy_rx_init(phy); + clk_disable_unprepare(dsi->grf_clk); + clk_disable_unprepare(dsi->pclk); + if (ret < 0) + goto err_init; + } + + return 0; + +err_init: + component_del(dsi->dev, &dw_mipi_dsi_rockchip_dphy_ops); +err_graph: + mutex_lock(&dsi->usage_mutex); + dsi->usage_mode = DW_DSI_USAGE_IDLE; + mutex_unlock(&dsi->usage_mutex); + + return ret; +} + +static int dw_mipi_dsi_dphy_exit(struct phy *phy) +{ + struct dw_mipi_dsi_rockchip *dsi = phy_get_drvdata(phy); + + component_del(dsi->dev, &dw_mipi_dsi_rockchip_dphy_ops); + + mutex_lock(&dsi->usage_mutex); + dsi->usage_mode = DW_DSI_USAGE_IDLE; + mutex_unlock(&dsi->usage_mutex); + + return 0; +} + +static int dw_mipi_dsi_dphy_configure(struct phy *phy, union phy_configure_opts *opts) +{ + struct phy_configure_opts_mipi_dphy *config = &opts->mipi_dphy; + struct dw_mipi_dsi_rockchip *dsi = phy_get_drvdata(phy); + int ret; + + ret = phy_mipi_dphy_config_validate(&opts->mipi_dphy); + if (ret) + return ret; + + dsi->dphy_config = *config; + dsi->lane_mbps = div_u64(config->hs_clk_rate, 1000 * 1000 * 1); + + return 0; +} + +static int dw_mipi_dsi_dphy_power_on(struct phy *phy) +{ + struct dw_mipi_dsi_rockchip *dsi = phy_get_drvdata(phy); + int i, ret; + + DRM_DEV_DEBUG(dsi->dev, "lanes %d - data_rate_mbps %u\n", + dsi->dphy_config.lanes, dsi->lane_mbps); + + i = max_mbps_to_parameter(dsi->lane_mbps); + if (i < 0) { + DRM_DEV_ERROR(dsi->dev, "failed to get parameter for %dmbps clock\n", + dsi->lane_mbps); + return i; + } + + ret = pm_runtime_get_sync(dsi->dev); + if (ret < 0) { + DRM_DEV_ERROR(dsi->dev, "failed to enable device: %d\n", ret); + return ret; + } + + ret = clk_prepare_enable(dsi->pclk); + if (ret) { + DRM_DEV_ERROR(dsi->dev, "Failed to enable pclk: %d\n", ret); + goto err_pclk; + } + + ret = clk_prepare_enable(dsi->grf_clk); + if (ret) { + DRM_DEV_ERROR(dsi->dev, "Failed to enable grf_clk: %d\n", ret); + goto err_grf_clk; + } + + ret = clk_prepare_enable(dsi->phy_cfg_clk); + if (ret) { + DRM_DEV_ERROR(dsi->dev, "Failed to enable phy_cfg_clk: %d\n", ret); + goto err_phy_cfg_clk; + } + + /* do soc-variant specific init */ + if (dsi->cdata->dphy_rx_power_on) { + ret = dsi->cdata->dphy_rx_power_on(phy); + if (ret < 0) { + DRM_DEV_ERROR(dsi->dev, "hardware-specific phy bringup failed: %d\n", ret); + goto err_pwr_on; + } + } + + /* + 
* Configure hsfreqrange according to frequency values + * Set clock lane and hsfreqrange by lane0(test code 0x44) + */ + dw_mipi_dsi_phy_write(dsi, HS_RX_CONTROL_OF_LANE_CLK, 0); + dw_mipi_dsi_phy_write(dsi, HS_RX_CONTROL_OF_LANE_0, + HSFREQRANGE_SEL(dppa_map[i].hsfreqrange)); + dw_mipi_dsi_phy_write(dsi, HS_RX_CONTROL_OF_LANE_1, 0); + dw_mipi_dsi_phy_write(dsi, HS_RX_CONTROL_OF_LANE_2, 0); + dw_mipi_dsi_phy_write(dsi, HS_RX_CONTROL_OF_LANE_3, 0); + + /* Normal operation */ + dw_mipi_dsi_phy_write(dsi, 0x0, 0); + + clk_disable_unprepare(dsi->phy_cfg_clk); + clk_disable_unprepare(dsi->grf_clk); + + return ret; + +err_pwr_on: + clk_disable_unprepare(dsi->phy_cfg_clk); +err_phy_cfg_clk: + clk_disable_unprepare(dsi->grf_clk); +err_grf_clk: + clk_disable_unprepare(dsi->pclk); +err_pclk: + pm_runtime_put(dsi->dev); + return ret; +} + +static int dw_mipi_dsi_dphy_power_off(struct phy *phy) +{ + struct dw_mipi_dsi_rockchip *dsi = phy_get_drvdata(phy); + int ret; + + ret = clk_prepare_enable(dsi->grf_clk); + if (ret) { + DRM_DEV_ERROR(dsi->dev, "Failed to enable grf_clk: %d\n", ret); + return ret; + } + + if (dsi->cdata->dphy_rx_power_off) { + ret = dsi->cdata->dphy_rx_power_off(phy); + if (ret < 0) + DRM_DEV_ERROR(dsi->dev, "hardware-specific phy shutdown failed: %d\n", ret); + } + + clk_disable_unprepare(dsi->grf_clk); + clk_disable_unprepare(dsi->pclk); + + pm_runtime_put(dsi->dev); + + return ret; +} + +static const struct phy_ops dw_mipi_dsi_dphy_ops = { + .configure = dw_mipi_dsi_dphy_configure, + .power_on = dw_mipi_dsi_dphy_power_on, + .power_off = dw_mipi_dsi_dphy_power_off, + .init = dw_mipi_dsi_dphy_init, + .exit = dw_mipi_dsi_dphy_exit, +}; + static int dw_mipi_dsi_rockchip_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct device_node *np = dev->of_node; struct dw_mipi_dsi_rockchip *dsi; + struct phy_provider *phy_provider; struct resource *res; const struct rockchip_dw_dsi_chip_data *cdata = of_device_get_match_data(dev); @@ -1065,6 +1321,13 @@ static int dw_mipi_dsi_rockchip_probe(struct platform_device *pdev) return ret; } + dsi->pclk = devm_clk_get(dev, "pclk"); + if (IS_ERR(dsi->pclk)) { + ret = PTR_ERR(dsi->pclk); + DRM_DEV_ERROR(dev, "Unable to get pclk: %d\n", ret); + return ret; + } + dsi->pllref_clk = devm_clk_get(dev, "ref"); if (IS_ERR(dsi->pllref_clk)) { if (dsi->phy) { @@ -1115,6 +1378,19 @@ static int dw_mipi_dsi_rockchip_probe(struct platform_device *pdev) dsi->pdata.priv_data = dsi; platform_set_drvdata(pdev, dsi); + mutex_init(&dsi->usage_mutex); + + dsi->dphy = devm_phy_create(dev, NULL, &dw_mipi_dsi_dphy_ops); + if (IS_ERR(dsi->dphy)) { + DRM_DEV_ERROR(&pdev->dev, "failed to create PHY\n"); + return PTR_ERR(dsi->dphy); + } + + phy_set_drvdata(dsi->dphy, dsi); + phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate); + if (IS_ERR(phy_provider)) + return PTR_ERR(phy_provider); + dsi->dmd = dw_mipi_dsi_probe(pdev, &dsi->pdata); if (IS_ERR(dsi->dmd)) { ret = PTR_ERR(dsi->dmd); @@ -1178,6 +1454,75 @@ static const struct rockchip_dw_dsi_chip_data rk3288_chip_data[] = { { /* sentinel */ } }; +static int rk3399_dphy_tx1rx1_init(struct phy *phy) +{ + struct dw_mipi_dsi_rockchip *dsi = phy_get_drvdata(phy); + + /* + * Set TX1RX1 source to isp1. + * Assume ISP0 is supplied by the RX0 dphy. 
+ */ + regmap_write(dsi->grf_regmap, RK3399_GRF_SOC_CON24, + HIWORD_UPDATE(0, RK3399_TXRX_SRC_SEL_ISP0)); + regmap_write(dsi->grf_regmap, RK3399_GRF_SOC_CON24, + HIWORD_UPDATE(0, RK3399_TXRX_MASTERSLAVEZ)); + regmap_write(dsi->grf_regmap, RK3399_GRF_SOC_CON24, + HIWORD_UPDATE(0, RK3399_TXRX_BASEDIR)); + regmap_write(dsi->grf_regmap, RK3399_GRF_SOC_CON23, + HIWORD_UPDATE(0, RK3399_DSI1_ENABLE)); + + return 0; +} + +static int rk3399_dphy_tx1rx1_power_on(struct phy *phy) +{ + struct dw_mipi_dsi_rockchip *dsi = phy_get_drvdata(phy); + + /* tester reset pulse */ + dsi_write(dsi, DSI_PHY_TST_CTRL0, PHY_TESTCLK | PHY_TESTCLR); + usleep_range(100, 150); + + regmap_write(dsi->grf_regmap, RK3399_GRF_SOC_CON24, + HIWORD_UPDATE(0, RK3399_TXRX_MASTERSLAVEZ)); + regmap_write(dsi->grf_regmap, RK3399_GRF_SOC_CON24, + HIWORD_UPDATE(RK3399_TXRX_BASEDIR, RK3399_TXRX_BASEDIR)); + + regmap_write(dsi->grf_regmap, RK3399_GRF_SOC_CON23, + HIWORD_UPDATE(0, RK3399_DSI1_FORCERXMODE)); + regmap_write(dsi->grf_regmap, RK3399_GRF_SOC_CON23, + HIWORD_UPDATE(0, RK3399_DSI1_FORCETXSTOPMODE)); + + /* Disable lane turn around, which is ignored in receive mode */ + regmap_write(dsi->grf_regmap, RK3399_GRF_SOC_CON24, + HIWORD_UPDATE(0, RK3399_TXRX_TURNREQUEST)); + regmap_write(dsi->grf_regmap, RK3399_GRF_SOC_CON23, + HIWORD_UPDATE(RK3399_DSI1_TURNDISABLE, + RK3399_DSI1_TURNDISABLE)); + usleep_range(100, 150); + + dsi_write(dsi, DSI_PHY_TST_CTRL0, PHY_TESTCLK | PHY_UNTESTCLR); + usleep_range(100, 150); + + /* Enable dphy lanes */ + regmap_write(dsi->grf_regmap, RK3399_GRF_SOC_CON23, + HIWORD_UPDATE(GENMASK(dsi->dphy_config.lanes - 1, 0), + RK3399_DSI1_ENABLE)); + + usleep_range(100, 150); + + return 0; +} + +static int rk3399_dphy_tx1rx1_power_off(struct phy *phy) +{ + struct dw_mipi_dsi_rockchip *dsi = phy_get_drvdata(phy); + + regmap_write(dsi->grf_regmap, RK3399_GRF_SOC_CON23, + HIWORD_UPDATE(0, RK3399_DSI1_ENABLE)); + + return 0; +} + static const struct rockchip_dw_dsi_chip_data rk3399_chip_data[] = { { .reg = 0xff960000, @@ -1220,6 +1565,10 @@ static const struct rockchip_dw_dsi_chip_data rk3399_chip_data[] = { .flags = DW_MIPI_NEEDS_PHY_CFG_CLK | DW_MIPI_NEEDS_GRF_CLK, .max_data_lanes = 4, + + .dphy_rx_init = rk3399_dphy_tx1rx1_init, + .dphy_rx_power_on = rk3399_dphy_tx1rx1_power_on, + .dphy_rx_power_off = rk3399_dphy_tx1rx1_power_off, }, { /* sentinel */ } }; diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c index b730b8d5d949..bfba9793d238 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c @@ -116,7 +116,7 @@ static int rockchip_drm_bind(struct device *dev) int ret; /* Remove existing drivers that may own the framebuffer memory. */ - ret = drm_aperture_remove_framebuffers(false, "rockchip-drm-fb"); + ret = drm_aperture_remove_framebuffers(false, &rockchip_drm_driver); if (ret) { DRM_DEV_ERROR(dev, "Failed to remove existing framebuffers - %d.\n", @@ -162,12 +162,6 @@ static int rockchip_drm_bind(struct device *dev) drm_mode_config_reset(drm_dev); - /* - * enable drm irq mode. - * - with irq_enabled = true, we can use the vblank feature. 
- */ - drm_dev->irq_enabled = true; - ret = rockchip_drm_fbdev_init(drm_dev); if (ret) goto err_unbind_all; diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c index f5b9028a16a3..ba9e14da41b4 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c @@ -1110,7 +1110,6 @@ static const struct drm_plane_helper_funcs plane_helper_funcs = { .atomic_disable = vop_plane_atomic_disable, .atomic_async_check = vop_plane_atomic_async_check, .atomic_async_update = vop_plane_atomic_async_update, - .prepare_fb = drm_gem_plane_helper_prepare_fb, }; static const struct drm_plane_funcs vop_plane_funcs = { diff --git a/drivers/gpu/drm/rockchip/rockchip_lvds.c b/drivers/gpu/drm/rockchip/rockchip_lvds.c index 489d63c05c0d..551653940e39 100644 --- a/drivers/gpu/drm/rockchip/rockchip_lvds.c +++ b/drivers/gpu/drm/rockchip/rockchip_lvds.c @@ -636,11 +636,8 @@ static int rockchip_lvds_bind(struct device *dev, struct device *master, } } else { ret = drm_bridge_attach(encoder, lvds->bridge, NULL, 0); - if (ret) { - DRM_DEV_ERROR(drm_dev->dev, - "failed to attach bridge: %d\n", ret); + if (ret) goto err_free_encoder; - } } pm_runtime_enable(dev); diff --git a/drivers/gpu/drm/rockchip/rockchip_rgb.c b/drivers/gpu/drm/rockchip/rockchip_rgb.c index c079714477d8..d691d9bef8e7 100644 --- a/drivers/gpu/drm/rockchip/rockchip_rgb.c +++ b/drivers/gpu/drm/rockchip/rockchip_rgb.c @@ -143,11 +143,8 @@ struct rockchip_rgb *rockchip_rgb_init(struct device *dev, rgb->bridge = bridge; ret = drm_bridge_attach(encoder, rgb->bridge, NULL, 0); - if (ret) { - DRM_DEV_ERROR(drm_dev->dev, - "failed to attach bridge: %d\n", ret); + if (ret) goto err_free_encoder; - } return rgb; diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index a2a953693b45..67382621b429 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -232,7 +232,7 @@ static void drm_sched_start_timeout(struct drm_gpu_scheduler *sched) { if (sched->timeout != MAX_SCHEDULE_TIMEOUT && !list_empty(&sched->pending_list)) - schedule_delayed_work(&sched->work_tdr, sched->timeout); + queue_delayed_work(sched->timeout_wq, &sched->work_tdr, sched->timeout); } /** @@ -244,7 +244,7 @@ static void drm_sched_start_timeout(struct drm_gpu_scheduler *sched) */ void drm_sched_fault(struct drm_gpu_scheduler *sched) { - mod_delayed_work(system_wq, &sched->work_tdr, 0); + mod_delayed_work(sched->timeout_wq, &sched->work_tdr, 0); } EXPORT_SYMBOL(drm_sched_fault); @@ -270,7 +270,7 @@ unsigned long drm_sched_suspend_timeout(struct drm_gpu_scheduler *sched) * Modify the timeout to an arbitrarily large value. 
This also prevents * the timeout to be restarted when new submissions arrive */ - if (mod_delayed_work(system_wq, &sched->work_tdr, MAX_SCHEDULE_TIMEOUT) + if (mod_delayed_work(sched->timeout_wq, &sched->work_tdr, MAX_SCHEDULE_TIMEOUT) && time_after(sched_timeout, now)) return sched_timeout - now; else @@ -294,7 +294,7 @@ void drm_sched_resume_timeout(struct drm_gpu_scheduler *sched, if (list_empty(&sched->pending_list)) cancel_delayed_work(&sched->work_tdr); else - mod_delayed_work(system_wq, &sched->work_tdr, remaining); + mod_delayed_work(sched->timeout_wq, &sched->work_tdr, remaining); spin_unlock(&sched->job_list_lock); } @@ -802,10 +802,10 @@ static int drm_sched_main(void *param) sched_job = drm_sched_entity_pop_job(entity); - complete(&entity->entity_idle); - - if (!sched_job) + if (!sched_job) { + complete(&entity->entity_idle); continue; + } s_fence = sched_job->s_fence; @@ -814,6 +814,7 @@ static int drm_sched_main(void *param) trace_drm_run_job(sched_job, entity); fence = sched->ops->run_job(sched_job); + complete(&entity->entity_idle); drm_sched_fence_scheduled(s_fence); if (!IS_ERR_OR_NULL(fence)) { @@ -846,6 +847,8 @@ static int drm_sched_main(void *param) * @hw_submission: number of hw submissions that can be in flight * @hang_limit: number of times to allow a job to hang before dropping it * @timeout: timeout value in jiffies for the scheduler + * @timeout_wq: workqueue to use for timeout work. If NULL, the system_wq is + * used * @score: optional score atomic shared with other schedulers * @name: name used for debugging * @@ -853,7 +856,8 @@ static int drm_sched_main(void *param) */ int drm_sched_init(struct drm_gpu_scheduler *sched, const struct drm_sched_backend_ops *ops, - unsigned hw_submission, unsigned hang_limit, long timeout, + unsigned hw_submission, unsigned hang_limit, + long timeout, struct workqueue_struct *timeout_wq, atomic_t *score, const char *name) { int i, ret; @@ -861,6 +865,7 @@ int drm_sched_init(struct drm_gpu_scheduler *sched, sched->hw_submission_limit = hw_submission; sched->name = name; sched->timeout = timeout; + sched->timeout_wq = timeout_wq ? : system_wq; sched->hang_limit = hang_limit; sched->score = score ? 
score : &sched->_score; for (i = DRM_SCHED_PRIORITY_MIN; i < DRM_SCHED_PRIORITY_COUNT; i++) diff --git a/drivers/gpu/drm/selftests/test-drm_damage_helper.c b/drivers/gpu/drm/selftests/test-drm_damage_helper.c index 9d2bcdf8bc29..1c19a5d3eefb 100644 --- a/drivers/gpu/drm/selftests/test-drm_damage_helper.c +++ b/drivers/gpu/drm/selftests/test-drm_damage_helper.c @@ -6,9 +6,37 @@ #define pr_fmt(fmt) "drm_damage_helper: " fmt #include <drm/drm_damage_helper.h> +#include <drm/drm_plane.h> +#include <drm/drm_drv.h> #include "test-drm_modeset_common.h" +struct drm_driver mock_driver; +static struct drm_device mock_device; +static struct drm_object_properties mock_obj_props; +static struct drm_plane mock_plane; +static struct drm_property mock_prop; + +static void mock_setup(struct drm_plane_state *state) +{ + static bool setup_done = false; + + state->plane = &mock_plane; + + if (setup_done) + return; + + /* just enough so that drm_plane_enable_fb_damage_clips() works */ + mock_device.driver = &mock_driver; + mock_device.mode_config.prop_fb_damage_clips = &mock_prop; + mock_plane.dev = &mock_device; + mock_plane.base.properties = &mock_obj_props; + mock_prop.base.id = 1; /* 0 is an invalid id */ + mock_prop.dev = &mock_device; + + drm_plane_enable_fb_damage_clips(&mock_plane); +} + static void set_plane_src(struct drm_plane_state *state, int x1, int y1, int x2, int y2) { @@ -70,23 +98,29 @@ static bool check_damage_clip(struct drm_plane_state *state, struct drm_rect *r, return true; } +const struct drm_framebuffer fb = { + .width = 2048, + .height = 2048 +}; + +/* common mocked structs many tests need */ +#define MOCK_VARIABLES() \ + struct drm_plane_state old_state; \ + struct drm_plane_state state = { \ + .crtc = ZERO_SIZE_PTR, \ + .fb = (struct drm_framebuffer *) &fb, \ + .visible = true, \ + }; \ + mock_setup(&old_state); \ + mock_setup(&state); + int igt_damage_iter_no_damage(void *ignored) { struct drm_atomic_helper_damage_iter iter; - struct drm_plane_state old_state; struct drm_rect clip; uint32_t num_hits = 0; - struct drm_framebuffer fb = { - .width = 2048, - .height = 2048 - }; - - struct drm_plane_state state = { - .crtc = ZERO_SIZE_PTR, - .fb = &fb, - .visible = true, - }; + MOCK_VARIABLES(); /* Plane src same as fb size. */ set_plane_src(&old_state, 0, 0, fb.width << 16, fb.height << 16); @@ -104,20 +138,10 @@ int igt_damage_iter_no_damage(void *ignored) int igt_damage_iter_no_damage_fractional_src(void *ignored) { struct drm_atomic_helper_damage_iter iter; - struct drm_plane_state old_state; struct drm_rect clip; uint32_t num_hits = 0; - struct drm_framebuffer fb = { - .width = 2048, - .height = 2048 - }; - - struct drm_plane_state state = { - .crtc = ZERO_SIZE_PTR, - .fb = &fb, - .visible = true, - }; + MOCK_VARIABLES(); /* Plane src has fractional part. */ set_plane_src(&old_state, 0x3fffe, 0x3fffe, @@ -137,20 +161,10 @@ int igt_damage_iter_no_damage_fractional_src(void *ignored) int igt_damage_iter_no_damage_src_moved(void *ignored) { struct drm_atomic_helper_damage_iter iter; - struct drm_plane_state old_state; struct drm_rect clip; uint32_t num_hits = 0; - struct drm_framebuffer fb = { - .width = 2048, - .height = 2048 - }; - - struct drm_plane_state state = { - .crtc = ZERO_SIZE_PTR, - .fb = &fb, - .visible = true, - }; + MOCK_VARIABLES(); /* Plane src moved since old plane state. 
*/ set_plane_src(&old_state, 0, 0, 1024 << 16, 768 << 16); @@ -169,20 +183,10 @@ int igt_damage_iter_no_damage_src_moved(void *ignored) int igt_damage_iter_no_damage_fractional_src_moved(void *ignored) { struct drm_atomic_helper_damage_iter iter; - struct drm_plane_state old_state; struct drm_rect clip; uint32_t num_hits = 0; - struct drm_framebuffer fb = { - .width = 2048, - .height = 2048 - }; - - struct drm_plane_state state = { - .crtc = ZERO_SIZE_PTR, - .fb = &fb, - .visible = true, - }; + MOCK_VARIABLES(); /* Plane src has fractional part and it moved since old plane state. */ set_plane_src(&old_state, 0x3fffe, 0x3fffe, @@ -202,20 +206,14 @@ int igt_damage_iter_no_damage_fractional_src_moved(void *ignored) int igt_damage_iter_no_damage_not_visible(void *ignored) { struct drm_atomic_helper_damage_iter iter; - struct drm_plane_state old_state; struct drm_rect clip; uint32_t num_hits = 0; - struct drm_framebuffer fb = { - .width = 2048, - .height = 2048 - }; + MOCK_VARIABLES(); - struct drm_plane_state state = { - .crtc = ZERO_SIZE_PTR, - .fb = &fb, - .visible = false, - }; + state.visible = false; + + mock_setup(&old_state); set_plane_src(&old_state, 0, 0, 1024 << 16, 768 << 16); set_plane_src(&state, 0, 0, 1024 << 16, 768 << 16); @@ -231,19 +229,12 @@ int igt_damage_iter_no_damage_not_visible(void *ignored) int igt_damage_iter_no_damage_no_crtc(void *ignored) { struct drm_atomic_helper_damage_iter iter; - struct drm_plane_state old_state; struct drm_rect clip; uint32_t num_hits = 0; - struct drm_framebuffer fb = { - .width = 2048, - .height = 2048 - }; + MOCK_VARIABLES(); - struct drm_plane_state state = { - .crtc = 0, - .fb = &fb, - }; + state.crtc = NULL; set_plane_src(&old_state, 0, 0, 1024 << 16, 768 << 16); set_plane_src(&state, 0, 0, 1024 << 16, 768 << 16); @@ -268,6 +259,8 @@ int igt_damage_iter_no_damage_no_fb(void *ignored) .fb = 0, }; + mock_setup(&old_state); + set_plane_src(&old_state, 0, 0, 1024 << 16, 768 << 16); set_plane_src(&state, 0, 0, 1024 << 16, 768 << 16); drm_atomic_helper_damage_iter_init(&iter, &old_state, &state); @@ -282,22 +275,12 @@ int igt_damage_iter_no_damage_no_fb(void *ignored) int igt_damage_iter_simple_damage(void *ignored) { struct drm_atomic_helper_damage_iter iter; - struct drm_plane_state old_state; struct drm_property_blob damage_blob; struct drm_mode_rect damage; struct drm_rect clip; uint32_t num_hits = 0; - struct drm_framebuffer fb = { - .width = 2048, - .height = 2048 - }; - - struct drm_plane_state state = { - .crtc = ZERO_SIZE_PTR, - .fb = &fb, - .visible = true, - }; + MOCK_VARIABLES(); set_plane_src(&old_state, 0, 0, 1024 << 16, 768 << 16); set_plane_src(&state, 0, 0, 1024 << 16, 768 << 16); @@ -318,22 +301,12 @@ int igt_damage_iter_simple_damage(void *ignored) int igt_damage_iter_single_damage(void *ignored) { struct drm_atomic_helper_damage_iter iter; - struct drm_plane_state old_state; struct drm_property_blob damage_blob; struct drm_mode_rect damage; struct drm_rect clip; uint32_t num_hits = 0; - struct drm_framebuffer fb = { - .width = 2048, - .height = 2048 - }; - - struct drm_plane_state state = { - .crtc = ZERO_SIZE_PTR, - .fb = &fb, - .visible = true, - }; + MOCK_VARIABLES(); set_plane_src(&old_state, 0, 0, 1024 << 16, 768 << 16); set_plane_src(&state, 0, 0, 1024 << 16, 768 << 16); @@ -353,22 +326,12 @@ int igt_damage_iter_single_damage(void *ignored) int igt_damage_iter_single_damage_intersect_src(void *ignored) { struct drm_atomic_helper_damage_iter iter; - struct drm_plane_state old_state; struct drm_property_blob 
damage_blob; struct drm_mode_rect damage; struct drm_rect clip; uint32_t num_hits = 0; - struct drm_framebuffer fb = { - .width = 2048, - .height = 2048 - }; - - struct drm_plane_state state = { - .crtc = ZERO_SIZE_PTR, - .fb = &fb, - .visible = true, - }; + MOCK_VARIABLES(); set_plane_src(&old_state, 0, 0, 1024 << 16, 768 << 16); set_plane_src(&state, 0, 0, 1024 << 16, 768 << 16); @@ -389,22 +352,12 @@ int igt_damage_iter_single_damage_intersect_src(void *ignored) int igt_damage_iter_single_damage_outside_src(void *ignored) { struct drm_atomic_helper_damage_iter iter; - struct drm_plane_state old_state; struct drm_property_blob damage_blob; struct drm_mode_rect damage; struct drm_rect clip; uint32_t num_hits = 0; - struct drm_framebuffer fb = { - .width = 2048, - .height = 2048 - }; - - struct drm_plane_state state = { - .crtc = ZERO_SIZE_PTR, - .fb = &fb, - .visible = true, - }; + MOCK_VARIABLES(); set_plane_src(&old_state, 0, 0, 1024 << 16, 768 << 16); set_plane_src(&state, 0, 0, 1024 << 16, 768 << 16); @@ -424,22 +377,12 @@ int igt_damage_iter_single_damage_outside_src(void *ignored) int igt_damage_iter_single_damage_fractional_src(void *ignored) { struct drm_atomic_helper_damage_iter iter; - struct drm_plane_state old_state; struct drm_property_blob damage_blob; struct drm_mode_rect damage; struct drm_rect clip; uint32_t num_hits = 0; - struct drm_framebuffer fb = { - .width = 2048, - .height = 2048 - }; - - struct drm_plane_state state = { - .crtc = ZERO_SIZE_PTR, - .fb = &fb, - .visible = true, - }; + MOCK_VARIABLES(); /* Plane src has fractional part. */ set_plane_src(&old_state, 0x40002, 0x40002, @@ -462,22 +405,12 @@ int igt_damage_iter_single_damage_fractional_src(void *ignored) int igt_damage_iter_single_damage_intersect_fractional_src(void *ignored) { struct drm_atomic_helper_damage_iter iter; - struct drm_plane_state old_state; struct drm_property_blob damage_blob; struct drm_mode_rect damage; struct drm_rect clip; uint32_t num_hits = 0; - struct drm_framebuffer fb = { - .width = 2048, - .height = 2048 - }; - - struct drm_plane_state state = { - .crtc = ZERO_SIZE_PTR, - .fb = &fb, - .visible = true, - }; + MOCK_VARIABLES(); /* Plane src has fractional part. */ set_plane_src(&old_state, 0x40002, 0x40002, @@ -501,22 +434,12 @@ int igt_damage_iter_single_damage_intersect_fractional_src(void *ignored) int igt_damage_iter_single_damage_outside_fractional_src(void *ignored) { struct drm_atomic_helper_damage_iter iter; - struct drm_plane_state old_state; struct drm_property_blob damage_blob; struct drm_mode_rect damage; struct drm_rect clip; uint32_t num_hits = 0; - struct drm_framebuffer fb = { - .width = 2048, - .height = 2048 - }; - - struct drm_plane_state state = { - .crtc = ZERO_SIZE_PTR, - .fb = &fb, - .visible = true, - }; + MOCK_VARIABLES(); /* Plane src has fractional part. */ set_plane_src(&old_state, 0x40002, 0x40002, @@ -539,22 +462,12 @@ int igt_damage_iter_single_damage_outside_fractional_src(void *ignored) int igt_damage_iter_single_damage_src_moved(void *ignored) { struct drm_atomic_helper_damage_iter iter; - struct drm_plane_state old_state; struct drm_property_blob damage_blob; struct drm_mode_rect damage; struct drm_rect clip; uint32_t num_hits = 0; - struct drm_framebuffer fb = { - .width = 2048, - .height = 2048 - }; - - struct drm_plane_state state = { - .crtc = ZERO_SIZE_PTR, - .fb = &fb, - .visible = true, - }; + MOCK_VARIABLES(); /* Plane src moved since old plane state. 
*/ set_plane_src(&old_state, 0, 0, 1024 << 16, 768 << 16); @@ -576,22 +489,12 @@ int igt_damage_iter_single_damage_src_moved(void *ignored) int igt_damage_iter_single_damage_fractional_src_moved(void *ignored) { struct drm_atomic_helper_damage_iter iter; - struct drm_plane_state old_state; struct drm_property_blob damage_blob; struct drm_mode_rect damage; struct drm_rect clip; uint32_t num_hits = 0; - struct drm_framebuffer fb = { - .width = 2048, - .height = 2048 - }; - - struct drm_plane_state state = { - .crtc = ZERO_SIZE_PTR, - .fb = &fb, - .visible = true, - }; + MOCK_VARIABLES(); /* Plane src with fractional part moved since old plane state. */ set_plane_src(&old_state, 0x3fffe, 0x3fffe, @@ -615,22 +518,12 @@ int igt_damage_iter_single_damage_fractional_src_moved(void *ignored) int igt_damage_iter_damage(void *ignored) { struct drm_atomic_helper_damage_iter iter; - struct drm_plane_state old_state; struct drm_property_blob damage_blob; struct drm_mode_rect damage[2]; struct drm_rect clip; uint32_t num_hits = 0; - struct drm_framebuffer fb = { - .width = 2048, - .height = 2048 - }; - - struct drm_plane_state state = { - .crtc = ZERO_SIZE_PTR, - .fb = &fb, - .visible = true, - }; + MOCK_VARIABLES(); set_plane_src(&old_state, 0, 0, 1024 << 16, 768 << 16); set_plane_src(&state, 0, 0, 1024 << 16, 768 << 16); @@ -656,22 +549,12 @@ int igt_damage_iter_damage(void *ignored) int igt_damage_iter_damage_one_intersect(void *ignored) { struct drm_atomic_helper_damage_iter iter; - struct drm_plane_state old_state; struct drm_property_blob damage_blob; struct drm_mode_rect damage[2]; struct drm_rect clip; uint32_t num_hits = 0; - struct drm_framebuffer fb = { - .width = 2048, - .height = 2048 - }; - - struct drm_plane_state state = { - .crtc = ZERO_SIZE_PTR, - .fb = &fb, - .visible = true, - }; + MOCK_VARIABLES(); set_plane_src(&old_state, 0x40002, 0x40002, 0x40002 + (1024 << 16), 0x40002 + (768 << 16)); @@ -699,22 +582,12 @@ int igt_damage_iter_damage_one_intersect(void *ignored) int igt_damage_iter_damage_one_outside(void *ignored) { struct drm_atomic_helper_damage_iter iter; - struct drm_plane_state old_state; struct drm_property_blob damage_blob; struct drm_mode_rect damage[2]; struct drm_rect clip; uint32_t num_hits = 0; - struct drm_framebuffer fb = { - .width = 2048, - .height = 2048 - }; - - struct drm_plane_state state = { - .crtc = ZERO_SIZE_PTR, - .fb = &fb, - .visible = true, - }; + MOCK_VARIABLES(); set_plane_src(&old_state, 0, 0, 1024 << 16, 768 << 16); set_plane_src(&state, 0, 0, 1024 << 16, 768 << 16); @@ -736,22 +609,12 @@ int igt_damage_iter_damage_one_outside(void *ignored) int igt_damage_iter_damage_src_moved(void *ignored) { struct drm_atomic_helper_damage_iter iter; - struct drm_plane_state old_state; struct drm_property_blob damage_blob; struct drm_mode_rect damage[2]; struct drm_rect clip; uint32_t num_hits = 0; - struct drm_framebuffer fb = { - .width = 2048, - .height = 2048 - }; - - struct drm_plane_state state = { - .crtc = ZERO_SIZE_PTR, - .fb = &fb, - .visible = true, - }; + MOCK_VARIABLES(); set_plane_src(&old_state, 0x40002, 0x40002, 0x40002 + (1024 << 16), 0x40002 + (768 << 16)); @@ -775,22 +638,14 @@ int igt_damage_iter_damage_src_moved(void *ignored) int igt_damage_iter_damage_not_visible(void *ignored) { struct drm_atomic_helper_damage_iter iter; - struct drm_plane_state old_state; struct drm_property_blob damage_blob; struct drm_mode_rect damage[2]; struct drm_rect clip; uint32_t num_hits = 0; - struct drm_framebuffer fb = { - .width = 2048, - .height = 2048 - 
}; + MOCK_VARIABLES(); - struct drm_plane_state state = { - .crtc = ZERO_SIZE_PTR, - .fb = &fb, - .visible = false, - }; + state.visible = false; set_plane_src(&old_state, 0x40002, 0x40002, 0x40002 + (1024 << 16), 0x40002 + (768 << 16)); diff --git a/drivers/gpu/drm/shmobile/shmob_drm_drv.c b/drivers/gpu/drm/shmobile/shmob_drm_drv.c index 0a02b7092c04..7db01904d18d 100644 --- a/drivers/gpu/drm/shmobile/shmob_drm_drv.c +++ b/drivers/gpu/drm/shmobile/shmob_drm_drv.c @@ -18,7 +18,6 @@ #include <drm/drm_crtc_helper.h> #include <drm/drm_drv.h> #include <drm/drm_gem_cma_helper.h> -#include <drm/drm_irq.h> #include <drm/drm_probe_helper.h> #include <drm/drm_vblank.h> @@ -130,7 +129,6 @@ DEFINE_DRM_GEM_CMA_FOPS(shmob_drm_fops); static const struct drm_driver shmob_drm_driver = { .driver_features = DRIVER_GEM | DRIVER_MODESET, - .irq_handler = shmob_drm_irq, DRM_GEM_CMA_DRIVER_OPS, .fops = &shmob_drm_fops, .name = "shmob-drm", @@ -183,7 +181,7 @@ static int shmob_drm_remove(struct platform_device *pdev) drm_dev_unregister(ddev); drm_kms_helper_poll_fini(ddev); - drm_irq_uninstall(ddev); + free_irq(sdev->irq, ddev); drm_dev_put(ddev); return 0; @@ -258,7 +256,13 @@ static int shmob_drm_probe(struct platform_device *pdev) goto err_modeset_cleanup; } - ret = drm_irq_install(ddev, platform_get_irq(pdev, 0)); + ret = platform_get_irq(pdev, 0); + if (ret < 0) + goto err_modeset_cleanup; + sdev->irq = ret; + + ret = request_irq(sdev->irq, shmob_drm_irq, 0, ddev->driver->name, + ddev); if (ret < 0) { dev_err(&pdev->dev, "failed to install IRQ handler\n"); goto err_modeset_cleanup; @@ -275,7 +279,7 @@ static int shmob_drm_probe(struct platform_device *pdev) return 0; err_irq_uninstall: - drm_irq_uninstall(ddev); + free_irq(sdev->irq, ddev); err_modeset_cleanup: drm_kms_helper_poll_fini(ddev); err_free_drm_dev: diff --git a/drivers/gpu/drm/shmobile/shmob_drm_drv.h b/drivers/gpu/drm/shmobile/shmob_drm_drv.h index 80dc4b1020aa..4964ddd5ab74 100644 --- a/drivers/gpu/drm/shmobile/shmob_drm_drv.h +++ b/drivers/gpu/drm/shmobile/shmob_drm_drv.h @@ -29,6 +29,7 @@ struct shmob_drm_device { u32 lddckr; u32 ldmt1r; + unsigned int irq; spinlock_t irq_lock; /* Protects hardware LDINTR register */ struct drm_device *ddev; diff --git a/drivers/gpu/drm/sti/sti_compositor.c b/drivers/gpu/drm/sti/sti_compositor.c index 319962a2c17b..9caaf3ccfabe 100644 --- a/drivers/gpu/drm/sti/sti_compositor.c +++ b/drivers/gpu/drm/sti/sti_compositor.c @@ -145,8 +145,6 @@ static int sti_compositor_bind(struct device *dev, } drm_vblank_init(drm_dev, crtc_id); - /* Allow usage of vblank without having to call drm_irq_install */ - drm_dev->irq_enabled = 1; return 0; } diff --git a/drivers/gpu/drm/sti/sti_dvo.c b/drivers/gpu/drm/sti/sti_dvo.c index ddb4184f0726..b6ee8a82e656 100644 --- a/drivers/gpu/drm/sti/sti_dvo.c +++ b/drivers/gpu/drm/sti/sti_dvo.c @@ -463,10 +463,8 @@ static int sti_dvo_bind(struct device *dev, struct device *master, void *data) drm_bridge_add(bridge); err = drm_bridge_attach(encoder, bridge, NULL, 0); - if (err) { - DRM_ERROR("Failed to attach bridge\n"); + if (err) return err; - } dvo->bridge = bridge; connector->encoder = encoder; diff --git a/drivers/gpu/drm/stm/dw_mipi_dsi-stm.c b/drivers/gpu/drm/stm/dw_mipi_dsi-stm.c index 8399d337589d..32cb41b2202f 100644 --- a/drivers/gpu/drm/stm/dw_mipi_dsi-stm.c +++ b/drivers/gpu/drm/stm/dw_mipi_dsi-stm.c @@ -309,14 +309,23 @@ dw_mipi_dsi_get_lane_mbps(void *priv_data, const struct drm_display_mode *mode, return 0; } +#define DSI_PHY_DELAY(fp, vp, mbps) DIV_ROUND_UP((fp) * (mbps) 
+ 1000 * (vp), 8000) + static int dw_mipi_dsi_phy_get_timing(void *priv_data, unsigned int lane_mbps, struct dw_mipi_dsi_dphy_timing *timing) { - timing->clk_hs2lp = 0x40; - timing->clk_lp2hs = 0x40; - timing->data_hs2lp = 0x40; - timing->data_lp2hs = 0x40; + /* + * From STM32MP157 datasheet, valid for STM32F469, STM32F7x9, STM32H747 + * phy_clkhs2lp_time = (272+136*UI)/(8*UI) + * phy_clklp2hs_time = (512+40*UI)/(8*UI) + * phy_hs2lp_time = (192+64*UI)/(8*UI) + * phy_lp2hs_time = (256+32*UI)/(8*UI) + */ + timing->clk_hs2lp = DSI_PHY_DELAY(272, 136, lane_mbps); + timing->clk_lp2hs = DSI_PHY_DELAY(512, 40, lane_mbps); + timing->data_hs2lp = DSI_PHY_DELAY(192, 64, lane_mbps); + timing->data_lp2hs = DSI_PHY_DELAY(256, 32, lane_mbps); return 0; } diff --git a/drivers/gpu/drm/stm/ltdc.c b/drivers/gpu/drm/stm/ltdc.c index 08b71248044d..195de30eb90c 100644 --- a/drivers/gpu/drm/stm/ltdc.c +++ b/drivers/gpu/drm/stm/ltdc.c @@ -947,7 +947,6 @@ static const struct drm_plane_funcs ltdc_plane_funcs = { }; static const struct drm_plane_helper_funcs ltdc_plane_helper_funcs = { - .prepare_fb = drm_gem_plane_helper_prepare_fb, .atomic_check = ltdc_plane_atomic_check, .atomic_update = ltdc_plane_atomic_update, .atomic_disable = ltdc_plane_atomic_disable, @@ -1122,8 +1121,9 @@ static int ltdc_encoder_init(struct drm_device *ddev, struct drm_bridge *bridge) ret = drm_bridge_attach(encoder, bridge, NULL, 0); if (ret) { - drm_encoder_cleanup(encoder); - return -EINVAL; + if (ret != -EPROBE_DEFER) + drm_encoder_cleanup(encoder); + return ret; } DRM_DEBUG_DRIVER("Bridge encoder:%d created\n", encoder->base.id); @@ -1266,7 +1266,8 @@ int ltdc_load(struct drm_device *ddev) if (bridge) { ret = ltdc_encoder_init(ddev, bridge); if (ret) { - DRM_ERROR("init encoder endpoint %d\n", i); + if (ret != -EPROBE_DEFER) + DRM_ERROR("init encoder endpoint %d\n", i); goto err; } } @@ -1339,9 +1340,6 @@ int ltdc_load(struct drm_device *ddev) goto err; } - /* Allow usage of vblank without having to call drm_irq_install */ - ddev->irq_enabled = 1; - clk_disable_unprepare(ldev->pixel_clk); pinctrl_pm_select_sleep_state(ddev->dev); diff --git a/drivers/gpu/drm/sun4i/sun4i_drv.c b/drivers/gpu/drm/sun4i/sun4i_drv.c index af335f58bdfc..54dd562e294c 100644 --- a/drivers/gpu/drm/sun4i/sun4i_drv.c +++ b/drivers/gpu/drm/sun4i/sun4i_drv.c @@ -97,10 +97,8 @@ static int sun4i_drv_bind(struct device *dev) if (ret) goto cleanup_mode_config; - drm->irq_enabled = true; - /* Remove early framebuffers (ie. 
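As a quick sanity check of the DSI_PHY_DELAY() conversion above (the lane rate is illustrative, not from the patch): at an assumed 500 Mbit/s one UI is 2 ns, so the datasheet formula for phy_hs2lp_time gives (192 + 64*2)/(8*2) = 20, and the macro computes the same value as DIV_ROUND_UP(192*500 + 1000*64, 8000) = DIV_ROUND_UP(160000, 8000) = 20; the two forms agree because UI = 1000/lane_mbps ns.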
simplefb) */ - ret = drm_aperture_remove_framebuffers(false, "sun4i-drm-fb"); + ret = drm_aperture_remove_framebuffers(false, &sun4i_drv_driver); if (ret) goto cleanup_mode_config; diff --git a/drivers/gpu/drm/sun4i/sun4i_layer.c b/drivers/gpu/drm/sun4i/sun4i_layer.c index 11771bdd6e7c..929e95f86b5b 100644 --- a/drivers/gpu/drm/sun4i/sun4i_layer.c +++ b/drivers/gpu/drm/sun4i/sun4i_layer.c @@ -127,7 +127,6 @@ static bool sun4i_layer_format_mod_supported(struct drm_plane *plane, } static const struct drm_plane_helper_funcs sun4i_backend_layer_helper_funcs = { - .prepare_fb = drm_gem_plane_helper_prepare_fb, .atomic_disable = sun4i_backend_layer_atomic_disable, .atomic_update = sun4i_backend_layer_atomic_update, }; diff --git a/drivers/gpu/drm/sun4i/sun4i_lvds.c b/drivers/gpu/drm/sun4i/sun4i_lvds.c index ac570437172e..6716e895ae8a 100644 --- a/drivers/gpu/drm/sun4i/sun4i_lvds.c +++ b/drivers/gpu/drm/sun4i/sun4i_lvds.c @@ -142,10 +142,8 @@ int sun4i_lvds_init(struct drm_device *drm, struct sun4i_tcon *tcon) if (bridge) { ret = drm_bridge_attach(encoder, bridge, NULL, 0); - if (ret) { - dev_err(drm->dev, "Couldn't attach our bridge\n"); + if (ret) goto err_cleanup_connector; - } } return 0; diff --git a/drivers/gpu/drm/sun4i/sun4i_rgb.c b/drivers/gpu/drm/sun4i/sun4i_rgb.c index e172426eb7e9..dfb6acc42f02 100644 --- a/drivers/gpu/drm/sun4i/sun4i_rgb.c +++ b/drivers/gpu/drm/sun4i/sun4i_rgb.c @@ -234,10 +234,8 @@ int sun4i_rgb_init(struct drm_device *drm, struct sun4i_tcon *tcon) if (rgb->bridge) { ret = drm_bridge_attach(encoder, rgb->bridge, NULL, 0); - if (ret) { - dev_err(drm->dev, "Couldn't attach our bridge\n"); + if (ret) goto err_cleanup_connector; - } } return 0; diff --git a/drivers/gpu/drm/sun4i/sun8i_ui_layer.c b/drivers/gpu/drm/sun4i/sun8i_ui_layer.c index e779855bcd6e..7845c2a53a7f 100644 --- a/drivers/gpu/drm/sun4i/sun8i_ui_layer.c +++ b/drivers/gpu/drm/sun4i/sun8i_ui_layer.c @@ -332,7 +332,6 @@ static void sun8i_ui_layer_atomic_update(struct drm_plane *plane, } static const struct drm_plane_helper_funcs sun8i_ui_layer_helper_funcs = { - .prepare_fb = drm_gem_plane_helper_prepare_fb, .atomic_check = sun8i_ui_layer_atomic_check, .atomic_disable = sun8i_ui_layer_atomic_disable, .atomic_update = sun8i_ui_layer_atomic_update, diff --git a/drivers/gpu/drm/sun4i/sun8i_vi_layer.c b/drivers/gpu/drm/sun4i/sun8i_vi_layer.c index 1c86c2dd0bbf..bb7c43036dfa 100644 --- a/drivers/gpu/drm/sun4i/sun8i_vi_layer.c +++ b/drivers/gpu/drm/sun4i/sun8i_vi_layer.c @@ -436,7 +436,6 @@ static void sun8i_vi_layer_atomic_update(struct drm_plane *plane, } static const struct drm_plane_helper_funcs sun8i_vi_layer_helper_funcs = { - .prepare_fb = drm_gem_plane_helper_prepare_fb, .atomic_check = sun8i_vi_layer_atomic_check, .atomic_disable = sun8i_vi_layer_atomic_disable, .atomic_update = sun8i_vi_layer_atomic_update, diff --git a/drivers/gpu/drm/tegra/Kconfig b/drivers/gpu/drm/tegra/Kconfig index 5043dcaf1cf9..1650a448eabd 100644 --- a/drivers/gpu/drm/tegra/Kconfig +++ b/drivers/gpu/drm/tegra/Kconfig @@ -9,6 +9,7 @@ config DRM_TEGRA select DRM_MIPI_DSI select DRM_PANEL select TEGRA_HOST1X + select INTERCONNECT select IOMMU_IOVA select CEC_CORE if CEC_NOTIFIER help diff --git a/drivers/gpu/drm/tegra/Makefile b/drivers/gpu/drm/tegra/Makefile index d6cf202414f0..d801909182cf 100644 --- a/drivers/gpu/drm/tegra/Makefile +++ b/drivers/gpu/drm/tegra/Makefile @@ -3,6 +3,9 @@ ccflags-$(CONFIG_DRM_TEGRA_DEBUG) += -DDEBUG tegra-drm-y := \ drm.o \ + uapi.o \ + submit.o \ + firewall.o \ gem.o \ fb.o \ dp.o \ diff --git 
a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c index 51bbbc42a144..16c7aabb94d3 100644 --- a/drivers/gpu/drm/tegra/dc.c +++ b/drivers/gpu/drm/tegra/dc.c @@ -8,6 +8,7 @@ #include <linux/debugfs.h> #include <linux/delay.h> #include <linux/iommu.h> +#include <linux/interconnect.h> #include <linux/module.h> #include <linux/of_device.h> #include <linux/pm_runtime.h> @@ -618,9 +619,14 @@ static int tegra_plane_atomic_check(struct drm_plane *plane, struct tegra_dc *dc = to_tegra_dc(new_plane_state->crtc); int err; + plane_state->peak_memory_bandwidth = 0; + plane_state->avg_memory_bandwidth = 0; + /* no need for further checks if the plane is being disabled */ - if (!new_plane_state->crtc) + if (!new_plane_state->crtc) { + plane_state->total_peak_memory_bandwidth = 0; return 0; + } err = tegra_plane_format(new_plane_state->fb->format->format, &plane_state->format, @@ -808,6 +814,12 @@ static struct drm_plane *tegra_primary_plane_create(struct drm_device *drm, formats = dc->soc->primary_formats; modifiers = dc->soc->modifiers; + err = tegra_plane_interconnect_init(plane); + if (err) { + kfree(plane); + return ERR_PTR(err); + } + err = drm_universal_plane_init(drm, &plane->base, possible_crtcs, &tegra_plane_funcs, formats, num_formats, modifiers, type, NULL); @@ -845,12 +857,18 @@ static int tegra_cursor_atomic_check(struct drm_plane *plane, { struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state, plane); + struct tegra_plane_state *plane_state = to_tegra_plane_state(new_plane_state); struct tegra_plane *tegra = to_tegra_plane(plane); int err; + plane_state->peak_memory_bandwidth = 0; + plane_state->avg_memory_bandwidth = 0; + /* no need for further checks if the plane is being disabled */ - if (!new_plane_state->crtc) + if (!new_plane_state->crtc) { + plane_state->total_peak_memory_bandwidth = 0; return 0; + } /* scaling not supported for cursor */ if ((new_plane_state->src_w >> 16 != new_plane_state->crtc_w) || @@ -1030,6 +1048,12 @@ static struct drm_plane *tegra_dc_cursor_plane_create(struct drm_device *drm, if (!dc->soc->has_nvdisplay) { num_formats = ARRAY_SIZE(tegra_legacy_cursor_plane_formats); formats = tegra_legacy_cursor_plane_formats; + + err = tegra_plane_interconnect_init(plane); + if (err) { + kfree(plane); + return ERR_PTR(err); + } } else { num_formats = ARRAY_SIZE(tegra_cursor_plane_formats); formats = tegra_cursor_plane_formats; @@ -1149,6 +1173,12 @@ static struct drm_plane *tegra_dc_overlay_plane_create(struct drm_device *drm, num_formats = dc->soc->num_overlay_formats; formats = dc->soc->overlay_formats; + err = tegra_plane_interconnect_init(plane); + if (err) { + kfree(plane); + return ERR_PTR(err); + } + if (!cursor) type = DRM_PLANE_TYPE_OVERLAY; else @@ -1572,6 +1602,11 @@ static int tegra_dc_show_stats(struct seq_file *s, void *data) seq_printf(s, "underflow: %lu\n", dc->stats.underflow); seq_printf(s, "overflow: %lu\n", dc->stats.overflow); + seq_printf(s, "frames total: %lu\n", dc->stats.frames_total); + seq_printf(s, "vblank total: %lu\n", dc->stats.vblank_total); + seq_printf(s, "underflow total: %lu\n", dc->stats.underflow_total); + seq_printf(s, "overflow total: %lu\n", dc->stats.overflow_total); + return 0; } @@ -1804,6 +1839,106 @@ static int tegra_dc_wait_idle(struct tegra_dc *dc, unsigned long timeout) return -ETIMEDOUT; } +static void +tegra_crtc_update_memory_bandwidth(struct drm_crtc *crtc, + struct drm_atomic_state *state, + bool prepare_bandwidth_transition) +{ + const struct tegra_plane_state *old_tegra_state, 
*new_tegra_state; + const struct tegra_dc_state *old_dc_state, *new_dc_state; + u32 i, new_avg_bw, old_avg_bw, new_peak_bw, old_peak_bw; + const struct drm_plane_state *old_plane_state; + const struct drm_crtc_state *old_crtc_state; + struct tegra_dc_window window, old_window; + struct tegra_dc *dc = to_tegra_dc(crtc); + struct tegra_plane *tegra; + struct drm_plane *plane; + + if (dc->soc->has_nvdisplay) + return; + + old_crtc_state = drm_atomic_get_old_crtc_state(state, crtc); + old_dc_state = to_const_dc_state(old_crtc_state); + new_dc_state = to_const_dc_state(crtc->state); + + if (!crtc->state->active) { + if (!old_crtc_state->active) + return; + + /* + * When CRTC is disabled on DPMS, the state of attached planes + * is kept unchanged. Hence we need to enforce removal of the + * bandwidths from the ICC paths. + */ + drm_atomic_crtc_for_each_plane(plane, crtc) { + tegra = to_tegra_plane(plane); + + icc_set_bw(tegra->icc_mem, 0, 0); + icc_set_bw(tegra->icc_mem_vfilter, 0, 0); + } + + return; + } + + for_each_old_plane_in_state(old_crtc_state->state, plane, + old_plane_state, i) { + old_tegra_state = to_const_tegra_plane_state(old_plane_state); + new_tegra_state = to_const_tegra_plane_state(plane->state); + tegra = to_tegra_plane(plane); + + /* + * We're iterating over the global atomic state and it contains + * planes from another CRTC, hence we need to filter out the + * planes unrelated to this CRTC. + */ + if (tegra->dc != dc) + continue; + + new_avg_bw = new_tegra_state->avg_memory_bandwidth; + old_avg_bw = old_tegra_state->avg_memory_bandwidth; + + new_peak_bw = new_tegra_state->total_peak_memory_bandwidth; + old_peak_bw = old_tegra_state->total_peak_memory_bandwidth; + + /* + * See the comment related to !crtc->state->active above, + * which explains why bandwidths need to be updated when + * CRTC is turning ON. + */ + if (new_avg_bw == old_avg_bw && new_peak_bw == old_peak_bw && + old_crtc_state->active) + continue; + + window.src.h = drm_rect_height(&plane->state->src) >> 16; + window.dst.h = drm_rect_height(&plane->state->dst); + + old_window.src.h = drm_rect_height(&old_plane_state->src) >> 16; + old_window.dst.h = drm_rect_height(&old_plane_state->dst); + + /* + * During the preparation phase (atomic_begin), the memory + * freq should go high before the DC changes are committed + * if bandwidth requirement goes up, otherwise memory freq + * should to stay high if BW requirement goes down. The + * opposite applies to the completion phase (post_commit). 
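To make that ordering concrete (illustrative numbers, not from the patch): if a window's peak requirement rises from 300000 to 600000 ICC kB/s, the atomic_begin pass with prepare_bandwidth_transition == true programs max(300000, 600000) = 600000 before the new window configuration is armed; if it instead drops to 300000, atomic_begin keeps 600000 and only the post-commit pass (prepare_bandwidth_transition == false, via tegra_crtc_atomic_post_commit below) lowers the path to 300000 once the VBLANK event confirms the hardware has latched the smaller window.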
+ */ + if (prepare_bandwidth_transition) { + new_avg_bw = max(old_avg_bw, new_avg_bw); + new_peak_bw = max(old_peak_bw, new_peak_bw); + + if (tegra_plane_use_vertical_filtering(tegra, &old_window)) + window = old_window; + } + + icc_set_bw(tegra->icc_mem, new_avg_bw, new_peak_bw); + + if (tegra_plane_use_vertical_filtering(tegra, &window)) + icc_set_bw(tegra->icc_mem_vfilter, new_avg_bw, new_peak_bw); + else + icc_set_bw(tegra->icc_mem_vfilter, 0, 0); + } +} + static void tegra_crtc_atomic_disable(struct drm_crtc *crtc, struct drm_atomic_state *state) { @@ -1985,6 +2120,8 @@ static void tegra_crtc_atomic_begin(struct drm_crtc *crtc, { unsigned long flags; + tegra_crtc_update_memory_bandwidth(crtc, state, true); + if (crtc->state->event) { spin_lock_irqsave(&crtc->dev->event_lock, flags); @@ -2017,7 +2154,207 @@ static void tegra_crtc_atomic_flush(struct drm_crtc *crtc, value = tegra_dc_readl(dc, DC_CMD_STATE_CONTROL); } +static bool tegra_plane_is_cursor(const struct drm_plane_state *state) +{ + const struct tegra_dc_soc_info *soc = to_tegra_dc(state->crtc)->soc; + const struct drm_format_info *fmt = state->fb->format; + unsigned int src_w = drm_rect_width(&state->src) >> 16; + unsigned int dst_w = drm_rect_width(&state->dst); + + if (state->plane->type != DRM_PLANE_TYPE_CURSOR) + return false; + + if (soc->supports_cursor) + return true; + + if (src_w != dst_w || fmt->num_planes != 1 || src_w * fmt->cpp[0] > 256) + return false; + + return true; +} + +static unsigned long +tegra_plane_overlap_mask(struct drm_crtc_state *state, + const struct drm_plane_state *plane_state) +{ + const struct drm_plane_state *other_state; + const struct tegra_plane *tegra; + unsigned long overlap_mask = 0; + struct drm_plane *plane; + struct drm_rect rect; + + if (!plane_state->visible || !plane_state->fb) + return 0; + + /* + * Data-prefetch FIFO will easily help to overcome temporal memory + * pressure if other plane overlaps with the cursor plane. + */ + if (tegra_plane_is_cursor(plane_state)) + return 0; + + drm_atomic_crtc_state_for_each_plane_state(plane, other_state, state) { + rect = plane_state->dst; + + tegra = to_tegra_plane(other_state->plane); + + if (!other_state->visible || !other_state->fb) + continue; + + /* + * Ignore cursor plane overlaps because it's not practical to + * assume that it contributes to the bandwidth in overlapping + * area if window width is small. + */ + if (tegra_plane_is_cursor(other_state)) + continue; + + if (drm_rect_intersect(&rect, &other_state->dst)) + overlap_mask |= BIT(tegra->index); + } + + return overlap_mask; +} + +static int tegra_crtc_calculate_memory_bandwidth(struct drm_crtc *crtc, + struct drm_atomic_state *state) +{ + ulong overlap_mask[TEGRA_DC_LEGACY_PLANES_NUM] = {}, mask; + u32 plane_peak_bw[TEGRA_DC_LEGACY_PLANES_NUM] = {}; + bool all_planes_overlap_simultaneously = true; + const struct tegra_plane_state *tegra_state; + const struct drm_plane_state *plane_state; + struct tegra_dc *dc = to_tegra_dc(crtc); + const struct drm_crtc_state *old_state; + struct drm_crtc_state *new_state; + struct tegra_plane *tegra; + struct drm_plane *plane; + + /* + * The nv-display uses shared planes. The algorithm below assumes + * maximum 3 planes per-CRTC, this assumption isn't applicable to + * the nv-display. Note that T124 support has additional windows, + * but currently they aren't supported by the driver. 
+ */ + if (dc->soc->has_nvdisplay) + return 0; + + new_state = drm_atomic_get_new_crtc_state(state, crtc); + old_state = drm_atomic_get_old_crtc_state(state, crtc); + + /* + * For overlapping planes pixel's data is fetched for each plane at + * the same time, hence bandwidths are accumulated in this case. + * This needs to be taken into account for calculating total bandwidth + * consumed by all planes. + * + * Here we get the overlapping state of each plane, which is a + * bitmask of plane indices telling with what planes there is an + * overlap. Note that bitmask[plane] includes BIT(plane) in order + * to make further code nicer and simpler. + */ + drm_atomic_crtc_state_for_each_plane_state(plane, plane_state, new_state) { + tegra_state = to_const_tegra_plane_state(plane_state); + tegra = to_tegra_plane(plane); + + if (WARN_ON_ONCE(tegra->index >= TEGRA_DC_LEGACY_PLANES_NUM)) + return -EINVAL; + + plane_peak_bw[tegra->index] = tegra_state->peak_memory_bandwidth; + mask = tegra_plane_overlap_mask(new_state, plane_state); + overlap_mask[tegra->index] = mask; + + if (hweight_long(mask) != 3) + all_planes_overlap_simultaneously = false; + } + + /* + * Then we calculate maximum bandwidth of each plane state. + * The bandwidth includes the plane BW + BW of the "simultaneously" + * overlapping planes, where "simultaneously" means areas where DC + * fetches from the planes simultaneously during of scan-out process. + * + * For example, if plane A overlaps with planes B and C, but B and C + * don't overlap, then the peak bandwidth will be either in area where + * A-and-B or A-and-C planes overlap. + * + * The plane_peak_bw[] contains peak memory bandwidth values of + * each plane, this information is needed by interconnect provider + * in order to set up latency allowance based on the peak BW, see + * tegra_crtc_update_memory_bandwidth(). + */ + drm_atomic_crtc_state_for_each_plane_state(plane, plane_state, new_state) { + u32 i, old_peak_bw, new_peak_bw, overlap_bw = 0; + + /* + * Note that plane's atomic check doesn't touch the + * total_peak_memory_bandwidth of enabled plane, hence the + * current state contains the old bandwidth state from the + * previous CRTC commit. + */ + tegra_state = to_const_tegra_plane_state(plane_state); + tegra = to_tegra_plane(plane); + + for_each_set_bit(i, &overlap_mask[tegra->index], 3) { + if (i == tegra->index) + continue; + + if (all_planes_overlap_simultaneously) + overlap_bw += plane_peak_bw[i]; + else + overlap_bw = max(overlap_bw, plane_peak_bw[i]); + } + + new_peak_bw = plane_peak_bw[tegra->index] + overlap_bw; + old_peak_bw = tegra_state->total_peak_memory_bandwidth; + + /* + * If plane's peak bandwidth changed (for example plane isn't + * overlapped anymore) and plane isn't in the atomic state, + * then add plane to the state in order to have the bandwidth + * updated. 
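A small worked example of the scheme above (window indices and bandwidth figures are illustrative): with wina = bit 0, winb = bit 1 and winc = bit 2, suppose A overlaps B, B overlaps C, but A and C do not overlap. The masks become overlap_mask[A] = 0b011, overlap_mask[B] = 0b111 and overlap_mask[C] = 0b110; since not every mask has three bits set, all_planes_overlap_simultaneously is false. With plane_peak_bw = {400, 300, 200}, plane A ends up with 400 + 300 = 700, plane B with 300 + max(400, 200) = 700 and plane C with 200 + 300 = 500, and any plane whose total changed since the previous commit is pulled into the atomic state so the new figure reaches tegra_crtc_update_memory_bandwidth().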
+ */ + if (old_peak_bw != new_peak_bw) { + struct tegra_plane_state *new_tegra_state; + struct drm_plane_state *new_plane_state; + + new_plane_state = drm_atomic_get_plane_state(state, plane); + if (IS_ERR(new_plane_state)) + return PTR_ERR(new_plane_state); + + new_tegra_state = to_tegra_plane_state(new_plane_state); + new_tegra_state->total_peak_memory_bandwidth = new_peak_bw; + } + } + + return 0; +} + +static int tegra_crtc_atomic_check(struct drm_crtc *crtc, + struct drm_atomic_state *state) +{ + int err; + + err = tegra_crtc_calculate_memory_bandwidth(crtc, state); + if (err) + return err; + + return 0; +} + +void tegra_crtc_atomic_post_commit(struct drm_crtc *crtc, + struct drm_atomic_state *state) +{ + /* + * Display bandwidth is allowed to go down only once hardware state + * is known to be armed, i.e. state was committed and VBLANK event + * received. + */ + tegra_crtc_update_memory_bandwidth(crtc, state, false); +} + static const struct drm_crtc_helper_funcs tegra_crtc_helper_funcs = { + .atomic_check = tegra_crtc_atomic_check, .atomic_begin = tegra_crtc_atomic_begin, .atomic_flush = tegra_crtc_atomic_flush, .atomic_enable = tegra_crtc_atomic_enable, @@ -2036,6 +2373,7 @@ static irqreturn_t tegra_dc_irq(int irq, void *data) /* dev_dbg(dc->dev, "%s(): frame end\n", __func__); */ + dc->stats.frames_total++; dc->stats.frames++; } @@ -2044,6 +2382,7 @@ static irqreturn_t tegra_dc_irq(int irq, void *data) dev_dbg(dc->dev, "%s(): vertical blank\n", __func__); */ drm_crtc_handle_vblank(&dc->base); + dc->stats.vblank_total++; dc->stats.vblank++; } @@ -2051,6 +2390,7 @@ static irqreturn_t tegra_dc_irq(int irq, void *data) /* dev_dbg(dc->dev, "%s(): underflow\n", __func__); */ + dc->stats.underflow_total++; dc->stats.underflow++; } @@ -2058,11 +2398,13 @@ static irqreturn_t tegra_dc_irq(int irq, void *data) /* dev_dbg(dc->dev, "%s(): overflow\n", __func__); */ + dc->stats.overflow_total++; dc->stats.overflow++; } if (status & HEAD_UF_INT) { dev_dbg_ratelimited(dc->dev, "%s(): head underflow\n", __func__); + dc->stats.underflow_total++; dc->stats.underflow++; } @@ -2343,7 +2685,9 @@ static const struct tegra_dc_soc_info tegra20_dc_soc_info = { .overlay_formats = tegra20_overlay_formats, .modifiers = tegra20_modifiers, .has_win_a_without_filters = true, + .has_win_b_vfilter_mem_client = true, .has_win_c_without_vert_filter = true, + .plane_tiled_memory_bandwidth_x2 = false, }; static const struct tegra_dc_soc_info tegra30_dc_soc_info = { @@ -2363,7 +2707,9 @@ static const struct tegra_dc_soc_info tegra30_dc_soc_info = { .overlay_formats = tegra20_overlay_formats, .modifiers = tegra20_modifiers, .has_win_a_without_filters = false, + .has_win_b_vfilter_mem_client = true, .has_win_c_without_vert_filter = false, + .plane_tiled_memory_bandwidth_x2 = true, }; static const struct tegra_dc_soc_info tegra114_dc_soc_info = { @@ -2383,7 +2729,9 @@ static const struct tegra_dc_soc_info tegra114_dc_soc_info = { .overlay_formats = tegra114_overlay_formats, .modifiers = tegra20_modifiers, .has_win_a_without_filters = false, + .has_win_b_vfilter_mem_client = false, .has_win_c_without_vert_filter = false, + .plane_tiled_memory_bandwidth_x2 = true, }; static const struct tegra_dc_soc_info tegra124_dc_soc_info = { @@ -2403,7 +2751,9 @@ static const struct tegra_dc_soc_info tegra124_dc_soc_info = { .overlay_formats = tegra124_overlay_formats, .modifiers = tegra124_modifiers, .has_win_a_without_filters = false, + .has_win_b_vfilter_mem_client = false, .has_win_c_without_vert_filter = false, + 
.plane_tiled_memory_bandwidth_x2 = false, }; static const struct tegra_dc_soc_info tegra210_dc_soc_info = { @@ -2423,7 +2773,9 @@ static const struct tegra_dc_soc_info tegra210_dc_soc_info = { .overlay_formats = tegra114_overlay_formats, .modifiers = tegra124_modifiers, .has_win_a_without_filters = false, + .has_win_b_vfilter_mem_client = false, .has_win_c_without_vert_filter = false, + .plane_tiled_memory_bandwidth_x2 = false, }; static const struct tegra_windowgroup_soc tegra186_dc_wgrps[] = { @@ -2473,6 +2825,7 @@ static const struct tegra_dc_soc_info tegra186_dc_soc_info = { .has_nvdisplay = true, .wgrps = tegra186_dc_wgrps, .num_wgrps = ARRAY_SIZE(tegra186_dc_wgrps), + .plane_tiled_memory_bandwidth_x2 = false, }; static const struct tegra_windowgroup_soc tegra194_dc_wgrps[] = { @@ -2522,6 +2875,7 @@ static const struct tegra_dc_soc_info tegra194_dc_soc_info = { .has_nvdisplay = true, .wgrps = tegra194_dc_wgrps, .num_wgrps = ARRAY_SIZE(tegra194_dc_wgrps), + .plane_tiled_memory_bandwidth_x2 = false, }; static const struct of_device_id tegra_dc_of_match[] = { diff --git a/drivers/gpu/drm/tegra/dc.h b/drivers/gpu/drm/tegra/dc.h index 5e13f1cfd749..f0cb691852a1 100644 --- a/drivers/gpu/drm/tegra/dc.h +++ b/drivers/gpu/drm/tegra/dc.h @@ -15,6 +15,8 @@ struct tegra_output; +#define TEGRA_DC_LEGACY_PLANES_NUM 7 + struct tegra_dc_state { struct drm_crtc_state base; @@ -33,11 +35,22 @@ static inline struct tegra_dc_state *to_dc_state(struct drm_crtc_state *state) return NULL; } +static inline const struct tegra_dc_state * +to_const_dc_state(const struct drm_crtc_state *state) +{ + return to_dc_state((struct drm_crtc_state *)state); +} + struct tegra_dc_stats { unsigned long frames; unsigned long vblank; unsigned long underflow; unsigned long overflow; + + unsigned long frames_total; + unsigned long vblank_total; + unsigned long underflow_total; + unsigned long overflow_total; }; struct tegra_windowgroup_soc { @@ -66,7 +79,9 @@ struct tegra_dc_soc_info { unsigned int num_overlay_formats; const u64 *modifiers; bool has_win_a_without_filters; + bool has_win_b_vfilter_mem_client; bool has_win_c_without_vert_filter; + bool plane_tiled_memory_bandwidth_x2; }; struct tegra_dc { @@ -152,6 +167,8 @@ int tegra_dc_state_setup_clock(struct tegra_dc *dc, struct drm_crtc_state *crtc_state, struct clk *clk, unsigned long pclk, unsigned int div); +void tegra_crtc_atomic_post_commit(struct drm_crtc *crtc, + struct drm_atomic_state *state); /* from rgb.c */ int tegra_dc_rgb_probe(struct tegra_dc *dc); diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c index f96c237b2242..8d37d6b00562 100644 --- a/drivers/gpu/drm/tegra/drm.c +++ b/drivers/gpu/drm/tegra/drm.c @@ -21,24 +21,21 @@ #include <drm/drm_prime.h> #include <drm/drm_vblank.h> +#include "dc.h" #include "drm.h" #include "gem.h" +#include "uapi.h" #define DRIVER_NAME "tegra" #define DRIVER_DESC "NVIDIA Tegra graphics" #define DRIVER_DATE "20120330" -#define DRIVER_MAJOR 0 +#define DRIVER_MAJOR 1 #define DRIVER_MINOR 0 #define DRIVER_PATCHLEVEL 0 #define CARVEOUT_SZ SZ_64M #define CDMA_GATHER_FETCHES_MAX_NB 16383 -struct tegra_drm_file { - struct idr contexts; - struct mutex lock; -}; - static int tegra_atomic_check(struct drm_device *drm, struct drm_atomic_state *state) { @@ -60,6 +57,17 @@ static const struct drm_mode_config_funcs tegra_drm_mode_config_funcs = { .atomic_commit = drm_atomic_helper_commit, }; +static void tegra_atomic_post_commit(struct drm_device *drm, + struct drm_atomic_state *old_state) +{ + struct drm_crtc_state 
*old_crtc_state __maybe_unused; + struct drm_crtc *crtc; + unsigned int i; + + for_each_old_crtc_in_state(old_state, crtc, old_crtc_state, i) + tegra_crtc_atomic_post_commit(crtc, old_state); +} + static void tegra_atomic_commit_tail(struct drm_atomic_state *old_state) { struct drm_device *drm = old_state->dev; @@ -79,6 +87,8 @@ static void tegra_atomic_commit_tail(struct drm_atomic_state *old_state) } else { drm_atomic_helper_commit_tail_rpm(old_state); } + + tegra_atomic_post_commit(drm, old_state); } static const struct drm_mode_config_helper_funcs @@ -94,7 +104,9 @@ static int tegra_drm_open(struct drm_device *drm, struct drm_file *filp) if (!fpriv) return -ENOMEM; - idr_init_base(&fpriv->contexts, 1); + idr_init_base(&fpriv->legacy_contexts, 1); + xa_init_flags(&fpriv->contexts, XA_FLAGS_ALLOC1); + xa_init(&fpriv->syncpoints); mutex_init(&fpriv->lock); filp->driver_priv = fpriv; @@ -107,20 +119,6 @@ static void tegra_drm_context_free(struct tegra_drm_context *context) kfree(context); } -static struct host1x_bo * -host1x_bo_lookup(struct drm_file *file, u32 handle) -{ - struct drm_gem_object *gem; - struct tegra_bo *bo; - - gem = drm_gem_object_lookup(file, handle); - if (!gem) - return NULL; - - bo = to_tegra_bo(gem); - return &bo->base; -} - static int host1x_reloc_copy_from_user(struct host1x_reloc *dest, struct drm_tegra_reloc __user *src, struct drm_device *drm, @@ -151,11 +149,11 @@ static int host1x_reloc_copy_from_user(struct host1x_reloc *dest, dest->flags = HOST1X_RELOC_READ | HOST1X_RELOC_WRITE; - dest->cmdbuf.bo = host1x_bo_lookup(file, cmdbuf); + dest->cmdbuf.bo = tegra_gem_lookup(file, cmdbuf); if (!dest->cmdbuf.bo) return -ENOENT; - dest->target.bo = host1x_bo_lookup(file, target); + dest->target.bo = tegra_gem_lookup(file, target); if (!dest->target.bo) return -ENOENT; @@ -193,7 +191,7 @@ int tegra_drm_submit(struct tegra_drm_context *context, return -EINVAL; job = host1x_job_alloc(context->channel, args->num_cmdbufs, - args->num_relocs); + args->num_relocs, false); if (!job) return -ENOMEM; @@ -201,6 +199,7 @@ int tegra_drm_submit(struct tegra_drm_context *context, job->client = client; job->class = client->class; job->serialize = true; + job->syncpt_recovery = true; /* * Track referenced BOs so that they can be unreferenced after the @@ -237,7 +236,7 @@ int tegra_drm_submit(struct tegra_drm_context *context, goto fail; } - bo = host1x_bo_lookup(file, cmdbuf.handle); + bo = tegra_gem_lookup(file, cmdbuf.handle); if (!bo) { err = -ENOENT; goto fail; @@ -432,7 +431,7 @@ static int tegra_client_open(struct tegra_drm_file *fpriv, if (err < 0) return err; - err = idr_alloc(&fpriv->contexts, context, 1, 0, GFP_KERNEL); + err = idr_alloc(&fpriv->legacy_contexts, context, 1, 0, GFP_KERNEL); if (err < 0) { client->ops->close_channel(context); return err; @@ -487,13 +486,13 @@ static int tegra_close_channel(struct drm_device *drm, void *data, mutex_lock(&fpriv->lock); - context = idr_find(&fpriv->contexts, args->context); + context = idr_find(&fpriv->legacy_contexts, args->context); if (!context) { err = -EINVAL; goto unlock; } - idr_remove(&fpriv->contexts, context->id); + idr_remove(&fpriv->legacy_contexts, context->id); tegra_drm_context_free(context); unlock: @@ -512,7 +511,7 @@ static int tegra_get_syncpt(struct drm_device *drm, void *data, mutex_lock(&fpriv->lock); - context = idr_find(&fpriv->contexts, args->context); + context = idr_find(&fpriv->legacy_contexts, args->context); if (!context) { err = -ENODEV; goto unlock; @@ -541,7 +540,7 @@ static int tegra_submit(struct 
drm_device *drm, void *data, mutex_lock(&fpriv->lock); - context = idr_find(&fpriv->contexts, args->context); + context = idr_find(&fpriv->legacy_contexts, args->context); if (!context) { err = -ENODEV; goto unlock; @@ -566,7 +565,7 @@ static int tegra_get_syncpt_base(struct drm_device *drm, void *data, mutex_lock(&fpriv->lock); - context = idr_find(&fpriv->contexts, args->context); + context = idr_find(&fpriv->legacy_contexts, args->context); if (!context) { err = -ENODEV; goto unlock; @@ -735,10 +734,25 @@ static int tegra_gem_get_flags(struct drm_device *drm, void *data, static const struct drm_ioctl_desc tegra_drm_ioctls[] = { #ifdef CONFIG_DRM_TEGRA_STAGING - DRM_IOCTL_DEF_DRV(TEGRA_GEM_CREATE, tegra_gem_create, + DRM_IOCTL_DEF_DRV(TEGRA_CHANNEL_OPEN, tegra_drm_ioctl_channel_open, + DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(TEGRA_CHANNEL_CLOSE, tegra_drm_ioctl_channel_close, + DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(TEGRA_CHANNEL_MAP, tegra_drm_ioctl_channel_map, + DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(TEGRA_CHANNEL_UNMAP, tegra_drm_ioctl_channel_unmap, DRM_RENDER_ALLOW), - DRM_IOCTL_DEF_DRV(TEGRA_GEM_MMAP, tegra_gem_mmap, + DRM_IOCTL_DEF_DRV(TEGRA_CHANNEL_SUBMIT, tegra_drm_ioctl_channel_submit, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(TEGRA_SYNCPOINT_ALLOCATE, tegra_drm_ioctl_syncpoint_allocate, + DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(TEGRA_SYNCPOINT_FREE, tegra_drm_ioctl_syncpoint_free, + DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(TEGRA_SYNCPOINT_WAIT, tegra_drm_ioctl_syncpoint_wait, + DRM_RENDER_ALLOW), + + DRM_IOCTL_DEF_DRV(TEGRA_GEM_CREATE, tegra_gem_create, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(TEGRA_GEM_MMAP, tegra_gem_mmap, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(TEGRA_SYNCPT_READ, tegra_syncpt_read, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(TEGRA_SYNCPT_INCR, tegra_syncpt_incr, @@ -792,10 +806,11 @@ static void tegra_drm_postclose(struct drm_device *drm, struct drm_file *file) struct tegra_drm_file *fpriv = file->driver_priv; mutex_lock(&fpriv->lock); - idr_for_each(&fpriv->contexts, tegra_drm_context_cleanup, NULL); + idr_for_each(&fpriv->legacy_contexts, tegra_drm_context_cleanup, NULL); + tegra_drm_uapi_close_file(fpriv); mutex_unlock(&fpriv->lock); - idr_destroy(&fpriv->contexts); + idr_destroy(&fpriv->legacy_contexts); mutex_destroy(&fpriv->lock); kfree(fpriv); } @@ -853,7 +868,7 @@ static void tegra_debugfs_init(struct drm_minor *minor) static const struct drm_driver tegra_drm_driver = { .driver_features = DRIVER_MODESET | DRIVER_GEM | - DRIVER_ATOMIC | DRIVER_RENDER, + DRIVER_ATOMIC | DRIVER_RENDER | DRIVER_SYNCOBJ, .open = tegra_drm_open, .postclose = tegra_drm_postclose, .lastclose = drm_fb_helper_lastclose, @@ -883,6 +898,14 @@ static const struct drm_driver tegra_drm_driver = { int tegra_drm_register_client(struct tegra_drm *tegra, struct tegra_drm_client *client) { + /* + * When MLOCKs are implemented, change to allocate a shared channel + * only when MLOCKs are disabled. 
+ */ + client->shared_channel = host1x_channel_request(&client->base); + if (!client->shared_channel) + return -EBUSY; + mutex_lock(&tegra->clients_lock); list_add_tail(&client->list, &tegra->clients); client->drm = tegra; @@ -899,6 +922,9 @@ int tegra_drm_unregister_client(struct tegra_drm *tegra, client->drm = NULL; mutex_unlock(&tegra->clients_lock); + if (client->shared_channel) + host1x_channel_put(client->shared_channel); + return 0; } @@ -1188,13 +1214,6 @@ static int host1x_drm_probe(struct host1x_device *dev) goto device; } - /* - * We don't use the drm_irq_install() helpers provided by the DRM - * core, so we need to set this manually in order to allow the - * DRM_IOCTL_WAIT_VBLANK to operate correctly. - */ - drm->irq_enabled = true; - /* syncpoints are used for full 32-bit hardware VBLANK counters */ drm->max_vblank_count = 0xffffffff; @@ -1204,7 +1223,7 @@ static int host1x_drm_probe(struct host1x_device *dev) drm_mode_config_reset(drm); - err = drm_aperture_remove_framebuffers(false, "tegradrmfb"); + err = drm_aperture_remove_framebuffers(false, &tegra_drm_driver); if (err < 0) goto hub; diff --git a/drivers/gpu/drm/tegra/drm.h b/drivers/gpu/drm/tegra/drm.h index 0cb868065348..8b28327c931c 100644 --- a/drivers/gpu/drm/tegra/drm.h +++ b/drivers/gpu/drm/tegra/drm.h @@ -64,12 +64,22 @@ struct tegra_drm { struct tegra_display_hub *hub; }; +static inline struct host1x *tegra_drm_to_host1x(struct tegra_drm *tegra) +{ + return dev_get_drvdata(tegra->drm->dev->parent); +} + struct tegra_drm_client; struct tegra_drm_context { struct tegra_drm_client *client; struct host1x_channel *channel; + + /* Only used by legacy UAPI. */ unsigned int id; + + /* Only used by new UAPI. */ + struct xarray mappings; }; struct tegra_drm_client_ops { @@ -91,7 +101,9 @@ struct tegra_drm_client { struct host1x_client base; struct list_head list; struct tegra_drm *drm; + struct host1x_channel *shared_channel; + /* Set by driver */ unsigned int version; const struct tegra_drm_client_ops *ops; }; diff --git a/drivers/gpu/drm/tegra/firewall.c b/drivers/gpu/drm/tegra/firewall.c new file mode 100644 index 000000000000..1824d2db0e2c --- /dev/null +++ b/drivers/gpu/drm/tegra/firewall.c @@ -0,0 +1,254 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2010-2020 NVIDIA Corporation */ + +#include "drm.h" +#include "submit.h" +#include "uapi.h" + +struct tegra_drm_firewall { + struct tegra_drm_submit_data *submit; + struct tegra_drm_client *client; + u32 *data; + u32 pos; + u32 end; + u32 class; +}; + +static int fw_next(struct tegra_drm_firewall *fw, u32 *word) +{ + if (fw->pos == fw->end) + return -EINVAL; + + *word = fw->data[fw->pos++]; + + return 0; +} + +static bool fw_check_addr_valid(struct tegra_drm_firewall *fw, u32 offset) +{ + u32 i; + + for (i = 0; i < fw->submit->num_used_mappings; i++) { + struct tegra_drm_mapping *m = fw->submit->used_mappings[i].mapping; + + if (offset >= m->iova && offset <= m->iova_end) + return true; + } + + return false; +} + +static int fw_check_reg(struct tegra_drm_firewall *fw, u32 offset) +{ + bool is_addr; + u32 word; + int err; + + err = fw_next(fw, &word); + if (err) + return err; + + if (!fw->client->ops->is_addr_reg) + return 0; + + is_addr = fw->client->ops->is_addr_reg(fw->client->base.dev, fw->class, + offset); + + if (!is_addr) + return 0; + + if (!fw_check_addr_valid(fw, word)) + return -EINVAL; + + return 0; +} + +static int fw_check_regs_seq(struct tegra_drm_firewall *fw, u32 offset, + u32 count, bool incr) +{ + u32 i; + + for (i = 0; i < count; i++) { 
+ if (fw_check_reg(fw, offset)) + return -EINVAL; + + if (incr) + offset++; + } + + return 0; +} + +static int fw_check_regs_mask(struct tegra_drm_firewall *fw, u32 offset, + u16 mask) +{ + unsigned long bmask = mask; + unsigned int bit; + + for_each_set_bit(bit, &bmask, 16) { + if (fw_check_reg(fw, offset+bit)) + return -EINVAL; + } + + return 0; +} + +static int fw_check_regs_imm(struct tegra_drm_firewall *fw, u32 offset) +{ + bool is_addr; + + is_addr = fw->client->ops->is_addr_reg(fw->client->base.dev, fw->class, + offset); + if (is_addr) + return -EINVAL; + + return 0; +} + +static int fw_check_class(struct tegra_drm_firewall *fw, u32 class) +{ + if (!fw->client->ops->is_valid_class) { + if (class == fw->client->base.class) + return 0; + else + return -EINVAL; + } + + if (!fw->client->ops->is_valid_class(class)) + return -EINVAL; + + return 0; +} + +enum { + HOST1X_OPCODE_SETCLASS = 0x00, + HOST1X_OPCODE_INCR = 0x01, + HOST1X_OPCODE_NONINCR = 0x02, + HOST1X_OPCODE_MASK = 0x03, + HOST1X_OPCODE_IMM = 0x04, + HOST1X_OPCODE_RESTART = 0x05, + HOST1X_OPCODE_GATHER = 0x06, + HOST1X_OPCODE_SETSTRMID = 0x07, + HOST1X_OPCODE_SETAPPID = 0x08, + HOST1X_OPCODE_SETPYLD = 0x09, + HOST1X_OPCODE_INCR_W = 0x0a, + HOST1X_OPCODE_NONINCR_W = 0x0b, + HOST1X_OPCODE_GATHER_W = 0x0c, + HOST1X_OPCODE_RESTART_W = 0x0d, + HOST1X_OPCODE_EXTEND = 0x0e, +}; + +int tegra_drm_fw_validate(struct tegra_drm_client *client, u32 *data, u32 start, + u32 words, struct tegra_drm_submit_data *submit, + u32 *job_class) +{ + struct tegra_drm_firewall fw = { + .submit = submit, + .client = client, + .data = data, + .pos = start, + .end = start+words, + .class = *job_class, + }; + bool payload_valid = false; + u32 payload; + int err; + + while (fw.pos != fw.end) { + u32 word, opcode, offset, count, mask, class; + + err = fw_next(&fw, &word); + if (err) + return err; + + opcode = (word & 0xf0000000) >> 28; + + switch (opcode) { + case HOST1X_OPCODE_SETCLASS: + offset = word >> 16 & 0xfff; + mask = word & 0x3f; + class = (word >> 6) & 0x3ff; + err = fw_check_class(&fw, class); + fw.class = class; + *job_class = class; + if (!err) + err = fw_check_regs_mask(&fw, offset, mask); + if (err) + dev_warn(client->base.dev, + "illegal SETCLASS(offset=0x%x, mask=0x%x, class=0x%x) at word %u", + offset, mask, class, fw.pos-1); + break; + case HOST1X_OPCODE_INCR: + offset = (word >> 16) & 0xfff; + count = word & 0xffff; + err = fw_check_regs_seq(&fw, offset, count, true); + if (err) + dev_warn(client->base.dev, + "illegal INCR(offset=0x%x, count=%u) in class 0x%x at word %u", + offset, count, fw.class, fw.pos-1); + break; + case HOST1X_OPCODE_NONINCR: + offset = (word >> 16) & 0xfff; + count = word & 0xffff; + err = fw_check_regs_seq(&fw, offset, count, false); + if (err) + dev_warn(client->base.dev, + "illegal NONINCR(offset=0x%x, count=%u) in class 0x%x at word %u", + offset, count, fw.class, fw.pos-1); + break; + case HOST1X_OPCODE_MASK: + offset = (word >> 16) & 0xfff; + mask = word & 0xffff; + err = fw_check_regs_mask(&fw, offset, mask); + if (err) + dev_warn(client->base.dev, + "illegal MASK(offset=0x%x, mask=0x%x) in class 0x%x at word %u", + offset, mask, fw.class, fw.pos-1); + break; + case HOST1X_OPCODE_IMM: + /* IMM cannot reasonably be used to write a pointer */ + offset = (word >> 16) & 0xfff; + err = fw_check_regs_imm(&fw, offset); + if (err) + dev_warn(client->base.dev, + "illegal IMM(offset=0x%x) in class 0x%x at word %u", + offset, fw.class, fw.pos-1); + break; + case HOST1X_OPCODE_SETPYLD: + payload = word & 0xffff; + 
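	/*
	 * Illustrative decode of the word layout the firewall checks above
	 * (example word, not from the patch): a SETCLASS word built as
	 * (offset << 16) | (class << 6) | mask, e.g. 0x00001740, splits into
	 * opcode = word >> 28 = 0x0 (SETCLASS), offset = (word >> 16) & 0xfff
	 * = 0, class = (word >> 6) & 0x3ff = 0x5d and mask = word & 0x3f = 0;
	 * with mask == 0, fw_check_regs_mask() then has no register writes
	 * left to validate for this word.
	 */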
payload_valid = true; + break; + case HOST1X_OPCODE_INCR_W: + if (!payload_valid) + return -EINVAL; + + offset = word & 0x3fffff; + err = fw_check_regs_seq(&fw, offset, payload, true); + if (err) + dev_warn(client->base.dev, + "illegal INCR_W(offset=0x%x) in class 0x%x at word %u", + offset, fw.class, fw.pos-1); + break; + case HOST1X_OPCODE_NONINCR_W: + if (!payload_valid) + return -EINVAL; + + offset = word & 0x3fffff; + err = fw_check_regs_seq(&fw, offset, payload, false); + if (err) + dev_warn(client->base.dev, + "illegal NONINCR(offset=0x%x) in class 0x%x at word %u", + offset, fw.class, fw.pos-1); + break; + default: + dev_warn(client->base.dev, "illegal opcode at word %u", + fw.pos-1); + return -EINVAL; + } + + if (err) + return err; + } + + return 0; +} diff --git a/drivers/gpu/drm/tegra/gem.c b/drivers/gpu/drm/tegra/gem.c index 26af8daa9a16..6ec598f5d5b3 100644 --- a/drivers/gpu/drm/tegra/gem.c +++ b/drivers/gpu/drm/tegra/gem.c @@ -707,3 +707,16 @@ struct drm_gem_object *tegra_gem_prime_import(struct drm_device *drm, return &bo->gem; } + +struct host1x_bo *tegra_gem_lookup(struct drm_file *file, u32 handle) +{ + struct drm_gem_object *gem; + struct tegra_bo *bo; + + gem = drm_gem_object_lookup(file, handle); + if (!gem) + return NULL; + + bo = to_tegra_bo(gem); + return &bo->base; +} diff --git a/drivers/gpu/drm/tegra/gem.h b/drivers/gpu/drm/tegra/gem.h index c15fd99d6cb2..cb5146a67668 100644 --- a/drivers/gpu/drm/tegra/gem.h +++ b/drivers/gpu/drm/tegra/gem.h @@ -80,4 +80,6 @@ struct dma_buf *tegra_gem_prime_export(struct drm_gem_object *gem, struct drm_gem_object *tegra_gem_prime_import(struct drm_device *drm, struct dma_buf *buf); +struct host1x_bo *tegra_gem_lookup(struct drm_file *file, u32 handle); + #endif diff --git a/drivers/gpu/drm/tegra/plane.c b/drivers/gpu/drm/tegra/plane.c index 2e65b4075ce6..e00ec3f40ec8 100644 --- a/drivers/gpu/drm/tegra/plane.c +++ b/drivers/gpu/drm/tegra/plane.c @@ -4,6 +4,7 @@ */ #include <linux/iommu.h> +#include <linux/interconnect.h> #include <drm/drm_atomic.h> #include <drm/drm_atomic_helper.h> @@ -64,6 +65,9 @@ tegra_plane_atomic_duplicate_state(struct drm_plane *plane) copy->reflect_x = state->reflect_x; copy->reflect_y = state->reflect_y; copy->opaque = state->opaque; + copy->total_peak_memory_bandwidth = state->total_peak_memory_bandwidth; + copy->peak_memory_bandwidth = state->peak_memory_bandwidth; + copy->avg_memory_bandwidth = state->avg_memory_bandwidth; for (i = 0; i < 2; i++) copy->blending[i] = state->blending[i]; @@ -244,6 +248,78 @@ void tegra_plane_cleanup_fb(struct drm_plane *plane, tegra_dc_unpin(dc, to_tegra_plane_state(state)); } +static int tegra_plane_calculate_memory_bandwidth(struct drm_plane_state *state) +{ + struct tegra_plane_state *tegra_state = to_tegra_plane_state(state); + unsigned int i, bpp, dst_w, dst_h, src_w, src_h, mul; + const struct tegra_dc_soc_info *soc; + const struct drm_format_info *fmt; + struct drm_crtc_state *crtc_state; + u64 avg_bandwidth, peak_bandwidth; + + if (!state->visible) + return 0; + + crtc_state = drm_atomic_get_new_crtc_state(state->state, state->crtc); + if (!crtc_state) + return -EINVAL; + + src_w = drm_rect_width(&state->src) >> 16; + src_h = drm_rect_height(&state->src) >> 16; + dst_w = drm_rect_width(&state->dst); + dst_h = drm_rect_height(&state->dst); + + fmt = state->fb->format; + soc = to_tegra_dc(state->crtc)->soc; + + /* + * Note that real memory bandwidth vary depending on format and + * memory layout, we are not taking that into account because small + * estimation 
error isn't important since bandwidth is rounded up + * anyway. + */ + for (i = 0, bpp = 0; i < fmt->num_planes; i++) { + unsigned int bpp_plane = fmt->cpp[i] * 8; + + /* + * Sub-sampling is relevant for chroma planes only and vertical + * readouts are not cached, hence only horizontal sub-sampling + * matters. + */ + if (i > 0) + bpp_plane /= fmt->hsub; + + bpp += bpp_plane; + } + + /* average bandwidth in kbytes/sec */ + avg_bandwidth = min(src_w, dst_w) * min(src_h, dst_h); + avg_bandwidth *= drm_mode_vrefresh(&crtc_state->adjusted_mode); + avg_bandwidth = DIV_ROUND_UP(avg_bandwidth * bpp, 8) + 999; + do_div(avg_bandwidth, 1000); + + /* mode.clock in kHz, peak bandwidth in kbytes/sec */ + peak_bandwidth = DIV_ROUND_UP(crtc_state->adjusted_mode.clock * bpp, 8); + + /* + * Tegra30/114 Memory Controller can't interleave DC memory requests + * for the tiled windows because DC uses 16-bytes atom, while DDR3 + * uses 32-bytes atom. Hence there is x2 memory overfetch for tiled + * framebuffer and DDR3 on these SoCs. + */ + if (soc->plane_tiled_memory_bandwidth_x2 && + tegra_state->tiling.mode == TEGRA_BO_TILING_MODE_TILED) + mul = 2; + else + mul = 1; + + /* ICC bandwidth in kbytes/sec */ + tegra_state->peak_memory_bandwidth = kBps_to_icc(peak_bandwidth) * mul; + tegra_state->avg_memory_bandwidth = kBps_to_icc(avg_bandwidth) * mul; + + return 0; +} + int tegra_plane_state_add(struct tegra_plane *plane, struct drm_plane_state *state) { @@ -262,6 +338,10 @@ int tegra_plane_state_add(struct tegra_plane *plane, if (err < 0) return err; + err = tegra_plane_calculate_memory_bandwidth(state); + if (err < 0) + return err; + tegra = to_dc_state(crtc_state); tegra->planes |= WIN_A_ACT_REQ << plane->index; @@ -646,3 +726,40 @@ int tegra_plane_setup_legacy_state(struct tegra_plane *tegra, return 0; } + +static const char * const tegra_plane_icc_names[TEGRA_DC_LEGACY_PLANES_NUM] = { + "wina", "winb", "winc", NULL, NULL, NULL, "cursor", +}; + +int tegra_plane_interconnect_init(struct tegra_plane *plane) +{ + const char *icc_name = tegra_plane_icc_names[plane->index]; + struct device *dev = plane->dc->dev; + struct tegra_dc *dc = plane->dc; + int err; + + if (WARN_ON(plane->index >= TEGRA_DC_LEGACY_PLANES_NUM) || + WARN_ON(!tegra_plane_icc_names[plane->index])) + return -EINVAL; + + plane->icc_mem = devm_of_icc_get(dev, icc_name); + err = PTR_ERR_OR_ZERO(plane->icc_mem); + if (err) { + dev_err_probe(dev, err, "failed to get %s interconnect\n", + icc_name); + return err; + } + + /* plane B on T20/30 has a dedicated memory client for a 6-tap vertical filter */ + if (plane->index == 1 && dc->soc->has_win_b_vfilter_mem_client) { + plane->icc_mem_vfilter = devm_of_icc_get(dev, "winb-vfilter"); + err = PTR_ERR_OR_ZERO(plane->icc_mem_vfilter); + if (err) { + dev_err_probe(dev, err, "failed to get %s interconnect\n", + "winb-vfilter"); + return err; + } + } + + return 0; +} diff --git a/drivers/gpu/drm/tegra/plane.h b/drivers/gpu/drm/tegra/plane.h index 1785c1559c0c..d9470780c803 100644 --- a/drivers/gpu/drm/tegra/plane.h +++ b/drivers/gpu/drm/tegra/plane.h @@ -8,6 +8,7 @@ #include <drm/drm_plane.h> +struct icc_path; struct tegra_bo; struct tegra_dc; @@ -16,6 +17,9 @@ struct tegra_plane { struct tegra_dc *dc; unsigned int offset; unsigned int index; + + struct icc_path *icc_mem; + struct icc_path *icc_mem_vfilter; }; struct tegra_cursor { @@ -52,6 +56,11 @@ struct tegra_plane_state { /* used for legacy blending support only */ struct tegra_plane_legacy_blending_state blending[2]; bool opaque; + + /* bandwidths are 
in ICC units, i.e. kbytes/sec */ + u32 total_peak_memory_bandwidth; + u32 peak_memory_bandwidth; + u32 avg_memory_bandwidth; }; static inline struct tegra_plane_state * @@ -63,6 +72,12 @@ to_tegra_plane_state(struct drm_plane_state *state) return NULL; } +static inline const struct tegra_plane_state * +to_const_tegra_plane_state(const struct drm_plane_state *state) +{ + return to_tegra_plane_state((struct drm_plane_state *)state); +} + extern const struct drm_plane_funcs tegra_plane_funcs; int tegra_plane_prepare_fb(struct drm_plane *plane, @@ -78,5 +93,6 @@ bool tegra_plane_format_is_indexed(unsigned int format); bool tegra_plane_format_is_yuv(unsigned int format, bool *planar, unsigned int *bpc); int tegra_plane_setup_legacy_state(struct tegra_plane *tegra, struct tegra_plane_state *state); +int tegra_plane_interconnect_init(struct tegra_plane *plane); #endif /* TEGRA_PLANE_H */ diff --git a/drivers/gpu/drm/tegra/rgb.c b/drivers/gpu/drm/tegra/rgb.c index 4142a56ca764..606c78a2b988 100644 --- a/drivers/gpu/drm/tegra/rgb.c +++ b/drivers/gpu/drm/tegra/rgb.c @@ -275,11 +275,8 @@ int tegra_dc_rgb_init(struct drm_device *drm, struct tegra_dc *dc) if (output->bridge) { err = drm_bridge_attach(&output->encoder, output->bridge, NULL, DRM_BRIDGE_ATTACH_NO_CONNECTOR); - if (err) { - dev_err(output->dev, "failed to attach bridge: %d\n", - err); + if (err) return err; - } connector = drm_bridge_connector_init(drm, &output->encoder); if (IS_ERR(connector)) { diff --git a/drivers/gpu/drm/tegra/submit.c b/drivers/gpu/drm/tegra/submit.c new file mode 100644 index 000000000000..776f825df52f --- /dev/null +++ b/drivers/gpu/drm/tegra/submit.c @@ -0,0 +1,625 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2020 NVIDIA Corporation */ + +#include <linux/dma-fence-array.h> +#include <linux/dma-mapping.h> +#include <linux/file.h> +#include <linux/host1x.h> +#include <linux/iommu.h> +#include <linux/kref.h> +#include <linux/list.h> +#include <linux/nospec.h> +#include <linux/pm_runtime.h> +#include <linux/scatterlist.h> +#include <linux/slab.h> +#include <linux/sync_file.h> + +#include <drm/drm_drv.h> +#include <drm/drm_file.h> +#include <drm/drm_syncobj.h> + +#include "drm.h" +#include "gem.h" +#include "submit.h" +#include "uapi.h" + +#define SUBMIT_ERR(context, fmt, ...) 
\ + dev_err_ratelimited(context->client->base.dev, \ + "%s: job submission failed: " fmt "\n", \ + current->comm, ##__VA_ARGS__) + +struct gather_bo { + struct host1x_bo base; + + struct kref ref; + + struct device *dev; + u32 *gather_data; + dma_addr_t gather_data_dma; + size_t gather_data_words; +}; + +static struct host1x_bo *gather_bo_get(struct host1x_bo *host_bo) +{ + struct gather_bo *bo = container_of(host_bo, struct gather_bo, base); + + kref_get(&bo->ref); + + return host_bo; +} + +static void gather_bo_release(struct kref *ref) +{ + struct gather_bo *bo = container_of(ref, struct gather_bo, ref); + + dma_free_attrs(bo->dev, bo->gather_data_words * 4, bo->gather_data, bo->gather_data_dma, + 0); + kfree(bo); +} + +static void gather_bo_put(struct host1x_bo *host_bo) +{ + struct gather_bo *bo = container_of(host_bo, struct gather_bo, base); + + kref_put(&bo->ref, gather_bo_release); +} + +static struct sg_table * +gather_bo_pin(struct device *dev, struct host1x_bo *host_bo, dma_addr_t *phys) +{ + struct gather_bo *bo = container_of(host_bo, struct gather_bo, base); + struct sg_table *sgt; + int err; + + sgt = kzalloc(sizeof(*sgt), GFP_KERNEL); + if (!sgt) + return ERR_PTR(-ENOMEM); + + err = dma_get_sgtable(bo->dev, sgt, bo->gather_data, bo->gather_data_dma, + bo->gather_data_words * 4); + if (err) { + kfree(sgt); + return ERR_PTR(err); + } + + return sgt; +} + +static void gather_bo_unpin(struct device *dev, struct sg_table *sgt) +{ + if (sgt) { + sg_free_table(sgt); + kfree(sgt); + } +} + +static void *gather_bo_mmap(struct host1x_bo *host_bo) +{ + struct gather_bo *bo = container_of(host_bo, struct gather_bo, base); + + return bo->gather_data; +} + +static void gather_bo_munmap(struct host1x_bo *host_bo, void *addr) +{ +} + +const struct host1x_bo_ops gather_bo_ops = { + .get = gather_bo_get, + .put = gather_bo_put, + .pin = gather_bo_pin, + .unpin = gather_bo_unpin, + .mmap = gather_bo_mmap, + .munmap = gather_bo_munmap, +}; + +static struct tegra_drm_mapping * +tegra_drm_mapping_get(struct tegra_drm_context *context, u32 id) +{ + struct tegra_drm_mapping *mapping; + + xa_lock(&context->mappings); + + mapping = xa_load(&context->mappings, id); + if (mapping) + kref_get(&mapping->ref); + + xa_unlock(&context->mappings); + + return mapping; +} + +static void *alloc_copy_user_array(void __user *from, size_t count, size_t size) +{ + size_t copy_len; + void *data; + + if (check_mul_overflow(count, size, ©_len)) + return ERR_PTR(-EINVAL); + + if (copy_len > 0x4000) + return ERR_PTR(-E2BIG); + + data = kvmalloc(copy_len, GFP_KERNEL); + if (!data) + return ERR_PTR(-ENOMEM); + + if (copy_from_user(data, from, copy_len)) { + kvfree(data); + return ERR_PTR(-EFAULT); + } + + return data; +} + +static int submit_copy_gather_data(struct gather_bo **pbo, struct device *dev, + struct tegra_drm_context *context, + struct drm_tegra_channel_submit *args) +{ + struct gather_bo *bo; + size_t copy_len; + + if (args->gather_data_words == 0) { + SUBMIT_ERR(context, "gather_data_words cannot be zero"); + return -EINVAL; + } + + if (check_mul_overflow((size_t)args->gather_data_words, (size_t)4, ©_len)) { + SUBMIT_ERR(context, "gather_data_words is too large"); + return -EINVAL; + } + + bo = kzalloc(sizeof(*bo), GFP_KERNEL); + if (!bo) { + SUBMIT_ERR(context, "failed to allocate memory for bo info"); + return -ENOMEM; + } + + host1x_bo_init(&bo->base, &gather_bo_ops); + kref_init(&bo->ref); + bo->dev = dev; + + bo->gather_data = dma_alloc_attrs(dev, copy_len, &bo->gather_data_dma, + GFP_KERNEL | 
__GFP_NOWARN, 0); + if (!bo->gather_data) { + SUBMIT_ERR(context, "failed to allocate memory for gather data"); + kfree(bo); + return -ENOMEM; + } + + if (copy_from_user(bo->gather_data, u64_to_user_ptr(args->gather_data_ptr), copy_len)) { + SUBMIT_ERR(context, "failed to copy gather data from userspace"); + dma_free_attrs(dev, copy_len, bo->gather_data, bo->gather_data_dma, 0); + kfree(bo); + return -EFAULT; + } + + bo->gather_data_words = args->gather_data_words; + + *pbo = bo; + + return 0; +} + +static int submit_write_reloc(struct tegra_drm_context *context, struct gather_bo *bo, + struct drm_tegra_submit_buf *buf, struct tegra_drm_mapping *mapping) +{ + /* TODO check that target_offset is within bounds */ + dma_addr_t iova = mapping->iova + buf->reloc.target_offset; + u32 written_ptr; + +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT + if (buf->flags & DRM_TEGRA_SUBMIT_RELOC_SECTOR_LAYOUT) + iova |= BIT_ULL(39); +#endif + + written_ptr = iova >> buf->reloc.shift; + + if (buf->reloc.gather_offset_words >= bo->gather_data_words) { + SUBMIT_ERR(context, + "relocation has too large gather offset (%u vs gather length %zu)", + buf->reloc.gather_offset_words, bo->gather_data_words); + return -EINVAL; + } + + buf->reloc.gather_offset_words = array_index_nospec(buf->reloc.gather_offset_words, + bo->gather_data_words); + + bo->gather_data[buf->reloc.gather_offset_words] = written_ptr; + + return 0; +} + +static int submit_process_bufs(struct tegra_drm_context *context, struct gather_bo *bo, + struct drm_tegra_channel_submit *args, + struct tegra_drm_submit_data *job_data) +{ + struct tegra_drm_used_mapping *mappings; + struct drm_tegra_submit_buf *bufs; + int err; + u32 i; + + bufs = alloc_copy_user_array(u64_to_user_ptr(args->bufs_ptr), args->num_bufs, + sizeof(*bufs)); + if (IS_ERR(bufs)) { + SUBMIT_ERR(context, "failed to copy bufs array from userspace"); + return PTR_ERR(bufs); + } + + mappings = kcalloc(args->num_bufs, sizeof(*mappings), GFP_KERNEL); + if (!mappings) { + SUBMIT_ERR(context, "failed to allocate memory for mapping info"); + err = -ENOMEM; + goto done; + } + + for (i = 0; i < args->num_bufs; i++) { + struct drm_tegra_submit_buf *buf = &bufs[i]; + struct tegra_drm_mapping *mapping; + + if (buf->flags & ~DRM_TEGRA_SUBMIT_RELOC_SECTOR_LAYOUT) { + SUBMIT_ERR(context, "invalid flag specified for buffer"); + err = -EINVAL; + goto drop_refs; + } + + mapping = tegra_drm_mapping_get(context, buf->mapping); + if (!mapping) { + SUBMIT_ERR(context, "invalid mapping ID '%u' for buffer", buf->mapping); + err = -EINVAL; + goto drop_refs; + } + + err = submit_write_reloc(context, bo, buf, mapping); + if (err) { + tegra_drm_mapping_put(mapping); + goto drop_refs; + } + + mappings[i].mapping = mapping; + mappings[i].flags = buf->flags; + } + + job_data->used_mappings = mappings; + job_data->num_used_mappings = i; + + err = 0; + + goto done; + +drop_refs: + while (i--) + tegra_drm_mapping_put(mappings[i].mapping); + + kfree(mappings); + job_data->used_mappings = NULL; + +done: + kvfree(bufs); + + return err; +} + +static int submit_get_syncpt(struct tegra_drm_context *context, struct host1x_job *job, + struct xarray *syncpoints, struct drm_tegra_channel_submit *args) +{ + struct host1x_syncpt *sp; + + if (args->syncpt.flags) { + SUBMIT_ERR(context, "invalid flag specified for syncpt"); + return -EINVAL; + } + + /* Syncpt ref will be dropped on job release */ + sp = xa_load(syncpoints, args->syncpt.id); + if (!sp) { + SUBMIT_ERR(context, "syncpoint specified in syncpt was not allocated"); + return 
-EINVAL; + } + + job->syncpt = host1x_syncpt_get(sp); + job->syncpt_incrs = args->syncpt.increments; + + return 0; +} + +static int submit_job_add_gather(struct host1x_job *job, struct tegra_drm_context *context, + struct drm_tegra_submit_cmd_gather_uptr *cmd, + struct gather_bo *bo, u32 *offset, + struct tegra_drm_submit_data *job_data, + u32 *class) +{ + u32 next_offset; + + if (cmd->reserved[0] || cmd->reserved[1] || cmd->reserved[2]) { + SUBMIT_ERR(context, "non-zero reserved field in GATHER_UPTR command"); + return -EINVAL; + } + + /* Check for maximum gather size */ + if (cmd->words > 16383) { + SUBMIT_ERR(context, "too many words in GATHER_UPTR command"); + return -EINVAL; + } + + if (check_add_overflow(*offset, cmd->words, &next_offset)) { + SUBMIT_ERR(context, "too many total words in job"); + return -EINVAL; + } + + if (next_offset > bo->gather_data_words) { + SUBMIT_ERR(context, "GATHER_UPTR command overflows gather data"); + return -EINVAL; + } + + if (tegra_drm_fw_validate(context->client, bo->gather_data, *offset, + cmd->words, job_data, class)) { + SUBMIT_ERR(context, "job was rejected by firewall"); + return -EINVAL; + } + + host1x_job_add_gather(job, &bo->base, cmd->words, *offset * 4); + + *offset = next_offset; + + return 0; +} + +static struct host1x_job * +submit_create_job(struct tegra_drm_context *context, struct gather_bo *bo, + struct drm_tegra_channel_submit *args, struct tegra_drm_submit_data *job_data, + struct xarray *syncpoints) +{ + struct drm_tegra_submit_cmd *cmds; + u32 i, gather_offset = 0, class; + struct host1x_job *job; + int err; + + /* Set initial class for firewall. */ + class = context->client->base.class; + + cmds = alloc_copy_user_array(u64_to_user_ptr(args->cmds_ptr), args->num_cmds, + sizeof(*cmds)); + if (IS_ERR(cmds)) { + SUBMIT_ERR(context, "failed to copy cmds array from userspace"); + return ERR_CAST(cmds); + } + + job = host1x_job_alloc(context->channel, args->num_cmds, 0, true); + if (!job) { + SUBMIT_ERR(context, "failed to allocate memory for job"); + job = ERR_PTR(-ENOMEM); + goto done; + } + + err = submit_get_syncpt(context, job, syncpoints, args); + if (err < 0) + goto free_job; + + job->client = &context->client->base; + job->class = context->client->base.class; + job->serialize = true; + + for (i = 0; i < args->num_cmds; i++) { + struct drm_tegra_submit_cmd *cmd = &cmds[i]; + + if (cmd->flags) { + SUBMIT_ERR(context, "unknown flags given for cmd"); + err = -EINVAL; + goto free_job; + } + + if (cmd->type == DRM_TEGRA_SUBMIT_CMD_GATHER_UPTR) { + err = submit_job_add_gather(job, context, &cmd->gather_uptr, bo, + &gather_offset, job_data, &class); + if (err) + goto free_job; + } else if (cmd->type == DRM_TEGRA_SUBMIT_CMD_WAIT_SYNCPT) { + if (cmd->wait_syncpt.reserved[0] || cmd->wait_syncpt.reserved[1]) { + SUBMIT_ERR(context, "non-zero reserved value"); + err = -EINVAL; + goto free_job; + } + + host1x_job_add_wait(job, cmd->wait_syncpt.id, cmd->wait_syncpt.value, + false, class); + } else if (cmd->type == DRM_TEGRA_SUBMIT_CMD_WAIT_SYNCPT_RELATIVE) { + if (cmd->wait_syncpt.reserved[0] || cmd->wait_syncpt.reserved[1]) { + SUBMIT_ERR(context, "non-zero reserved value"); + err = -EINVAL; + goto free_job; + } + + if (cmd->wait_syncpt.id != args->syncpt.id) { + SUBMIT_ERR(context, "syncpoint ID in CMD_WAIT_SYNCPT_RELATIVE is not used by the job"); + err = -EINVAL; + goto free_job; + } + + host1x_job_add_wait(job, cmd->wait_syncpt.id, cmd->wait_syncpt.value, + true, class); + } else { + SUBMIT_ERR(context, "unknown cmd type"); + err = 
-EINVAL; + goto free_job; + } + } + + if (gather_offset == 0) { + SUBMIT_ERR(context, "job must have at least one gather"); + err = -EINVAL; + goto free_job; + } + + goto done; + +free_job: + host1x_job_put(job); + job = ERR_PTR(err); + +done: + kvfree(cmds); + + return job; +} + +static void release_job(struct host1x_job *job) +{ + struct tegra_drm_client *client = container_of(job->client, struct tegra_drm_client, base); + struct tegra_drm_submit_data *job_data = job->user_data; + u32 i; + + for (i = 0; i < job_data->num_used_mappings; i++) + tegra_drm_mapping_put(job_data->used_mappings[i].mapping); + + kfree(job_data->used_mappings); + kfree(job_data); + + if (pm_runtime_enabled(client->base.dev)) + pm_runtime_put_autosuspend(client->base.dev); +} + +int tegra_drm_ioctl_channel_submit(struct drm_device *drm, void *data, + struct drm_file *file) +{ + struct tegra_drm_file *fpriv = file->driver_priv; + struct drm_tegra_channel_submit *args = data; + struct tegra_drm_submit_data *job_data; + struct drm_syncobj *syncobj = NULL; + struct tegra_drm_context *context; + struct host1x_job *job; + struct gather_bo *bo; + u32 i; + int err; + + mutex_lock(&fpriv->lock); + + context = xa_load(&fpriv->contexts, args->context); + if (!context) { + mutex_unlock(&fpriv->lock); + pr_err_ratelimited("%s: %s: invalid channel context '%#x'", __func__, + current->comm, args->context); + return -EINVAL; + } + + if (args->syncobj_in) { + struct dma_fence *fence; + + err = drm_syncobj_find_fence(file, args->syncobj_in, 0, 0, &fence); + if (err) { + SUBMIT_ERR(context, "invalid syncobj_in '%#x'", args->syncobj_in); + goto unlock; + } + + err = dma_fence_wait_timeout(fence, true, msecs_to_jiffies(10000)); + dma_fence_put(fence); + if (err) { + SUBMIT_ERR(context, "wait for syncobj_in timed out"); + goto unlock; + } + } + + if (args->syncobj_out) { + syncobj = drm_syncobj_find(file, args->syncobj_out); + if (!syncobj) { + SUBMIT_ERR(context, "invalid syncobj_out '%#x'", args->syncobj_out); + err = -ENOENT; + goto unlock; + } + } + + /* Allocate gather BO and copy gather words in. */ + err = submit_copy_gather_data(&bo, drm->dev, context, args); + if (err) + goto unlock; + + job_data = kzalloc(sizeof(*job_data), GFP_KERNEL); + if (!job_data) { + SUBMIT_ERR(context, "failed to allocate memory for job data"); + err = -ENOMEM; + goto put_bo; + } + + /* Get data buffer mappings and do relocation patching. */ + err = submit_process_bufs(context, bo, args, job_data); + if (err) + goto free_job_data; + + /* Allocate host1x_job and add gathers and waits to it. */ + job = submit_create_job(context, bo, args, job_data, &fpriv->syncpoints); + if (IS_ERR(job)) { + err = PTR_ERR(job); + goto free_job_data; + } + + /* Map gather data for Host1x. */ + err = host1x_job_pin(job, context->client->base.dev); + if (err) { + SUBMIT_ERR(context, "failed to pin job: %d", err); + goto put_job; + } + + /* Boot engine. */ + if (pm_runtime_enabled(context->client->base.dev)) { + err = pm_runtime_resume_and_get(context->client->base.dev); + if (err < 0) { + SUBMIT_ERR(context, "could not power up engine: %d", err); + goto unpin_job; + } + } + + job->user_data = job_data; + job->release = release_job; + job->timeout = 10000; + + /* + * job_data is now part of job reference counting, so don't release + * it from here. + */ + job_data = NULL; + + /* Submit job to hardware. 
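On success, job->syncpt_end is the completion threshold: it is returned to userspace in args->syncpt.value and used for the optional output syncobj fence below.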
*/ + err = host1x_job_submit(job); + if (err) { + SUBMIT_ERR(context, "host1x job submission failed: %d", err); + goto unpin_job; + } + + /* Return postfences to userspace and add fences to DMA reservations. */ + args->syncpt.value = job->syncpt_end; + + if (syncobj) { + struct dma_fence *fence = host1x_fence_create(job->syncpt, job->syncpt_end); + if (IS_ERR(fence)) { + err = PTR_ERR(fence); + SUBMIT_ERR(context, "failed to create postfence: %d", err); + } + + drm_syncobj_replace_fence(syncobj, fence); + } + + goto put_job; + +unpin_job: + host1x_job_unpin(job); +put_job: + host1x_job_put(job); +free_job_data: + if (job_data && job_data->used_mappings) { + for (i = 0; i < job_data->num_used_mappings; i++) + tegra_drm_mapping_put(job_data->used_mappings[i].mapping); + + kfree(job_data->used_mappings); + } + + if (job_data) + kfree(job_data); +put_bo: + gather_bo_put(&bo->base); +unlock: + if (syncobj) + drm_syncobj_put(syncobj); + + mutex_unlock(&fpriv->lock); + return err; +} diff --git a/drivers/gpu/drm/tegra/submit.h b/drivers/gpu/drm/tegra/submit.h new file mode 100644 index 000000000000..cf6a2f0a29fc --- /dev/null +++ b/drivers/gpu/drm/tegra/submit.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (c) 2020 NVIDIA Corporation */ + +#ifndef _TEGRA_DRM_UAPI_SUBMIT_H +#define _TEGRA_DRM_UAPI_SUBMIT_H + +struct tegra_drm_used_mapping { + struct tegra_drm_mapping *mapping; + u32 flags; +}; + +struct tegra_drm_submit_data { + struct tegra_drm_used_mapping *used_mappings; + u32 num_used_mappings; +}; + +int tegra_drm_fw_validate(struct tegra_drm_client *client, u32 *data, u32 start, + u32 words, struct tegra_drm_submit_data *submit, + u32 *job_class); + +#endif diff --git a/drivers/gpu/drm/tegra/uapi.c b/drivers/gpu/drm/tegra/uapi.c new file mode 100644 index 000000000000..dc16a24f4dbe --- /dev/null +++ b/drivers/gpu/drm/tegra/uapi.c @@ -0,0 +1,338 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2020 NVIDIA Corporation */ + +#include <linux/host1x.h> +#include <linux/iommu.h> +#include <linux/list.h> + +#include <drm/drm_drv.h> +#include <drm/drm_file.h> +#include <drm/drm_utils.h> + +#include "drm.h" +#include "uapi.h" + +static void tegra_drm_mapping_release(struct kref *ref) +{ + struct tegra_drm_mapping *mapping = + container_of(ref, struct tegra_drm_mapping, ref); + + if (mapping->sgt) + dma_unmap_sgtable(mapping->dev, mapping->sgt, mapping->direction, + DMA_ATTR_SKIP_CPU_SYNC); + + host1x_bo_unpin(mapping->dev, mapping->bo, mapping->sgt); + host1x_bo_put(mapping->bo); + + kfree(mapping); +} + +void tegra_drm_mapping_put(struct tegra_drm_mapping *mapping) +{ + kref_put(&mapping->ref, tegra_drm_mapping_release); +} + +static void tegra_drm_channel_context_close(struct tegra_drm_context *context) +{ + struct tegra_drm_mapping *mapping; + unsigned long id; + + xa_for_each(&context->mappings, id, mapping) + tegra_drm_mapping_put(mapping); + + xa_destroy(&context->mappings); + + host1x_channel_put(context->channel); + + kfree(context); +} + +void tegra_drm_uapi_close_file(struct tegra_drm_file *file) +{ + struct tegra_drm_context *context; + struct host1x_syncpt *sp; + unsigned long id; + + xa_for_each(&file->contexts, id, context) + tegra_drm_channel_context_close(context); + + xa_for_each(&file->syncpoints, id, sp) + host1x_syncpt_put(sp); + + xa_destroy(&file->contexts); + xa_destroy(&file->syncpoints); +} + +static struct tegra_drm_client *tegra_drm_find_client(struct tegra_drm *tegra, u32 class) +{ + struct tegra_drm_client *client; + + 
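/*
+  * Resolve the host1x class requested through tegra_drm_ioctl_channel_open()
+  * to the engine client that registered it, or NULL if no such engine was
+  * registered.
+  */
+ 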
list_for_each_entry(client, &tegra->clients, list) + if (client->base.class == class) + return client; + + return NULL; +} + +int tegra_drm_ioctl_channel_open(struct drm_device *drm, void *data, struct drm_file *file) +{ + struct tegra_drm_file *fpriv = file->driver_priv; + struct tegra_drm *tegra = drm->dev_private; + struct drm_tegra_channel_open *args = data; + struct tegra_drm_client *client = NULL; + struct tegra_drm_context *context; + int err; + + if (args->flags) + return -EINVAL; + + context = kzalloc(sizeof(*context), GFP_KERNEL); + if (!context) + return -ENOMEM; + + client = tegra_drm_find_client(tegra, args->host1x_class); + if (!client) { + err = -ENODEV; + goto free; + } + + if (client->shared_channel) { + context->channel = host1x_channel_get(client->shared_channel); + } else { + context->channel = host1x_channel_request(&client->base); + if (!context->channel) { + err = -EBUSY; + goto free; + } + } + + err = xa_alloc(&fpriv->contexts, &args->context, context, XA_LIMIT(1, U32_MAX), + GFP_KERNEL); + if (err < 0) + goto put_channel; + + context->client = client; + xa_init_flags(&context->mappings, XA_FLAGS_ALLOC1); + + args->version = client->version; + args->capabilities = 0; + + if (device_get_dma_attr(client->base.dev) == DEV_DMA_COHERENT) + args->capabilities |= DRM_TEGRA_CHANNEL_CAP_CACHE_COHERENT; + + return 0; + +put_channel: + host1x_channel_put(context->channel); +free: + kfree(context); + + return err; +} + +int tegra_drm_ioctl_channel_close(struct drm_device *drm, void *data, struct drm_file *file) +{ + struct tegra_drm_file *fpriv = file->driver_priv; + struct drm_tegra_channel_close *args = data; + struct tegra_drm_context *context; + + mutex_lock(&fpriv->lock); + + context = xa_load(&fpriv->contexts, args->context); + if (!context) { + mutex_unlock(&fpriv->lock); + return -EINVAL; + } + + xa_erase(&fpriv->contexts, args->context); + + mutex_unlock(&fpriv->lock); + + tegra_drm_channel_context_close(context); + + return 0; +} + +int tegra_drm_ioctl_channel_map(struct drm_device *drm, void *data, struct drm_file *file) +{ + struct tegra_drm_file *fpriv = file->driver_priv; + struct drm_tegra_channel_map *args = data; + struct tegra_drm_mapping *mapping; + struct tegra_drm_context *context; + int err = 0; + + if (args->flags & ~DRM_TEGRA_CHANNEL_MAP_READ_WRITE) + return -EINVAL; + + mutex_lock(&fpriv->lock); + + context = xa_load(&fpriv->contexts, args->context); + if (!context) { + mutex_unlock(&fpriv->lock); + return -EINVAL; + } + + mapping = kzalloc(sizeof(*mapping), GFP_KERNEL); + if (!mapping) { + err = -ENOMEM; + goto unlock; + } + + kref_init(&mapping->ref); + + mapping->dev = context->client->base.dev; + mapping->bo = tegra_gem_lookup(file, args->handle); + if (!mapping->bo) { + err = -EINVAL; + goto unlock; + } + + if (context->client->base.group) { + /* IOMMU domain managed directly using IOMMU API */ + host1x_bo_pin(mapping->dev, mapping->bo, &mapping->iova); + } else { + switch (args->flags & DRM_TEGRA_CHANNEL_MAP_READ_WRITE) { + case DRM_TEGRA_CHANNEL_MAP_READ_WRITE: + mapping->direction = DMA_BIDIRECTIONAL; + break; + + case DRM_TEGRA_CHANNEL_MAP_WRITE: + mapping->direction = DMA_FROM_DEVICE; + break; + + case DRM_TEGRA_CHANNEL_MAP_READ: + mapping->direction = DMA_TO_DEVICE; + break; + + default: + return -EINVAL; + } + + mapping->sgt = host1x_bo_pin(mapping->dev, mapping->bo, NULL); + if (IS_ERR(mapping->sgt)) { + err = PTR_ERR(mapping->sgt); + goto put_gem; + } + + err = dma_map_sgtable(mapping->dev, mapping->sgt, mapping->direction, + 
DMA_ATTR_SKIP_CPU_SYNC); + if (err) + goto unpin; + + mapping->iova = sg_dma_address(mapping->sgt->sgl); + } + + mapping->iova_end = mapping->iova + host1x_to_tegra_bo(mapping->bo)->size; + + err = xa_alloc(&context->mappings, &args->mapping, mapping, XA_LIMIT(1, U32_MAX), + GFP_KERNEL); + if (err < 0) + goto unmap; + + mutex_unlock(&fpriv->lock); + + return 0; + +unmap: + if (mapping->sgt) { + dma_unmap_sgtable(mapping->dev, mapping->sgt, mapping->direction, + DMA_ATTR_SKIP_CPU_SYNC); + } +unpin: + host1x_bo_unpin(mapping->dev, mapping->bo, mapping->sgt); +put_gem: + host1x_bo_put(mapping->bo); + kfree(mapping); +unlock: + mutex_unlock(&fpriv->lock); + return err; +} + +int tegra_drm_ioctl_channel_unmap(struct drm_device *drm, void *data, struct drm_file *file) +{ + struct tegra_drm_file *fpriv = file->driver_priv; + struct drm_tegra_channel_unmap *args = data; + struct tegra_drm_mapping *mapping; + struct tegra_drm_context *context; + + mutex_lock(&fpriv->lock); + + context = xa_load(&fpriv->contexts, args->context); + if (!context) { + mutex_unlock(&fpriv->lock); + return -EINVAL; + } + + mapping = xa_erase(&context->mappings, args->mapping); + + mutex_unlock(&fpriv->lock); + + if (!mapping) + return -EINVAL; + + tegra_drm_mapping_put(mapping); + return 0; +} + +int tegra_drm_ioctl_syncpoint_allocate(struct drm_device *drm, void *data, struct drm_file *file) +{ + struct host1x *host1x = tegra_drm_to_host1x(drm->dev_private); + struct tegra_drm_file *fpriv = file->driver_priv; + struct drm_tegra_syncpoint_allocate *args = data; + struct host1x_syncpt *sp; + int err; + + if (args->id) + return -EINVAL; + + sp = host1x_syncpt_alloc(host1x, HOST1X_SYNCPT_CLIENT_MANAGED, current->comm); + if (!sp) + return -EBUSY; + + args->id = host1x_syncpt_id(sp); + + err = xa_insert(&fpriv->syncpoints, args->id, sp, GFP_KERNEL); + if (err) { + host1x_syncpt_put(sp); + return err; + } + + return 0; +} + +int tegra_drm_ioctl_syncpoint_free(struct drm_device *drm, void *data, struct drm_file *file) +{ + struct tegra_drm_file *fpriv = file->driver_priv; + struct drm_tegra_syncpoint_allocate *args = data; + struct host1x_syncpt *sp; + + mutex_lock(&fpriv->lock); + sp = xa_erase(&fpriv->syncpoints, args->id); + mutex_unlock(&fpriv->lock); + + if (!sp) + return -EINVAL; + + host1x_syncpt_put(sp); + + return 0; +} + +int tegra_drm_ioctl_syncpoint_wait(struct drm_device *drm, void *data, struct drm_file *file) +{ + struct host1x *host1x = tegra_drm_to_host1x(drm->dev_private); + struct drm_tegra_syncpoint_wait *args = data; + signed long timeout_jiffies; + struct host1x_syncpt *sp; + + if (args->padding != 0) + return -EINVAL; + + sp = host1x_syncpt_get_by_id_noref(host1x, args->id); + if (!sp) + return -EINVAL; + + timeout_jiffies = drm_timeout_abs_to_jiffies(args->timeout_ns); + + return host1x_syncpt_wait(sp, args->threshold, timeout_jiffies, &args->value); +} diff --git a/drivers/gpu/drm/tegra/uapi.h b/drivers/gpu/drm/tegra/uapi.h new file mode 100644 index 000000000000..12adad770ad3 --- /dev/null +++ b/drivers/gpu/drm/tegra/uapi.h @@ -0,0 +1,58 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (c) 2020 NVIDIA Corporation */ + +#ifndef _TEGRA_DRM_UAPI_H +#define _TEGRA_DRM_UAPI_H + +#include <linux/dma-mapping.h> +#include <linux/idr.h> +#include <linux/kref.h> +#include <linux/xarray.h> + +#include <drm/drm.h> + +struct drm_file; +struct drm_device; + +struct tegra_drm_file { + /* Legacy UAPI state */ + struct idr legacy_contexts; + struct mutex lock; + + /* New UAPI state */ + struct xarray 
contexts; + struct xarray syncpoints; +}; + +struct tegra_drm_mapping { + struct kref ref; + + struct device *dev; + struct host1x_bo *bo; + struct sg_table *sgt; + enum dma_data_direction direction; + dma_addr_t iova; + dma_addr_t iova_end; +}; + +int tegra_drm_ioctl_channel_open(struct drm_device *drm, void *data, + struct drm_file *file); +int tegra_drm_ioctl_channel_close(struct drm_device *drm, void *data, + struct drm_file *file); +int tegra_drm_ioctl_channel_map(struct drm_device *drm, void *data, + struct drm_file *file); +int tegra_drm_ioctl_channel_unmap(struct drm_device *drm, void *data, + struct drm_file *file); +int tegra_drm_ioctl_channel_submit(struct drm_device *drm, void *data, + struct drm_file *file); +int tegra_drm_ioctl_syncpoint_allocate(struct drm_device *drm, void *data, + struct drm_file *file); +int tegra_drm_ioctl_syncpoint_free(struct drm_device *drm, void *data, + struct drm_file *file); +int tegra_drm_ioctl_syncpoint_wait(struct drm_device *drm, void *data, + struct drm_file *file); + +void tegra_drm_uapi_close_file(struct tegra_drm_file *file); +void tegra_drm_mapping_put(struct tegra_drm_mapping *mapping); + +#endif diff --git a/drivers/gpu/drm/tegra/vic.c b/drivers/gpu/drm/tegra/vic.c index c9d55a9a3180..c02010ff2b7f 100644 --- a/drivers/gpu/drm/tegra/vic.c +++ b/drivers/gpu/drm/tegra/vic.c @@ -29,7 +29,6 @@ struct vic_config { struct vic { struct falcon falcon; - bool booted; void __iomem *regs; struct tegra_drm_client client; @@ -52,48 +51,6 @@ static void vic_writel(struct vic *vic, u32 value, unsigned int offset) writel(value, vic->regs + offset); } -static int vic_runtime_resume(struct device *dev) -{ - struct vic *vic = dev_get_drvdata(dev); - int err; - - err = clk_prepare_enable(vic->clk); - if (err < 0) - return err; - - usleep_range(10, 20); - - err = reset_control_deassert(vic->rst); - if (err < 0) - goto disable; - - usleep_range(10, 20); - - return 0; - -disable: - clk_disable_unprepare(vic->clk); - return err; -} - -static int vic_runtime_suspend(struct device *dev) -{ - struct vic *vic = dev_get_drvdata(dev); - int err; - - err = reset_control_assert(vic->rst); - if (err < 0) - return err; - - usleep_range(2000, 4000); - - clk_disable_unprepare(vic->clk); - - vic->booted = false; - - return 0; -} - static int vic_boot(struct vic *vic) { #ifdef CONFIG_IOMMU_API @@ -103,9 +60,6 @@ static int vic_boot(struct vic *vic) void *hdr; int err = 0; - if (vic->booted) - return 0; - #ifdef CONFIG_IOMMU_API if (vic->config->supports_sid && spec) { u32 value; @@ -168,8 +122,6 @@ static int vic_boot(struct vic *vic) return err; } - vic->booted = true; - return 0; } @@ -323,35 +275,74 @@ cleanup: return err; } -static int vic_open_channel(struct tegra_drm_client *client, - struct tegra_drm_context *context) + +static int vic_runtime_resume(struct device *dev) { - struct vic *vic = to_vic(client); + struct vic *vic = dev_get_drvdata(dev); int err; - err = pm_runtime_resume_and_get(vic->dev); + err = clk_prepare_enable(vic->clk); if (err < 0) return err; + usleep_range(10, 20); + + err = reset_control_deassert(vic->rst); + if (err < 0) + goto disable; + + usleep_range(10, 20); + err = vic_load_firmware(vic); if (err < 0) - goto rpm_put; + goto assert; err = vic_boot(vic); if (err < 0) - goto rpm_put; + goto assert; + + return 0; + +assert: + reset_control_assert(vic->rst); +disable: + clk_disable_unprepare(vic->clk); + return err; +} + +static int vic_runtime_suspend(struct device *dev) +{ + struct vic *vic = dev_get_drvdata(dev); + int err; + + err = 
reset_control_assert(vic->rst); + if (err < 0) + return err; + + usleep_range(2000, 4000); + + clk_disable_unprepare(vic->clk); + + return 0; +} + +static int vic_open_channel(struct tegra_drm_client *client, + struct tegra_drm_context *context) +{ + struct vic *vic = to_vic(client); + int err; + + err = pm_runtime_resume_and_get(vic->dev); + if (err < 0) + return err; context->channel = host1x_channel_get(vic->channel); if (!context->channel) { - err = -ENOMEM; - goto rpm_put; + pm_runtime_put(vic->dev); + return -ENOMEM; } return 0; - -rpm_put: - pm_runtime_put(vic->dev); - return err; } static void vic_close_channel(struct tegra_drm_context *context) @@ -359,7 +350,6 @@ static void vic_close_channel(struct tegra_drm_context *context) struct vic *vic = to_vic(context->client); host1x_channel_put(context->channel); - pm_runtime_put(vic->dev); } diff --git a/drivers/gpu/drm/tidss/tidss_drv.c b/drivers/gpu/drm/tidss/tidss_drv.c index 66e3c86eb5c7..d620f35688da 100644 --- a/drivers/gpu/drm/tidss/tidss_drv.c +++ b/drivers/gpu/drm/tidss/tidss_drv.c @@ -16,7 +16,6 @@ #include <drm/drm_drv.h> #include <drm/drm_fb_helper.h> #include <drm/drm_gem_cma_helper.h> -#include <drm/drm_irq.h> #include <drm/drm_managed.h> #include <drm/drm_probe_helper.h> @@ -118,11 +117,6 @@ static const struct drm_driver tidss_driver = { .date = "20180215", .major = 1, .minor = 0, - - .irq_preinstall = tidss_irq_preinstall, - .irq_postinstall = tidss_irq_postinstall, - .irq_handler = tidss_irq_handler, - .irq_uninstall = tidss_irq_uninstall, }; static int tidss_probe(struct platform_device *pdev) @@ -172,10 +166,11 @@ static int tidss_probe(struct platform_device *pdev) ret = irq; goto err_runtime_suspend; } + tidss->irq = irq; - ret = drm_irq_install(ddev, irq); + ret = tidss_irq_install(ddev, irq); if (ret) { - dev_err(dev, "drm_irq_install failed: %d\n", ret); + dev_err(dev, "tidss_irq_install failed: %d\n", ret); goto err_runtime_suspend; } @@ -196,7 +191,7 @@ static int tidss_probe(struct platform_device *pdev) return 0; err_irq_uninstall: - drm_irq_uninstall(ddev); + tidss_irq_uninstall(ddev); err_runtime_suspend: #ifndef CONFIG_PM @@ -219,7 +214,7 @@ static int tidss_remove(struct platform_device *pdev) drm_atomic_helper_shutdown(ddev); - drm_irq_uninstall(ddev); + tidss_irq_uninstall(ddev); #ifndef CONFIG_PM /* If we don't have PM, we need to call suspend manually */ diff --git a/drivers/gpu/drm/tidss/tidss_drv.h b/drivers/gpu/drm/tidss/tidss_drv.h index 7de4bba52e6f..d7f27b0b0315 100644 --- a/drivers/gpu/drm/tidss/tidss_drv.h +++ b/drivers/gpu/drm/tidss/tidss_drv.h @@ -27,6 +27,8 @@ struct tidss_device { unsigned int num_planes; struct drm_plane *planes[TIDSS_MAX_PLANES]; + unsigned int irq; + spinlock_t wait_lock; /* protects the irq masks */ dispc_irq_t irq_mask; /* enabled irqs in addition to wait_list */ }; diff --git a/drivers/gpu/drm/tidss/tidss_irq.c b/drivers/gpu/drm/tidss/tidss_irq.c index a5ec7931ef6b..0c681c7600bc 100644 --- a/drivers/gpu/drm/tidss/tidss_irq.c +++ b/drivers/gpu/drm/tidss/tidss_irq.c @@ -4,6 +4,9 @@ * Author: Tomi Valkeinen <tomi.valkeinen@ti.com> */ +#include <linux/platform_device.h> + +#include <drm/drm_drv.h> #include <drm/drm_print.h> #include "tidss_crtc.h" @@ -50,16 +53,13 @@ void tidss_irq_disable_vblank(struct drm_crtc *crtc) spin_unlock_irqrestore(&tidss->wait_lock, flags); } -irqreturn_t tidss_irq_handler(int irq, void *arg) +static irqreturn_t tidss_irq_handler(int irq, void *arg) { struct drm_device *ddev = (struct drm_device *)arg; struct tidss_device *tidss = 
to_tidss(ddev); unsigned int id; dispc_irq_t irqstatus; - if (WARN_ON(!ddev->irq_enabled)) - return IRQ_NONE; - irqstatus = dispc_read_and_clear_irqstatus(tidss->dispc); for (id = 0; id < tidss->num_crtcs; id++) { @@ -93,7 +93,7 @@ void tidss_irq_resume(struct tidss_device *tidss) spin_unlock_irqrestore(&tidss->wait_lock, flags); } -void tidss_irq_preinstall(struct drm_device *ddev) +static void tidss_irq_preinstall(struct drm_device *ddev) { struct tidss_device *tidss = to_tidss(ddev); @@ -107,7 +107,7 @@ void tidss_irq_preinstall(struct drm_device *ddev) tidss_runtime_put(tidss); } -int tidss_irq_postinstall(struct drm_device *ddev) +static void tidss_irq_postinstall(struct drm_device *ddev) { struct tidss_device *tidss = to_tidss(ddev); unsigned long flags; @@ -132,6 +132,22 @@ int tidss_irq_postinstall(struct drm_device *ddev) spin_unlock_irqrestore(&tidss->wait_lock, flags); tidss_runtime_put(tidss); +} + +int tidss_irq_install(struct drm_device *ddev, unsigned int irq) +{ + int ret; + + if (irq == IRQ_NOTCONNECTED) + return -ENOTCONN; + + tidss_irq_preinstall(ddev); + + ret = request_irq(irq, tidss_irq_handler, 0, ddev->driver->name, ddev); + if (ret) + return ret; + + tidss_irq_postinstall(ddev); return 0; } @@ -143,4 +159,6 @@ void tidss_irq_uninstall(struct drm_device *ddev) tidss_runtime_get(tidss); dispc_set_irqenable(tidss->dispc, 0); tidss_runtime_put(tidss); + + free_irq(tidss->irq, ddev); } diff --git a/drivers/gpu/drm/tidss/tidss_irq.h b/drivers/gpu/drm/tidss/tidss_irq.h index 4aaad5dfd7c2..b512614d5863 100644 --- a/drivers/gpu/drm/tidss/tidss_irq.h +++ b/drivers/gpu/drm/tidss/tidss_irq.h @@ -67,10 +67,8 @@ struct tidss_device; void tidss_irq_enable_vblank(struct drm_crtc *crtc); void tidss_irq_disable_vblank(struct drm_crtc *crtc); -void tidss_irq_preinstall(struct drm_device *ddev); -int tidss_irq_postinstall(struct drm_device *ddev); +int tidss_irq_install(struct drm_device *ddev, unsigned int irq); void tidss_irq_uninstall(struct drm_device *ddev); -irqreturn_t tidss_irq_handler(int irq, void *arg); void tidss_irq_resume(struct tidss_device *tidss); diff --git a/drivers/gpu/drm/tidss/tidss_kms.c b/drivers/gpu/drm/tidss/tidss_kms.c index 95f8e0f78e32..666e527a0acf 100644 --- a/drivers/gpu/drm/tidss/tidss_kms.c +++ b/drivers/gpu/drm/tidss/tidss_kms.c @@ -227,10 +227,8 @@ static int tidss_dispc_modeset_init(struct tidss_device *tidss) } ret = drm_bridge_attach(enc, pipes[i].bridge, NULL, 0); - if (ret) { - dev_err(tidss->dev, "bridge attach failed: %d\n", ret); + if (ret) return ret; - } } /* create overlay planes of the leftover planes */ diff --git a/drivers/gpu/drm/tidss/tidss_plane.c b/drivers/gpu/drm/tidss/tidss_plane.c index 1acd15aa4193..217415ec8eea 100644 --- a/drivers/gpu/drm/tidss/tidss_plane.c +++ b/drivers/gpu/drm/tidss/tidss_plane.c @@ -158,7 +158,6 @@ static void drm_plane_destroy(struct drm_plane *plane) } static const struct drm_plane_helper_funcs tidss_plane_helper_funcs = { - .prepare_fb = drm_gem_plane_helper_prepare_fb, .atomic_check = tidss_plane_atomic_check, .atomic_update = tidss_plane_atomic_update, .atomic_disable = tidss_plane_atomic_disable, diff --git a/drivers/gpu/drm/tilcdc/tilcdc_drv.c b/drivers/gpu/drm/tilcdc/tilcdc_drv.c index f1d3a9f919fd..6b03f89a98d4 100644 --- a/drivers/gpu/drm/tilcdc/tilcdc_drv.c +++ b/drivers/gpu/drm/tilcdc/tilcdc_drv.c @@ -20,7 +20,6 @@ #include <drm/drm_fourcc.h> #include <drm/drm_gem_cma_helper.h> #include <drm/drm_gem_framebuffer_helper.h> -#include <drm/drm_irq.h> #include <drm/drm_mm.h> #include 
<drm/drm_probe_helper.h> #include <drm/drm_vblank.h> @@ -124,6 +123,39 @@ static int cpufreq_transition(struct notifier_block *nb, } #endif +static irqreturn_t tilcdc_irq(int irq, void *arg) +{ + struct drm_device *dev = arg; + struct tilcdc_drm_private *priv = dev->dev_private; + + return tilcdc_crtc_irq(priv->crtc); +} + +static int tilcdc_irq_install(struct drm_device *dev, unsigned int irq) +{ + struct tilcdc_drm_private *priv = dev->dev_private; + int ret; + + ret = request_irq(irq, tilcdc_irq, 0, dev->driver->name, dev); + if (ret) + return ret; + + priv->irq_enabled = false; + + return 0; +} + +static void tilcdc_irq_uninstall(struct drm_device *dev) +{ + struct tilcdc_drm_private *priv = dev->dev_private; + + if (!priv->irq_enabled) + return; + + free_irq(priv->irq, dev); + priv->irq_enabled = false; +} + /* * DRM operations: */ @@ -145,7 +177,7 @@ static void tilcdc_fini(struct drm_device *dev) drm_dev_unregister(dev); drm_kms_helper_poll_fini(dev); - drm_irq_uninstall(dev); + tilcdc_irq_uninstall(dev); drm_mode_config_cleanup(dev); if (priv->clk) @@ -336,7 +368,12 @@ static int tilcdc_init(const struct drm_driver *ddrv, struct device *dev) goto init_failed; } - ret = drm_irq_install(ddev, platform_get_irq(pdev, 0)); + ret = platform_get_irq(pdev, 0); + if (ret < 0) + goto init_failed; + priv->irq = ret; + + ret = tilcdc_irq_install(ddev, priv->irq); if (ret < 0) { dev_err(dev, "failed to install IRQ handler\n"); goto init_failed; @@ -360,13 +397,6 @@ init_failed: return ret; } -static irqreturn_t tilcdc_irq(int irq, void *arg) -{ - struct drm_device *dev = arg; - struct tilcdc_drm_private *priv = dev->dev_private; - return tilcdc_crtc_irq(priv->crtc); -} - #if defined(CONFIG_DEBUG_FS) static const struct { const char *name; @@ -454,7 +484,6 @@ DEFINE_DRM_GEM_CMA_FOPS(fops); static const struct drm_driver tilcdc_driver = { .driver_features = DRIVER_GEM | DRIVER_MODESET | DRIVER_ATOMIC, - .irq_handler = tilcdc_irq, DRM_GEM_CMA_DRIVER_OPS, #ifdef CONFIG_DEBUG_FS .debugfs_init = tilcdc_debugfs_init, diff --git a/drivers/gpu/drm/tilcdc/tilcdc_drv.h b/drivers/gpu/drm/tilcdc/tilcdc_drv.h index d29806ca8817..b818448c83f6 100644 --- a/drivers/gpu/drm/tilcdc/tilcdc_drv.h +++ b/drivers/gpu/drm/tilcdc/tilcdc_drv.h @@ -46,6 +46,8 @@ struct tilcdc_drm_private { struct clk *clk; /* functional clock */ int rev; /* IP revision */ + unsigned int irq; + /* don't attempt resolutions w/ higher W * H * Hz: */ uint32_t max_bandwidth; /* @@ -82,6 +84,7 @@ struct tilcdc_drm_private { bool is_registered; bool is_componentized; + bool irq_enabled; }; /* Sub-module for display. Since we don't know at compile time what panels diff --git a/drivers/gpu/drm/tilcdc/tilcdc_external.c b/drivers/gpu/drm/tilcdc/tilcdc_external.c index b177525588c1..7594cf6e186e 100644 --- a/drivers/gpu/drm/tilcdc/tilcdc_external.c +++ b/drivers/gpu/drm/tilcdc/tilcdc_external.c @@ -93,10 +93,8 @@ int tilcdc_attach_bridge(struct drm_device *ddev, struct drm_bridge *bridge) priv->external_encoder->possible_crtcs = BIT(0); ret = drm_bridge_attach(priv->external_encoder, bridge, NULL, 0); - if (ret) { - dev_err(ddev->dev, "drm_bridge_attach() failed %d\n", ret); + if (ret) return ret; - } tilcdc_crtc_set_panel_info(priv->crtc, &panel_info_default); diff --git a/drivers/gpu/drm/tiny/Kconfig b/drivers/gpu/drm/tiny/Kconfig index d46f95d9196d..d31be274a2bd 100644 --- a/drivers/gpu/drm/tiny/Kconfig +++ b/drivers/gpu/drm/tiny/Kconfig @@ -10,6 +10,19 @@ config DRM_ARCPGU If M is selected the module will be called arcpgu. 
+config DRM_BOCHS + tristate "DRM Support for bochs dispi vga interface (qemu stdvga)" + depends on DRM && PCI && MMU + select DRM_KMS_HELPER + select DRM_VRAM_HELPER + select DRM_TTM + select DRM_TTM_HELPER + help + This is a KMS driver for qemu's stdvga output. Choose this option + for qemu. + + If M is selected the module will be called bochs. + config DRM_CIRRUS_QEMU tristate "Cirrus driver for QEMU emulated device" depends on DRM && PCI && MMU @@ -51,8 +64,8 @@ config DRM_SIMPLEDRM buffer, size, and display format must be provided via device tree, UEFI, VESA, etc. - On x86 and compatible, you should also select CONFIG_X86_SYSFB to - use UEFI and VESA framebuffers. + On x86 BIOS or UEFI systems, you should also select SYSFB_SIMPLEFB + to use UEFI and VESA framebuffers. config TINYDRM_HX8357D tristate "DRM support for HX8357D display panels" diff --git a/drivers/gpu/drm/tiny/Makefile b/drivers/gpu/drm/tiny/Makefile index 9cc847e756da..e09942895c77 100644 --- a/drivers/gpu/drm/tiny/Makefile +++ b/drivers/gpu/drm/tiny/Makefile @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only obj-$(CONFIG_DRM_ARCPGU) += arcpgu.o +obj-$(CONFIG_DRM_BOCHS) += bochs.o obj-$(CONFIG_DRM_CIRRUS_QEMU) += cirrus.o obj-$(CONFIG_DRM_GM12U320) += gm12u320.o obj-$(CONFIG_DRM_SIMPLEDRM) += simpledrm.o diff --git a/drivers/gpu/drm/tiny/bochs.c b/drivers/gpu/drm/tiny/bochs.c new file mode 100644 index 000000000000..73415fa9ae0f --- /dev/null +++ b/drivers/gpu/drm/tiny/bochs.c @@ -0,0 +1,733 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include <linux/console.h> +#include <linux/pci.h> + +#include <drm/drm_aperture.h> +#include <drm/drm_atomic_helper.h> +#include <drm/drm_drv.h> +#include <drm/drm_fb_helper.h> +#include <drm/drm_fourcc.h> +#include <drm/drm_gem_framebuffer_helper.h> +#include <drm/drm_gem_vram_helper.h> +#include <drm/drm_managed.h> +#include <drm/drm_probe_helper.h> +#include <drm/drm_simple_kms_helper.h> + +#include <video/vga.h> + +/* ---------------------------------------------------------------------- */ + +#define VBE_DISPI_IOPORT_INDEX 0x01CE +#define VBE_DISPI_IOPORT_DATA 0x01CF + +#define VBE_DISPI_INDEX_ID 0x0 +#define VBE_DISPI_INDEX_XRES 0x1 +#define VBE_DISPI_INDEX_YRES 0x2 +#define VBE_DISPI_INDEX_BPP 0x3 +#define VBE_DISPI_INDEX_ENABLE 0x4 +#define VBE_DISPI_INDEX_BANK 0x5 +#define VBE_DISPI_INDEX_VIRT_WIDTH 0x6 +#define VBE_DISPI_INDEX_VIRT_HEIGHT 0x7 +#define VBE_DISPI_INDEX_X_OFFSET 0x8 +#define VBE_DISPI_INDEX_Y_OFFSET 0x9 +#define VBE_DISPI_INDEX_VIDEO_MEMORY_64K 0xa + +#define VBE_DISPI_ID0 0xB0C0 +#define VBE_DISPI_ID1 0xB0C1 +#define VBE_DISPI_ID2 0xB0C2 +#define VBE_DISPI_ID3 0xB0C3 +#define VBE_DISPI_ID4 0xB0C4 +#define VBE_DISPI_ID5 0xB0C5 + +#define VBE_DISPI_DISABLED 0x00 +#define VBE_DISPI_ENABLED 0x01 +#define VBE_DISPI_GETCAPS 0x02 +#define VBE_DISPI_8BIT_DAC 0x20 +#define VBE_DISPI_LFB_ENABLED 0x40 +#define VBE_DISPI_NOCLEARMEM 0x80 + +static int bochs_modeset = -1; +static int defx = 1024; +static int defy = 768; + +module_param_named(modeset, bochs_modeset, int, 0444); +MODULE_PARM_DESC(modeset, "enable/disable kernel modesetting"); + +module_param(defx, int, 0444); +module_param(defy, int, 0444); +MODULE_PARM_DESC(defx, "default x resolution"); +MODULE_PARM_DESC(defy, "default y resolution"); + +/* ---------------------------------------------------------------------- */ + +enum bochs_types { + BOCHS_QEMU_STDVGA, + BOCHS_UNKNOWN, +}; + +struct bochs_device { + /* hw */ + void __iomem *mmio; + int ioports; + void __iomem *fb_map; + unsigned long fb_base; + 
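/* length of PCI BAR 0, clamped to the VBE-reported VRAM size in bochs_hw_init() */
+ 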
unsigned long fb_size; + unsigned long qext_size; + + /* mode */ + u16 xres; + u16 yres; + u16 yres_virtual; + u32 stride; + u32 bpp; + struct edid *edid; + + /* drm */ + struct drm_device *dev; + struct drm_simple_display_pipe pipe; + struct drm_connector connector; +}; + +/* ---------------------------------------------------------------------- */ + +static void bochs_vga_writeb(struct bochs_device *bochs, u16 ioport, u8 val) +{ + if (WARN_ON(ioport < 0x3c0 || ioport > 0x3df)) + return; + + if (bochs->mmio) { + int offset = ioport - 0x3c0 + 0x400; + + writeb(val, bochs->mmio + offset); + } else { + outb(val, ioport); + } +} + +static u8 bochs_vga_readb(struct bochs_device *bochs, u16 ioport) +{ + if (WARN_ON(ioport < 0x3c0 || ioport > 0x3df)) + return 0xff; + + if (bochs->mmio) { + int offset = ioport - 0x3c0 + 0x400; + + return readb(bochs->mmio + offset); + } else { + return inb(ioport); + } +} + +static u16 bochs_dispi_read(struct bochs_device *bochs, u16 reg) +{ + u16 ret = 0; + + if (bochs->mmio) { + int offset = 0x500 + (reg << 1); + + ret = readw(bochs->mmio + offset); + } else { + outw(reg, VBE_DISPI_IOPORT_INDEX); + ret = inw(VBE_DISPI_IOPORT_DATA); + } + return ret; +} + +static void bochs_dispi_write(struct bochs_device *bochs, u16 reg, u16 val) +{ + if (bochs->mmio) { + int offset = 0x500 + (reg << 1); + + writew(val, bochs->mmio + offset); + } else { + outw(reg, VBE_DISPI_IOPORT_INDEX); + outw(val, VBE_DISPI_IOPORT_DATA); + } +} + +static void bochs_hw_set_big_endian(struct bochs_device *bochs) +{ + if (bochs->qext_size < 8) + return; + + writel(0xbebebebe, bochs->mmio + 0x604); +} + +static void bochs_hw_set_little_endian(struct bochs_device *bochs) +{ + if (bochs->qext_size < 8) + return; + + writel(0x1e1e1e1e, bochs->mmio + 0x604); +} + +#ifdef __BIG_ENDIAN +#define bochs_hw_set_native_endian(_b) bochs_hw_set_big_endian(_b) +#else +#define bochs_hw_set_native_endian(_b) bochs_hw_set_little_endian(_b) +#endif + +static int bochs_get_edid_block(void *data, u8 *buf, + unsigned int block, size_t len) +{ + struct bochs_device *bochs = data; + size_t i, start = block * EDID_LENGTH; + + if (start + len > 0x400 /* vga register offset */) + return -1; + + for (i = 0; i < len; i++) + buf[i] = readb(bochs->mmio + start + i); + + return 0; +} + +static int bochs_hw_load_edid(struct bochs_device *bochs) +{ + u8 header[8]; + + if (!bochs->mmio) + return -1; + + /* check header to detect whenever edid support is enabled in qemu */ + bochs_get_edid_block(bochs, header, 0, ARRAY_SIZE(header)); + if (drm_edid_header_is_valid(header) != 8) + return -1; + + kfree(bochs->edid); + bochs->edid = drm_do_get_edid(&bochs->connector, + bochs_get_edid_block, bochs); + if (bochs->edid == NULL) + return -1; + + return 0; +} + +static int bochs_hw_init(struct drm_device *dev) +{ + struct bochs_device *bochs = dev->dev_private; + struct pci_dev *pdev = to_pci_dev(dev->dev); + unsigned long addr, size, mem, ioaddr, iosize; + u16 id; + + if (pdev->resource[2].flags & IORESOURCE_MEM) { + /* mmio bar with vga and bochs registers present */ + if (pci_request_region(pdev, 2, "bochs-drm") != 0) { + DRM_ERROR("Cannot request mmio region\n"); + return -EBUSY; + } + ioaddr = pci_resource_start(pdev, 2); + iosize = pci_resource_len(pdev, 2); + bochs->mmio = ioremap(ioaddr, iosize); + if (bochs->mmio == NULL) { + DRM_ERROR("Cannot map mmio region\n"); + return -ENOMEM; + } + } else { + ioaddr = VBE_DISPI_IOPORT_INDEX; + iosize = 2; + if (!request_region(ioaddr, iosize, "bochs-drm")) { + DRM_ERROR("Cannot request 
ioports\n"); + return -EBUSY; + } + bochs->ioports = 1; + } + + id = bochs_dispi_read(bochs, VBE_DISPI_INDEX_ID); + mem = bochs_dispi_read(bochs, VBE_DISPI_INDEX_VIDEO_MEMORY_64K) + * 64 * 1024; + if ((id & 0xfff0) != VBE_DISPI_ID0) { + DRM_ERROR("ID mismatch\n"); + return -ENODEV; + } + + if ((pdev->resource[0].flags & IORESOURCE_MEM) == 0) + return -ENODEV; + addr = pci_resource_start(pdev, 0); + size = pci_resource_len(pdev, 0); + if (addr == 0) + return -ENODEV; + if (size != mem) { + DRM_ERROR("Size mismatch: pci=%ld, bochs=%ld\n", + size, mem); + size = min(size, mem); + } + + if (pci_request_region(pdev, 0, "bochs-drm") != 0) + DRM_WARN("Cannot request framebuffer, boot fb still active?\n"); + + bochs->fb_map = ioremap(addr, size); + if (bochs->fb_map == NULL) { + DRM_ERROR("Cannot map framebuffer\n"); + return -ENOMEM; + } + bochs->fb_base = addr; + bochs->fb_size = size; + + DRM_INFO("Found bochs VGA, ID 0x%x.\n", id); + DRM_INFO("Framebuffer size %ld kB @ 0x%lx, %s @ 0x%lx.\n", + size / 1024, addr, + bochs->ioports ? "ioports" : "mmio", + ioaddr); + + if (bochs->mmio && pdev->revision >= 2) { + bochs->qext_size = readl(bochs->mmio + 0x600); + if (bochs->qext_size < 4 || bochs->qext_size > iosize) { + bochs->qext_size = 0; + goto noext; + } + DRM_DEBUG("Found qemu ext regs, size %ld\n", + bochs->qext_size); + bochs_hw_set_native_endian(bochs); + } + +noext: + return 0; +} + +static void bochs_hw_fini(struct drm_device *dev) +{ + struct bochs_device *bochs = dev->dev_private; + + /* TODO: shot down existing vram mappings */ + + if (bochs->mmio) + iounmap(bochs->mmio); + if (bochs->ioports) + release_region(VBE_DISPI_IOPORT_INDEX, 2); + if (bochs->fb_map) + iounmap(bochs->fb_map); + pci_release_regions(to_pci_dev(dev->dev)); + kfree(bochs->edid); +} + +static void bochs_hw_blank(struct bochs_device *bochs, bool blank) +{ + DRM_DEBUG_DRIVER("hw_blank %d\n", blank); + /* discard ar_flip_flop */ + (void)bochs_vga_readb(bochs, VGA_IS1_RC); + /* blank or unblank; we need only update index and set 0x20 */ + bochs_vga_writeb(bochs, VGA_ATT_W, blank ? 
0 : 0x20); +} + +static void bochs_hw_setmode(struct bochs_device *bochs, struct drm_display_mode *mode) +{ + int idx; + + if (!drm_dev_enter(bochs->dev, &idx)) + return; + + bochs->xres = mode->hdisplay; + bochs->yres = mode->vdisplay; + bochs->bpp = 32; + bochs->stride = mode->hdisplay * (bochs->bpp / 8); + bochs->yres_virtual = bochs->fb_size / bochs->stride; + + DRM_DEBUG_DRIVER("%dx%d @ %d bpp, vy %d\n", + bochs->xres, bochs->yres, bochs->bpp, + bochs->yres_virtual); + + bochs_hw_blank(bochs, false); + + bochs_dispi_write(bochs, VBE_DISPI_INDEX_ENABLE, 0); + bochs_dispi_write(bochs, VBE_DISPI_INDEX_BPP, bochs->bpp); + bochs_dispi_write(bochs, VBE_DISPI_INDEX_XRES, bochs->xres); + bochs_dispi_write(bochs, VBE_DISPI_INDEX_YRES, bochs->yres); + bochs_dispi_write(bochs, VBE_DISPI_INDEX_BANK, 0); + bochs_dispi_write(bochs, VBE_DISPI_INDEX_VIRT_WIDTH, bochs->xres); + bochs_dispi_write(bochs, VBE_DISPI_INDEX_VIRT_HEIGHT, + bochs->yres_virtual); + bochs_dispi_write(bochs, VBE_DISPI_INDEX_X_OFFSET, 0); + bochs_dispi_write(bochs, VBE_DISPI_INDEX_Y_OFFSET, 0); + + bochs_dispi_write(bochs, VBE_DISPI_INDEX_ENABLE, + VBE_DISPI_ENABLED | VBE_DISPI_LFB_ENABLED); + + drm_dev_exit(idx); +} + +static void bochs_hw_setformat(struct bochs_device *bochs, const struct drm_format_info *format) +{ + int idx; + + if (!drm_dev_enter(bochs->dev, &idx)) + return; + + DRM_DEBUG_DRIVER("format %c%c%c%c\n", + (format->format >> 0) & 0xff, + (format->format >> 8) & 0xff, + (format->format >> 16) & 0xff, + (format->format >> 24) & 0xff); + + switch (format->format) { + case DRM_FORMAT_XRGB8888: + bochs_hw_set_little_endian(bochs); + break; + case DRM_FORMAT_BGRX8888: + bochs_hw_set_big_endian(bochs); + break; + default: + /* should not happen */ + DRM_ERROR("%s: Huh? Got framebuffer format 0x%x", + __func__, format->format); + break; + } + + drm_dev_exit(idx); +} + +static void bochs_hw_setbase(struct bochs_device *bochs, int x, int y, int stride, u64 addr) +{ + unsigned long offset; + unsigned int vx, vy, vwidth, idx; + + if (!drm_dev_enter(bochs->dev, &idx)) + return; + + bochs->stride = stride; + offset = (unsigned long)addr + + y * bochs->stride + + x * (bochs->bpp / 8); + vy = offset / bochs->stride; + vx = (offset % bochs->stride) * 8 / bochs->bpp; + vwidth = stride * 8 / bochs->bpp; + + DRM_DEBUG_DRIVER("x %d, y %d, addr %llx -> offset %lx, vx %d, vy %d\n", + x, y, addr, offset, vx, vy); + bochs_dispi_write(bochs, VBE_DISPI_INDEX_VIRT_WIDTH, vwidth); + bochs_dispi_write(bochs, VBE_DISPI_INDEX_X_OFFSET, vx); + bochs_dispi_write(bochs, VBE_DISPI_INDEX_Y_OFFSET, vy); + + drm_dev_exit(idx); +} + +/* ---------------------------------------------------------------------- */ + +static const uint32_t bochs_formats[] = { + DRM_FORMAT_XRGB8888, + DRM_FORMAT_BGRX8888, +}; + +static void bochs_plane_update(struct bochs_device *bochs, struct drm_plane_state *state) +{ + struct drm_gem_vram_object *gbo; + s64 gpu_addr; + + if (!state->fb || !bochs->stride) + return; + + gbo = drm_gem_vram_of_gem(state->fb->obj[0]); + gpu_addr = drm_gem_vram_offset(gbo); + if (WARN_ON_ONCE(gpu_addr < 0)) + return; /* Bug: we didn't pin the BO to VRAM in prepare_fb. 
*/ + + bochs_hw_setbase(bochs, + state->crtc_x, + state->crtc_y, + state->fb->pitches[0], + state->fb->offsets[0] + gpu_addr); + bochs_hw_setformat(bochs, state->fb->format); +} + +static void bochs_pipe_enable(struct drm_simple_display_pipe *pipe, + struct drm_crtc_state *crtc_state, + struct drm_plane_state *plane_state) +{ + struct bochs_device *bochs = pipe->crtc.dev->dev_private; + + bochs_hw_setmode(bochs, &crtc_state->mode); + bochs_plane_update(bochs, plane_state); +} + +static void bochs_pipe_disable(struct drm_simple_display_pipe *pipe) +{ + struct bochs_device *bochs = pipe->crtc.dev->dev_private; + + bochs_hw_blank(bochs, true); +} + +static void bochs_pipe_update(struct drm_simple_display_pipe *pipe, + struct drm_plane_state *old_state) +{ + struct bochs_device *bochs = pipe->crtc.dev->dev_private; + + bochs_plane_update(bochs, pipe->plane.state); +} + +static const struct drm_simple_display_pipe_funcs bochs_pipe_funcs = { + .enable = bochs_pipe_enable, + .disable = bochs_pipe_disable, + .update = bochs_pipe_update, + .prepare_fb = drm_gem_vram_simple_display_pipe_prepare_fb, + .cleanup_fb = drm_gem_vram_simple_display_pipe_cleanup_fb, +}; + +static int bochs_connector_get_modes(struct drm_connector *connector) +{ + struct bochs_device *bochs = + container_of(connector, struct bochs_device, connector); + int count = 0; + + if (bochs->edid) + count = drm_add_edid_modes(connector, bochs->edid); + + if (!count) { + count = drm_add_modes_noedid(connector, 8192, 8192); + drm_set_preferred_mode(connector, defx, defy); + } + return count; +} + +static const struct drm_connector_helper_funcs bochs_connector_connector_helper_funcs = { + .get_modes = bochs_connector_get_modes, +}; + +static const struct drm_connector_funcs bochs_connector_connector_funcs = { + .fill_modes = drm_helper_probe_single_connector_modes, + .destroy = drm_connector_cleanup, + .reset = drm_atomic_helper_connector_reset, + .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, + .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, +}; + +static void bochs_connector_init(struct drm_device *dev) +{ + struct bochs_device *bochs = dev->dev_private; + struct drm_connector *connector = &bochs->connector; + + drm_connector_init(dev, connector, &bochs_connector_connector_funcs, + DRM_MODE_CONNECTOR_VIRTUAL); + drm_connector_helper_add(connector, &bochs_connector_connector_helper_funcs); + + bochs_hw_load_edid(bochs); + if (bochs->edid) { + DRM_INFO("Found EDID data blob.\n"); + drm_connector_attach_edid_property(connector); + drm_connector_update_edid_property(connector, bochs->edid); + } +} + +static struct drm_framebuffer * +bochs_gem_fb_create(struct drm_device *dev, struct drm_file *file, + const struct drm_mode_fb_cmd2 *mode_cmd) +{ + if (mode_cmd->pixel_format != DRM_FORMAT_XRGB8888 && + mode_cmd->pixel_format != DRM_FORMAT_BGRX8888) + return ERR_PTR(-EINVAL); + + return drm_gem_fb_create(dev, file, mode_cmd); +} + +static const struct drm_mode_config_funcs bochs_mode_funcs = { + .fb_create = bochs_gem_fb_create, + .mode_valid = drm_vram_helper_mode_valid, + .atomic_check = drm_atomic_helper_check, + .atomic_commit = drm_atomic_helper_commit, +}; + +static int bochs_kms_init(struct bochs_device *bochs) +{ + int ret; + + ret = drmm_mode_config_init(bochs->dev); + if (ret) + return ret; + + bochs->dev->mode_config.max_width = 8192; + bochs->dev->mode_config.max_height = 8192; + + bochs->dev->mode_config.fb_base = bochs->fb_base; + bochs->dev->mode_config.preferred_depth = 24; + 
bochs->dev->mode_config.prefer_shadow = 0; + bochs->dev->mode_config.prefer_shadow_fbdev = 1; + bochs->dev->mode_config.quirk_addfb_prefer_host_byte_order = true; + + bochs->dev->mode_config.funcs = &bochs_mode_funcs; + + bochs_connector_init(bochs->dev); + drm_simple_display_pipe_init(bochs->dev, + &bochs->pipe, + &bochs_pipe_funcs, + bochs_formats, + ARRAY_SIZE(bochs_formats), + NULL, + &bochs->connector); + + drm_mode_config_reset(bochs->dev); + + return 0; +} + +/* ---------------------------------------------------------------------- */ +/* drm interface */ + +static int bochs_load(struct drm_device *dev) +{ + struct bochs_device *bochs; + int ret; + + bochs = drmm_kzalloc(dev, sizeof(*bochs), GFP_KERNEL); + if (bochs == NULL) + return -ENOMEM; + dev->dev_private = bochs; + bochs->dev = dev; + + ret = bochs_hw_init(dev); + if (ret) + return ret; + + ret = drmm_vram_helper_init(dev, bochs->fb_base, bochs->fb_size); + if (ret) + return ret; + + ret = bochs_kms_init(bochs); + if (ret) + return ret; + + return 0; +} + +DEFINE_DRM_GEM_FOPS(bochs_fops); + +static const struct drm_driver bochs_driver = { + .driver_features = DRIVER_GEM | DRIVER_MODESET | DRIVER_ATOMIC, + .fops = &bochs_fops, + .name = "bochs-drm", + .desc = "bochs dispi vga interface (qemu stdvga)", + .date = "20130925", + .major = 1, + .minor = 0, + DRM_GEM_VRAM_DRIVER, +}; + +/* ---------------------------------------------------------------------- */ +/* pm interface */ + +#ifdef CONFIG_PM_SLEEP +static int bochs_pm_suspend(struct device *dev) +{ + struct drm_device *drm_dev = dev_get_drvdata(dev); + + return drm_mode_config_helper_suspend(drm_dev); +} + +static int bochs_pm_resume(struct device *dev) +{ + struct drm_device *drm_dev = dev_get_drvdata(dev); + + return drm_mode_config_helper_resume(drm_dev); +} +#endif + +static const struct dev_pm_ops bochs_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(bochs_pm_suspend, + bochs_pm_resume) +}; + +/* ---------------------------------------------------------------------- */ +/* pci interface */ + +static int bochs_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) +{ + struct drm_device *dev; + unsigned long fbsize; + int ret; + + fbsize = pci_resource_len(pdev, 0); + if (fbsize < 4 * 1024 * 1024) { + DRM_ERROR("less than 4 MB video memory, ignoring device\n"); + return -ENOMEM; + } + + ret = drm_aperture_remove_conflicting_pci_framebuffers(pdev, &bochs_driver); + if (ret) + return ret; + + dev = drm_dev_alloc(&bochs_driver, &pdev->dev); + if (IS_ERR(dev)) + return PTR_ERR(dev); + + ret = pcim_enable_device(pdev); + if (ret) + goto err_free_dev; + + pci_set_drvdata(pdev, dev); + + ret = bochs_load(dev); + if (ret) + goto err_free_dev; + + ret = drm_dev_register(dev, 0); + if (ret) + goto err_free_dev; + + drm_fbdev_generic_setup(dev, 32); + return ret; + +err_free_dev: + drm_dev_put(dev); + return ret; +} + +static void bochs_pci_remove(struct pci_dev *pdev) +{ + struct drm_device *dev = pci_get_drvdata(pdev); + + drm_dev_unplug(dev); + drm_atomic_helper_shutdown(dev); + bochs_hw_fini(dev); + drm_dev_put(dev); +} + +static const struct pci_device_id bochs_pci_tbl[] = { + { + .vendor = 0x1234, + .device = 0x1111, + .subvendor = PCI_SUBVENDOR_ID_REDHAT_QUMRANET, + .subdevice = PCI_SUBDEVICE_ID_QEMU, + .driver_data = BOCHS_QEMU_STDVGA, + }, + { + .vendor = 0x1234, + .device = 0x1111, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .driver_data = BOCHS_UNKNOWN, + }, + { /* end of list */ } +}; + +static struct pci_driver bochs_pci_driver = { + .name = "bochs-drm", + 
.id_table = bochs_pci_tbl, + .probe = bochs_pci_probe, + .remove = bochs_pci_remove, + .driver.pm = &bochs_pm_ops, +}; + +/* ---------------------------------------------------------------------- */ +/* module init/exit */ + +static int __init bochs_init(void) +{ + if (vgacon_text_force() && bochs_modeset == -1) + return -EINVAL; + + if (bochs_modeset == 0) + return -EINVAL; + + return pci_register_driver(&bochs_pci_driver); +} + +static void __exit bochs_exit(void) +{ + pci_unregister_driver(&bochs_pci_driver); +} + +module_init(bochs_init); +module_exit(bochs_exit); + +MODULE_DEVICE_TABLE(pci, bochs_pci_tbl); +MODULE_AUTHOR("Gerd Hoffmann <kraxel@redhat.com>"); +MODULE_LICENSE("GPL"); diff --git a/drivers/gpu/drm/tiny/cirrus.c b/drivers/gpu/drm/tiny/cirrus.c index 42611dacde88..4611ec408506 100644 --- a/drivers/gpu/drm/tiny/cirrus.c +++ b/drivers/gpu/drm/tiny/cirrus.c @@ -435,7 +435,7 @@ static void cirrus_pipe_enable(struct drm_simple_display_pipe *pipe, struct drm_shadow_plane_state *shadow_plane_state = to_drm_shadow_plane_state(plane_state); cirrus_mode_set(cirrus, &crtc_state->mode, plane_state->fb); - cirrus_fb_blit_fullscreen(plane_state->fb, &shadow_plane_state->map[0]); + cirrus_fb_blit_fullscreen(plane_state->fb, &shadow_plane_state->data[0]); } static void cirrus_pipe_update(struct drm_simple_display_pipe *pipe, @@ -451,7 +451,7 @@ static void cirrus_pipe_update(struct drm_simple_display_pipe *pipe, cirrus_mode_set(cirrus, &crtc->mode, state->fb); if (drm_atomic_helper_damage_merged(old_state, state, &rect)) - cirrus_fb_blit_rect(state->fb, &shadow_plane_state->map[0], &rect); + cirrus_fb_blit_rect(state->fb, &shadow_plane_state->data[0], &rect); } static const struct drm_simple_display_pipe_funcs cirrus_pipe_funcs = { @@ -550,7 +550,7 @@ static int cirrus_pci_probe(struct pci_dev *pdev, struct cirrus_device *cirrus; int ret; - ret = drm_aperture_remove_conflicting_pci_framebuffers(pdev, "cirrusdrmfb"); + ret = drm_aperture_remove_conflicting_pci_framebuffers(pdev, &cirrus_driver); if (ret) return ret; diff --git a/drivers/gpu/drm/tiny/gm12u320.c b/drivers/gpu/drm/tiny/gm12u320.c index a233c86d428b..6bc0c298739c 100644 --- a/drivers/gpu/drm/tiny/gm12u320.c +++ b/drivers/gpu/drm/tiny/gm12u320.c @@ -3,7 +3,6 @@ * Copyright 2019 Hans de Goede <hdegoede@redhat.com> */ -#include <linux/dma-buf.h> #include <linux/module.h> #include <linux/usb.h> @@ -268,13 +267,10 @@ static void gm12u320_copy_fb_to_blocks(struct gm12u320_device *gm12u320) y2 = gm12u320->fb_update.rect.y2; vaddr = gm12u320->fb_update.src_map.vaddr; /* TODO: Use mapping abstraction properly */ - if (fb->obj[0]->import_attach) { - ret = dma_buf_begin_cpu_access( - fb->obj[0]->import_attach->dmabuf, DMA_FROM_DEVICE); - if (ret) { - GM12U320_ERR("dma_buf_begin_cpu_access err: %d\n", ret); - goto put_fb; - } + ret = drm_gem_fb_begin_cpu_access(fb, DMA_FROM_DEVICE); + if (ret) { + GM12U320_ERR("drm_gem_fb_begin_cpu_access err: %d\n", ret); + goto put_fb; } src = vaddr + y1 * fb->pitches[0] + x1 * 4; @@ -311,12 +307,7 @@ static void gm12u320_copy_fb_to_blocks(struct gm12u320_device *gm12u320) src += fb->pitches[0]; } - if (fb->obj[0]->import_attach) { - ret = dma_buf_end_cpu_access(fb->obj[0]->import_attach->dmabuf, - DMA_FROM_DEVICE); - if (ret) - GM12U320_ERR("dma_buf_end_cpu_access err: %d\n", ret); - } + drm_gem_fb_end_cpu_access(fb, DMA_FROM_DEVICE); put_fb: drm_framebuffer_put(fb); gm12u320->fb_update.fb = NULL; @@ -563,7 +554,7 @@ static void gm12u320_pipe_enable(struct drm_simple_display_pipe *pipe, struct 
drm_shadow_plane_state *shadow_plane_state = to_drm_shadow_plane_state(plane_state); gm12u320->fb_update.draw_status_timeout = FIRST_FRAME_TIMEOUT; - gm12u320_fb_mark_dirty(plane_state->fb, &shadow_plane_state->map[0], &rect); + gm12u320_fb_mark_dirty(plane_state->fb, &shadow_plane_state->data[0], &rect); } static void gm12u320_pipe_disable(struct drm_simple_display_pipe *pipe) @@ -581,7 +572,7 @@ static void gm12u320_pipe_update(struct drm_simple_display_pipe *pipe, struct drm_rect rect; if (drm_atomic_helper_damage_merged(old_state, state, &rect)) - gm12u320_fb_mark_dirty(state->fb, &shadow_plane_state->map[0], &rect); + gm12u320_fb_mark_dirty(state->fb, &shadow_plane_state->data[0], &rect); } static const struct drm_simple_display_pipe_funcs gm12u320_pipe_funcs = { diff --git a/drivers/gpu/drm/tiny/hx8357d.c b/drivers/gpu/drm/tiny/hx8357d.c index da5df93450de..9b33c05732aa 100644 --- a/drivers/gpu/drm/tiny/hx8357d.c +++ b/drivers/gpu/drm/tiny/hx8357d.c @@ -184,7 +184,6 @@ static const struct drm_simple_display_pipe_funcs hx8357d_pipe_funcs = { .enable = yx240qv29_enable, .disable = mipi_dbi_pipe_disable, .update = mipi_dbi_pipe_update, - .prepare_fb = drm_gem_simple_display_pipe_prepare_fb, }; static const struct drm_display_mode yx350hv15_mode = { diff --git a/drivers/gpu/drm/tiny/ili9225.c b/drivers/gpu/drm/tiny/ili9225.c index 69265d8a3beb..976d3209f164 100644 --- a/drivers/gpu/drm/tiny/ili9225.c +++ b/drivers/gpu/drm/tiny/ili9225.c @@ -328,7 +328,6 @@ static const struct drm_simple_display_pipe_funcs ili9225_pipe_funcs = { .enable = ili9225_pipe_enable, .disable = ili9225_pipe_disable, .update = ili9225_pipe_update, - .prepare_fb = drm_gem_simple_display_pipe_prepare_fb, }; static const struct drm_display_mode ili9225_mode = { diff --git a/drivers/gpu/drm/tiny/ili9341.c b/drivers/gpu/drm/tiny/ili9341.c index ad9ce7b4f76f..37e0c33399c8 100644 --- a/drivers/gpu/drm/tiny/ili9341.c +++ b/drivers/gpu/drm/tiny/ili9341.c @@ -140,7 +140,6 @@ static const struct drm_simple_display_pipe_funcs ili9341_pipe_funcs = { .enable = yx240qv29_enable, .disable = mipi_dbi_pipe_disable, .update = mipi_dbi_pipe_update, - .prepare_fb = drm_gem_simple_display_pipe_prepare_fb, }; static const struct drm_display_mode yx240qv29_mode = { diff --git a/drivers/gpu/drm/tiny/ili9486.c b/drivers/gpu/drm/tiny/ili9486.c index 75aa1476c66c..e9a63f4b2993 100644 --- a/drivers/gpu/drm/tiny/ili9486.c +++ b/drivers/gpu/drm/tiny/ili9486.c @@ -153,7 +153,6 @@ static const struct drm_simple_display_pipe_funcs waveshare_pipe_funcs = { .enable = waveshare_enable, .disable = mipi_dbi_pipe_disable, .update = mipi_dbi_pipe_update, - .prepare_fb = drm_gem_simple_display_pipe_prepare_fb, }; static const struct drm_display_mode waveshare_mode = { diff --git a/drivers/gpu/drm/tiny/mi0283qt.c b/drivers/gpu/drm/tiny/mi0283qt.c index 82fd1ad3413f..023de49e7a8e 100644 --- a/drivers/gpu/drm/tiny/mi0283qt.c +++ b/drivers/gpu/drm/tiny/mi0283qt.c @@ -144,7 +144,6 @@ static const struct drm_simple_display_pipe_funcs mi0283qt_pipe_funcs = { .enable = mi0283qt_enable, .disable = mipi_dbi_pipe_disable, .update = mipi_dbi_pipe_update, - .prepare_fb = drm_gem_simple_display_pipe_prepare_fb, }; static const struct drm_display_mode mi0283qt_mode = { diff --git a/drivers/gpu/drm/tiny/repaper.c b/drivers/gpu/drm/tiny/repaper.c index 2cee07a2e00b..4d07b21a16e6 100644 --- a/drivers/gpu/drm/tiny/repaper.c +++ b/drivers/gpu/drm/tiny/repaper.c @@ -14,7 +14,6 @@ */ #include <linux/delay.h> -#include <linux/dma-buf.h> #include <linux/gpio/consumer.h> #include 
<linux/module.h> #include <linux/property.h> @@ -532,7 +531,6 @@ static void repaper_gray8_to_mono_reversed(u8 *buf, u32 width, u32 height) static int repaper_fb_dirty(struct drm_framebuffer *fb) { struct drm_gem_cma_object *cma_obj = drm_fb_cma_get_gem_obj(fb, 0); - struct dma_buf_attachment *import_attach = cma_obj->base.import_attach; struct repaper_epd *epd = drm_to_epd(fb->dev); struct drm_rect clip; int idx, ret = 0; @@ -558,21 +556,13 @@ static int repaper_fb_dirty(struct drm_framebuffer *fb) goto out_exit; } - if (import_attach) { - ret = dma_buf_begin_cpu_access(import_attach->dmabuf, - DMA_FROM_DEVICE); - if (ret) - goto out_free; - } + ret = drm_gem_fb_begin_cpu_access(fb, DMA_FROM_DEVICE); + if (ret) + goto out_free; drm_fb_xrgb8888_to_gray8(buf, cma_obj->vaddr, fb, &clip); - if (import_attach) { - ret = dma_buf_end_cpu_access(import_attach->dmabuf, - DMA_FROM_DEVICE); - if (ret) - goto out_free; - } + drm_gem_fb_end_cpu_access(fb, DMA_FROM_DEVICE); repaper_gray8_to_mono_reversed(buf, fb->width, fb->height); @@ -861,7 +851,6 @@ static const struct drm_simple_display_pipe_funcs repaper_pipe_funcs = { .enable = repaper_pipe_enable, .disable = repaper_pipe_disable, .update = repaper_pipe_update, - .prepare_fb = drm_gem_simple_display_pipe_prepare_fb, }; static int repaper_connector_get_modes(struct drm_connector *connector) diff --git a/drivers/gpu/drm/tiny/simpledrm.c b/drivers/gpu/drm/tiny/simpledrm.c index 08ae66b1e6f5..481b48bde047 100644 --- a/drivers/gpu/drm/tiny/simpledrm.c +++ b/drivers/gpu/drm/tiny/simpledrm.c @@ -639,7 +639,7 @@ simpledrm_simple_display_pipe_enable(struct drm_simple_display_pipe *pipe, struct simpledrm_device *sdev = simpledrm_device_of_dev(pipe->crtc.dev); struct drm_shadow_plane_state *shadow_plane_state = to_drm_shadow_plane_state(plane_state); struct drm_framebuffer *fb = plane_state->fb; - void *vmap = shadow_plane_state->map[0].vaddr; /* TODO: Use mapping abstraction properly */ + void *vmap = shadow_plane_state->data[0].vaddr; /* TODO: Use mapping abstraction */ struct drm_device *dev = &sdev->dev; int idx; @@ -677,7 +677,7 @@ simpledrm_simple_display_pipe_update(struct drm_simple_display_pipe *pipe, struct simpledrm_device *sdev = simpledrm_device_of_dev(pipe->crtc.dev); struct drm_plane_state *plane_state = pipe->plane.state; struct drm_shadow_plane_state *shadow_plane_state = to_drm_shadow_plane_state(plane_state); - void *vmap = shadow_plane_state->map[0].vaddr; /* TODO: Use mapping abstraction properly */ + void *vmap = shadow_plane_state->data[0].vaddr; /* TODO: Use mapping abstraction */ struct drm_framebuffer *fb = plane_state->fb; struct drm_device *dev = &sdev->dev; struct drm_rect clip; diff --git a/drivers/gpu/drm/tiny/st7586.c b/drivers/gpu/drm/tiny/st7586.c index 05db980cc047..ad0faa8723c2 100644 --- a/drivers/gpu/drm/tiny/st7586.c +++ b/drivers/gpu/drm/tiny/st7586.c @@ -6,7 +6,6 @@ */ #include <linux/delay.h> -#include <linux/dma-buf.h> #include <linux/gpio/consumer.h> #include <linux/module.h> #include <linux/property.h> @@ -21,6 +20,7 @@ #include <drm/drm_format_helper.h> #include <drm/drm_gem_atomic_helper.h> #include <drm/drm_gem_cma_helper.h> +#include <drm/drm_gem_framebuffer_helper.h> #include <drm/drm_managed.h> #include <drm/drm_mipi_dbi.h> #include <drm/drm_rect.h> @@ -92,24 +92,18 @@ static int st7586_buf_copy(void *dst, struct drm_framebuffer *fb, struct drm_rect *clip) { struct drm_gem_cma_object *cma_obj = drm_fb_cma_get_gem_obj(fb, 0); - struct dma_buf_attachment *import_attach = cma_obj->base.import_attach; void *src 
= cma_obj->vaddr; int ret = 0; - if (import_attach) { - ret = dma_buf_begin_cpu_access(import_attach->dmabuf, - DMA_FROM_DEVICE); - if (ret) - return ret; - } + ret = drm_gem_fb_begin_cpu_access(fb, DMA_FROM_DEVICE); + if (ret) + return ret; st7586_xrgb8888_to_gray332(dst, src, fb, clip); - if (import_attach) - ret = dma_buf_end_cpu_access(import_attach->dmabuf, - DMA_FROM_DEVICE); + drm_gem_fb_end_cpu_access(fb, DMA_FROM_DEVICE); - return ret; + return 0; } static void st7586_fb_dirty(struct drm_framebuffer *fb, struct drm_rect *rect) @@ -268,7 +262,6 @@ static const struct drm_simple_display_pipe_funcs st7586_pipe_funcs = { .enable = st7586_pipe_enable, .disable = st7586_pipe_disable, .update = st7586_pipe_update, - .prepare_fb = drm_gem_simple_display_pipe_prepare_fb, }; static const struct drm_display_mode st7586_mode = { diff --git a/drivers/gpu/drm/tiny/st7735r.c b/drivers/gpu/drm/tiny/st7735r.c index e8b7815d8cae..fc40dd10efa8 100644 --- a/drivers/gpu/drm/tiny/st7735r.c +++ b/drivers/gpu/drm/tiny/st7735r.c @@ -136,7 +136,6 @@ static const struct drm_simple_display_pipe_funcs st7735r_pipe_funcs = { .enable = st7735r_pipe_enable, .disable = mipi_dbi_pipe_disable, .update = mipi_dbi_pipe_update, - .prepare_fb = drm_gem_simple_display_pipe_prepare_fb, }; static const struct st7735r_cfg jd_t18003_t01_cfg = { diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 8d7fd65ccced..ea4add2b9717 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -488,6 +488,31 @@ void ttm_bo_unlock_delayed_workqueue(struct ttm_device *bdev, int resched) } EXPORT_SYMBOL(ttm_bo_unlock_delayed_workqueue); +static int ttm_bo_bounce_temp_buffer(struct ttm_buffer_object *bo, + struct ttm_resource **mem, + struct ttm_operation_ctx *ctx, + struct ttm_place *hop) +{ + struct ttm_placement hop_placement; + struct ttm_resource *hop_mem; + int ret; + + hop_placement.num_placement = hop_placement.num_busy_placement = 1; + hop_placement.placement = hop_placement.busy_placement = hop; + + /* find space in the bounce domain */ + ret = ttm_bo_mem_space(bo, &hop_placement, &hop_mem, ctx); + if (ret) + return ret; + /* move to the bounce domain */ + ret = ttm_bo_handle_move_mem(bo, hop_mem, false, ctx, NULL); + if (ret) { + ttm_resource_free(bo, &hop_mem); + return ret; + } + return 0; +} + static int ttm_bo_evict(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx) { @@ -527,12 +552,17 @@ static int ttm_bo_evict(struct ttm_buffer_object *bo, goto out; } +bounce: ret = ttm_bo_handle_move_mem(bo, evict_mem, true, ctx, &hop); - if (unlikely(ret)) { - WARN(ret == -EMULTIHOP, "Unexpected multihop in eviction - likely driver bug\n"); - if (ret != -ERESTARTSYS) + if (ret == -EMULTIHOP) { + ret = ttm_bo_bounce_temp_buffer(bo, &evict_mem, ctx, &hop); + if (ret) { pr_err("Buffer eviction failed\n"); - ttm_resource_free(bo, &evict_mem); + ttm_resource_free(bo, &evict_mem); + goto out; + } + /* try and move to final place now. 
*/ + goto bounce; } out: return ret; @@ -847,31 +877,6 @@ error: } EXPORT_SYMBOL(ttm_bo_mem_space); -static int ttm_bo_bounce_temp_buffer(struct ttm_buffer_object *bo, - struct ttm_resource **mem, - struct ttm_operation_ctx *ctx, - struct ttm_place *hop) -{ - struct ttm_placement hop_placement; - struct ttm_resource *hop_mem; - int ret; - - hop_placement.num_placement = hop_placement.num_busy_placement = 1; - hop_placement.placement = hop_placement.busy_placement = hop; - - /* find space in the bounce domain */ - ret = ttm_bo_mem_space(bo, &hop_placement, &hop_mem, ctx); - if (ret) - return ret; - /* move to the bounce domain */ - ret = ttm_bo_handle_move_mem(bo, hop_mem, false, ctx, NULL); - if (ret) { - ttm_resource_free(bo, &hop_mem); - return ret; - } - return 0; -} - static int ttm_bo_move_buffer(struct ttm_buffer_object *bo, struct ttm_placement *placement, struct ttm_operation_ctx *ctx) @@ -916,6 +921,9 @@ static bool ttm_bo_places_compat(const struct ttm_place *places, { unsigned i; + if (mem->placement & TTM_PL_FLAG_TEMPORARY) + return false; + for (i = 0; i < num_placement; i++) { const struct ttm_place *heap = &places[i]; diff --git a/drivers/gpu/drm/tve200/tve200_display.c b/drivers/gpu/drm/tve200/tve200_display.c index 50e1fb71869f..17b8c8dd169d 100644 --- a/drivers/gpu/drm/tve200/tve200_display.c +++ b/drivers/gpu/drm/tve200/tve200_display.c @@ -316,7 +316,6 @@ static const struct drm_simple_display_pipe_funcs tve200_display_funcs = { .enable = tve200_display_enable, .disable = tve200_display_disable, .update = tve200_display_update, - .prepare_fb = drm_gem_simple_display_pipe_prepare_fb, .enable_vblank = tve200_display_enable_vblank, .disable_vblank = tve200_display_disable_vblank, }; diff --git a/drivers/gpu/drm/udl/udl_modeset.c b/drivers/gpu/drm/udl/udl_modeset.c index 8d98bf69d075..32232228dae9 100644 --- a/drivers/gpu/drm/udl/udl_modeset.c +++ b/drivers/gpu/drm/udl/udl_modeset.c @@ -6,11 +6,8 @@ * Copyright (C) 2009 Roberto De Ioris <roberto@unbit.it> * Copyright (C) 2009 Jaya Kumar <jayakumar.lkml@gmail.com> * Copyright (C) 2009 Bernie Thompson <bernie@plugable.com> - */ -#include <linux/dma-buf.h> - #include <drm/drm_atomic_helper.h> #include <drm/drm_crtc_helper.h> #include <drm/drm_damage_helper.h> @@ -271,9 +268,8 @@ static int udl_handle_damage(struct drm_framebuffer *fb, const struct dma_buf_ma int x, int y, int width, int height) { struct drm_device *dev = fb->dev; - struct dma_buf_attachment *import_attach = fb->obj[0]->import_attach; void *vaddr = map->vaddr; /* TODO: Use mapping abstraction properly */ - int i, ret, tmp_ret; + int i, ret; char *cmd; struct urb *urb; struct drm_rect clip; @@ -290,17 +286,14 @@ static int udl_handle_damage(struct drm_framebuffer *fb, const struct dma_buf_ma else if ((clip.x2 > fb->width) || (clip.y2 > fb->height)) return -EINVAL; - if (import_attach) { - ret = dma_buf_begin_cpu_access(import_attach->dmabuf, - DMA_FROM_DEVICE); - if (ret) - return ret; - } + ret = drm_gem_fb_begin_cpu_access(fb, DMA_FROM_DEVICE); + if (ret) + return ret; urb = udl_get_urb(dev); if (!urb) { ret = -ENOMEM; - goto out_dma_buf_end_cpu_access; + goto out_drm_gem_fb_end_cpu_access; } cmd = urb->transfer_buffer; @@ -313,7 +306,7 @@ static int udl_handle_damage(struct drm_framebuffer *fb, const struct dma_buf_ma &cmd, byte_offset, dev_byte_offset, byte_width); if (ret) - goto out_dma_buf_end_cpu_access; + goto out_drm_gem_fb_end_cpu_access; } if (cmd > (char *)urb->transfer_buffer) { @@ -329,14 +322,8 @@ static int udl_handle_damage(struct 
drm_framebuffer *fb, const struct dma_buf_ma ret = 0; -out_dma_buf_end_cpu_access: - if (import_attach) { - tmp_ret = dma_buf_end_cpu_access(import_attach->dmabuf, - DMA_FROM_DEVICE); - if (tmp_ret && !ret) - ret = tmp_ret; /* only update ret if not set yet */ - } - +out_drm_gem_fb_end_cpu_access: + drm_gem_fb_end_cpu_access(fb, DMA_FROM_DEVICE); return ret; } @@ -392,7 +379,7 @@ udl_simple_display_pipe_enable(struct drm_simple_display_pipe *pipe, udl->mode_buf_len = wrptr - buf; - udl_handle_damage(fb, &shadow_plane_state->map[0], 0, 0, fb->width, fb->height); + udl_handle_damage(fb, &shadow_plane_state->data[0], 0, 0, fb->width, fb->height); if (!crtc_state->mode_changed) return; @@ -435,7 +422,7 @@ udl_simple_display_pipe_update(struct drm_simple_display_pipe *pipe, return; if (drm_atomic_helper_damage_merged(old_plane_state, state, &rect)) - udl_handle_damage(fb, &shadow_plane_state->map[0], rect.x1, rect.y1, + udl_handle_damage(fb, &shadow_plane_state->data[0], rect.x1, rect.y1, rect.x2 - rect.x1, rect.y2 - rect.y1); } diff --git a/drivers/gpu/drm/v3d/Makefile b/drivers/gpu/drm/v3d/Makefile index db4cfc155821..e8b314137020 100644 --- a/drivers/gpu/drm/v3d/Makefile +++ b/drivers/gpu/drm/v3d/Makefile @@ -9,6 +9,7 @@ v3d-y := \ v3d_gem.o \ v3d_irq.o \ v3d_mmu.o \ + v3d_perfmon.o \ v3d_trace_points.o \ v3d_sched.o diff --git a/drivers/gpu/drm/v3d/v3d_drv.c b/drivers/gpu/drm/v3d/v3d_drv.c index 99e22beea90b..9403c3b36aca 100644 --- a/drivers/gpu/drm/v3d/v3d_drv.c +++ b/drivers/gpu/drm/v3d/v3d_drv.c @@ -94,6 +94,9 @@ static int v3d_get_param_ioctl(struct drm_device *dev, void *data, case DRM_V3D_PARAM_SUPPORTS_CACHE_FLUSH: args->value = 1; return 0; + case DRM_V3D_PARAM_SUPPORTS_PERFMON: + args->value = (v3d->ver >= 40); + return 0; default: DRM_DEBUG("Unknown parameter %d\n", args->param); return -EINVAL; @@ -121,6 +124,7 @@ v3d_open(struct drm_device *dev, struct drm_file *file) 1, NULL); } + v3d_perfmon_open_file(v3d_priv); file->driver_priv = v3d_priv; return 0; @@ -136,6 +140,7 @@ v3d_postclose(struct drm_device *dev, struct drm_file *file) drm_sched_entity_destroy(&v3d_priv->sched_entity[q]); } + v3d_perfmon_close_file(v3d_priv); kfree(v3d_priv); } @@ -156,6 +161,9 @@ static const struct drm_ioctl_desc v3d_drm_ioctls[] = { DRM_IOCTL_DEF_DRV(V3D_GET_BO_OFFSET, v3d_get_bo_offset_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(V3D_SUBMIT_TFU, v3d_submit_tfu_ioctl, DRM_RENDER_ALLOW | DRM_AUTH), DRM_IOCTL_DEF_DRV(V3D_SUBMIT_CSD, v3d_submit_csd_ioctl, DRM_RENDER_ALLOW | DRM_AUTH), + DRM_IOCTL_DEF_DRV(V3D_PERFMON_CREATE, v3d_perfmon_create_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(V3D_PERFMON_DESTROY, v3d_perfmon_destroy_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(V3D_PERFMON_GET_VALUES, v3d_perfmon_get_values_ioctl, DRM_RENDER_ALLOW), }; static const struct drm_driver v3d_drm_driver = { diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h index 8a390738d65b..270134779073 100644 --- a/drivers/gpu/drm/v3d/v3d_drv.h +++ b/drivers/gpu/drm/v3d/v3d_drv.h @@ -37,6 +37,40 @@ struct v3d_queue_state { u64 emit_seqno; }; +/* Performance monitor object. The perform lifetime is controlled by userspace + * using perfmon related ioctls. A perfmon can be attached to a submit_cl + * request, and when this is the case, HW perf counters will be activated just + * before the submit_cl is submitted to the GPU and disabled when the job is + * done. This way, only events related to a specific job will be counted. 
+ */ +struct v3d_perfmon { + /* Tracks the number of users of the perfmon, when this counter reaches + * zero the perfmon is destroyed. + */ + refcount_t refcnt; + + /* Protects perfmon stop, as it can be invoked from multiple places. */ + struct mutex lock; + + /* Number of counters activated in this perfmon instance + * (should be less than DRM_V3D_MAX_PERF_COUNTERS). + */ + u8 ncounters; + + /* Events counted by the HW perf counters. */ + u8 counters[DRM_V3D_MAX_PERF_COUNTERS]; + + /* Storage for counter values. Counters are incremented by the + * HW perf counter values every time the perfmon is attached + * to a GPU job. This way, perfmon users don't have to + * retrieve the results after each job if they want to track + * events covering several submissions. Note that counter + * values can't be reset, but you can fake a reset by + * destroying the perfmon and creating a new one. + */ + u64 values[]; +}; + struct v3d_dev { struct drm_device drm; @@ -89,6 +123,9 @@ struct v3d_dev { */ spinlock_t job_lock; + /* Used to track the active perfmon if any. */ + struct v3d_perfmon *active_perfmon; + /* Protects bo_stats */ struct mutex bo_lock; @@ -133,6 +170,11 @@ v3d_has_csd(struct v3d_dev *v3d) struct v3d_file_priv { struct v3d_dev *v3d; + struct { + struct idr idr; + struct mutex lock; + } perfmon; + struct drm_sched_entity sched_entity[V3D_MAX_QUEUES]; }; @@ -205,6 +247,11 @@ struct v3d_job { */ struct dma_fence *done_fence; + /* Pointer to a performance monitor object if the user requested it, + * NULL otherwise. + */ + struct v3d_perfmon *perfmon; + /* Callback for the freeing of the job on refcount going to 0. */ void (*free)(struct kref *ref); }; @@ -353,3 +400,19 @@ void v3d_mmu_remove_ptes(struct v3d_bo *bo); /* v3d_sched.c */ int v3d_sched_init(struct v3d_dev *v3d); void v3d_sched_fini(struct v3d_dev *v3d); + +/* v3d_perfmon.c */ +void v3d_perfmon_get(struct v3d_perfmon *perfmon); +void v3d_perfmon_put(struct v3d_perfmon *perfmon); +void v3d_perfmon_start(struct v3d_dev *v3d, struct v3d_perfmon *perfmon); +void v3d_perfmon_stop(struct v3d_dev *v3d, struct v3d_perfmon *perfmon, + bool capture); +struct v3d_perfmon *v3d_perfmon_find(struct v3d_file_priv *v3d_priv, int id); +void v3d_perfmon_open_file(struct v3d_file_priv *v3d_priv); +void v3d_perfmon_close_file(struct v3d_file_priv *v3d_priv); +int v3d_perfmon_create_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); +int v3d_perfmon_destroy_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); +int v3d_perfmon_get_values_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c index 4eb354226972..5689da118197 100644 --- a/drivers/gpu/drm/v3d/v3d_gem.c +++ b/drivers/gpu/drm/v3d/v3d_gem.c @@ -126,6 +126,8 @@ v3d_reset(struct v3d_dev *v3d) v3d_mmu_set_page_table(v3d); v3d_irq_reset(v3d); + v3d_perfmon_stop(v3d, v3d->active_perfmon, false); + trace_v3d_reset_end(dev); } @@ -375,6 +377,9 @@ v3d_job_free(struct kref *ref) pm_runtime_mark_last_busy(job->v3d->drm.dev); pm_runtime_put_autosuspend(job->v3d->drm.dev); + if (job->perfmon) + v3d_perfmon_put(job->perfmon); + kfree(job); } @@ -539,6 +544,9 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data, trace_v3d_submit_cl_ioctl(&v3d->drm, args->rcl_start, args->rcl_end); + if (args->pad != 0) + return -EINVAL; + if (args->flags != 0 && args->flags != DRM_V3D_SUBMIT_CL_FLUSH_CACHE) { DRM_INFO("invalid flags: %d\n", args->flags); @@ -611,8 +619,20 @@ 
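For context, the perfmon lifetime described in the comment above is driven entirely from userspace. Below is a minimal, illustrative sketch of the intended ioctl flow; the drm_v3d_perfmon_* structures, the DRM_IOCTL_V3D_PERFMON_* request numbers and the submit_cl perfmon_id field are assumed from the uapi additions that accompany this series (not shown in this excerpt), and the helper name is hypothetical.

#include <stdint.h>
#include <string.h>
#include <xf86drm.h>
#include "v3d_drm.h"   /* assumed libdrm copy of the updated uapi header */

/* Illustrative only: create a perfmon, attach it to one CL submission,
 * read back the accumulated counters, then drop the reference.
 * ncounters must not exceed DRM_V3D_MAX_PERF_COUNTERS. */
static int run_job_with_perfmon(int fd, struct drm_v3d_submit_cl *submit,
				const uint8_t *counters, uint8_t ncounters,
				uint64_t *values)
{
	struct drm_v3d_perfmon_create create = { .ncounters = ncounters };
	struct drm_v3d_perfmon_get_values get = { 0 };
	struct drm_v3d_perfmon_destroy destroy = { 0 };
	int ret;

	memcpy(create.counters, counters, ncounters);
	ret = drmIoctl(fd, DRM_IOCTL_V3D_PERFMON_CREATE, &create);
	if (ret)
		return ret;

	/* Counters only run while this job (and its bin/clean jobs) run. */
	submit->perfmon_id = create.id;
	ret = drmIoctl(fd, DRM_IOCTL_V3D_SUBMIT_CL, submit);
	if (ret)
		goto out_destroy;

	/* A real user would wait for the job to finish first, e.g. via the
	 * submit's out_sync syncobj; GET_VALUES stops an active perfmon. */
	get.id = create.id;
	get.values_ptr = (uintptr_t)values;	/* room for ncounters u64s */
	ret = drmIoctl(fd, DRM_IOCTL_V3D_PERFMON_GET_VALUES, &get);

out_destroy:
	destroy.id = create.id;
	drmIoctl(fd, DRM_IOCTL_V3D_PERFMON_DESTROY, &destroy);
	return ret;
}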
v3d_submit_cl_ioctl(struct drm_device *dev, void *data, if (ret) goto fail; + if (args->perfmon_id) { + render->base.perfmon = v3d_perfmon_find(v3d_priv, + args->perfmon_id); + + if (!render->base.perfmon) { + ret = -ENOENT; + goto fail; + } + } + mutex_lock(&v3d->sched_lock); if (bin) { + bin->base.perfmon = render->base.perfmon; + v3d_perfmon_get(bin->base.perfmon); ret = v3d_push_job(v3d_priv, &bin->base, V3D_BIN); if (ret) goto fail_unreserve; @@ -633,6 +653,8 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data, ret = drm_gem_fence_array_add(&clean_job->deps, render_fence); if (ret) goto fail_unreserve; + clean_job->perfmon = render->base.perfmon; + v3d_perfmon_get(clean_job->perfmon); ret = v3d_push_job(v3d_priv, clean_job, V3D_CACHE_CLEAN); if (ret) goto fail_unreserve; @@ -827,6 +849,15 @@ v3d_submit_csd_ioctl(struct drm_device *dev, void *data, if (ret) goto fail; + if (args->perfmon_id) { + job->base.perfmon = v3d_perfmon_find(v3d_priv, + args->perfmon_id); + if (!job->base.perfmon) { + ret = -ENOENT; + goto fail; + } + } + mutex_lock(&v3d->sched_lock); ret = v3d_push_job(v3d_priv, &job->base, V3D_CSD); if (ret) diff --git a/drivers/gpu/drm/v3d/v3d_perfmon.c b/drivers/gpu/drm/v3d/v3d_perfmon.c new file mode 100644 index 000000000000..0288ef063513 --- /dev/null +++ b/drivers/gpu/drm/v3d/v3d_perfmon.c @@ -0,0 +1,213 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021 Raspberry Pi + */ + +#include "v3d_drv.h" +#include "v3d_regs.h" + +#define V3D_PERFMONID_MIN 1 +#define V3D_PERFMONID_MAX U32_MAX + +void v3d_perfmon_get(struct v3d_perfmon *perfmon) +{ + if (perfmon) + refcount_inc(&perfmon->refcnt); +} + +void v3d_perfmon_put(struct v3d_perfmon *perfmon) +{ + if (perfmon && refcount_dec_and_test(&perfmon->refcnt)) + kfree(perfmon); +} + +void v3d_perfmon_start(struct v3d_dev *v3d, struct v3d_perfmon *perfmon) +{ + unsigned int i; + u32 mask; + u8 ncounters = perfmon->ncounters; + + if (WARN_ON_ONCE(!perfmon || v3d->active_perfmon)) + return; + + mask = GENMASK(ncounters - 1, 0); + + for (i = 0; i < ncounters; i++) { + u32 source = i / 4; + u32 channel = V3D_SET_FIELD(perfmon->counters[i], V3D_PCTR_S0); + + i++; + channel |= V3D_SET_FIELD(i < ncounters ? perfmon->counters[i] : 0, + V3D_PCTR_S1); + i++; + channel |= V3D_SET_FIELD(i < ncounters ? perfmon->counters[i] : 0, + V3D_PCTR_S2); + i++; + channel |= V3D_SET_FIELD(i < ncounters ? 
perfmon->counters[i] : 0, + V3D_PCTR_S3); + V3D_CORE_WRITE(0, V3D_V4_PCTR_0_SRC_X(source), channel); + } + + V3D_CORE_WRITE(0, V3D_V4_PCTR_0_CLR, mask); + V3D_CORE_WRITE(0, V3D_PCTR_0_OVERFLOW, mask); + V3D_CORE_WRITE(0, V3D_V4_PCTR_0_EN, mask); + + v3d->active_perfmon = perfmon; +} + +void v3d_perfmon_stop(struct v3d_dev *v3d, struct v3d_perfmon *perfmon, + bool capture) +{ + unsigned int i; + + if (!perfmon || !v3d->active_perfmon) + return; + + mutex_lock(&perfmon->lock); + if (perfmon != v3d->active_perfmon) { + mutex_unlock(&perfmon->lock); + return; + } + + if (capture) + for (i = 0; i < perfmon->ncounters; i++) + perfmon->values[i] += V3D_CORE_READ(0, V3D_PCTR_0_PCTRX(i)); + + V3D_CORE_WRITE(0, V3D_V4_PCTR_0_EN, 0); + + v3d->active_perfmon = NULL; + mutex_unlock(&perfmon->lock); +} + +struct v3d_perfmon *v3d_perfmon_find(struct v3d_file_priv *v3d_priv, int id) +{ + struct v3d_perfmon *perfmon; + + mutex_lock(&v3d_priv->perfmon.lock); + perfmon = idr_find(&v3d_priv->perfmon.idr, id); + v3d_perfmon_get(perfmon); + mutex_unlock(&v3d_priv->perfmon.lock); + + return perfmon; +} + +void v3d_perfmon_open_file(struct v3d_file_priv *v3d_priv) +{ + mutex_init(&v3d_priv->perfmon.lock); + idr_init(&v3d_priv->perfmon.idr); +} + +static int v3d_perfmon_idr_del(int id, void *elem, void *data) +{ + struct v3d_perfmon *perfmon = elem; + + v3d_perfmon_put(perfmon); + + return 0; +} + +void v3d_perfmon_close_file(struct v3d_file_priv *v3d_priv) +{ + mutex_lock(&v3d_priv->perfmon.lock); + idr_for_each(&v3d_priv->perfmon.idr, v3d_perfmon_idr_del, NULL); + idr_destroy(&v3d_priv->perfmon.idr); + mutex_unlock(&v3d_priv->perfmon.lock); +} + +int v3d_perfmon_create_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct v3d_file_priv *v3d_priv = file_priv->driver_priv; + struct drm_v3d_perfmon_create *req = data; + struct v3d_perfmon *perfmon; + unsigned int i; + int ret; + + /* Number of monitored counters cannot exceed HW limits. */ + if (req->ncounters > DRM_V3D_MAX_PERF_COUNTERS || + !req->ncounters) + return -EINVAL; + + /* Make sure all counters are valid. 
*/ + for (i = 0; i < req->ncounters; i++) { + if (req->counters[i] >= V3D_PERFCNT_NUM) + return -EINVAL; + } + + perfmon = kzalloc(struct_size(perfmon, values, req->ncounters), + GFP_KERNEL); + if (!perfmon) + return -ENOMEM; + + for (i = 0; i < req->ncounters; i++) + perfmon->counters[i] = req->counters[i]; + + perfmon->ncounters = req->ncounters; + + refcount_set(&perfmon->refcnt, 1); + mutex_init(&perfmon->lock); + + mutex_lock(&v3d_priv->perfmon.lock); + ret = idr_alloc(&v3d_priv->perfmon.idr, perfmon, V3D_PERFMONID_MIN, + V3D_PERFMONID_MAX, GFP_KERNEL); + mutex_unlock(&v3d_priv->perfmon.lock); + + if (ret < 0) { + kfree(perfmon); + return ret; + } + + req->id = ret; + + return 0; +} + +int v3d_perfmon_destroy_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct v3d_file_priv *v3d_priv = file_priv->driver_priv; + struct drm_v3d_perfmon_destroy *req = data; + struct v3d_perfmon *perfmon; + + mutex_lock(&v3d_priv->perfmon.lock); + perfmon = idr_remove(&v3d_priv->perfmon.idr, req->id); + mutex_unlock(&v3d_priv->perfmon.lock); + + if (!perfmon) + return -EINVAL; + + v3d_perfmon_put(perfmon); + + return 0; +} + +int v3d_perfmon_get_values_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct v3d_dev *v3d = to_v3d_dev(dev); + struct v3d_file_priv *v3d_priv = file_priv->driver_priv; + struct drm_v3d_perfmon_get_values *req = data; + struct v3d_perfmon *perfmon; + int ret = 0; + + if (req->pad != 0) + return -EINVAL; + + mutex_lock(&v3d_priv->perfmon.lock); + perfmon = idr_find(&v3d_priv->perfmon.idr, req->id); + v3d_perfmon_get(perfmon); + mutex_unlock(&v3d_priv->perfmon.lock); + + if (!perfmon) + return -EINVAL; + + v3d_perfmon_stop(v3d, perfmon, true); + + if (copy_to_user(u64_to_user_ptr(req->values_ptr), perfmon->values, + perfmon->ncounters * sizeof(u64))) + ret = -EFAULT; + + v3d_perfmon_put(perfmon); + + return ret; +} diff --git a/drivers/gpu/drm/v3d/v3d_regs.h b/drivers/gpu/drm/v3d/v3d_regs.h index 9bcb57781d31..3663e0d6bf76 100644 --- a/drivers/gpu/drm/v3d/v3d_regs.h +++ b/drivers/gpu/drm/v3d/v3d_regs.h @@ -347,6 +347,8 @@ /* Each src reg muxes four counters each. */ #define V3D_V4_PCTR_0_SRC_0_3 0x00660 #define V3D_V4_PCTR_0_SRC_28_31 0x0067c +#define V3D_V4_PCTR_0_SRC_X(x) (V3D_V4_PCTR_0_SRC_0_3 + \ + 4 * (x)) # define V3D_PCTR_S0_MASK V3D_MASK(6, 0) # define V3D_PCTR_S0_SHIFT 0 # define V3D_PCTR_S1_MASK V3D_MASK(14, 8) diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c index 8992480c88fa..dd7fcc36d726 100644 --- a/drivers/gpu/drm/v3d/v3d_sched.c +++ b/drivers/gpu/drm/v3d/v3d_sched.c @@ -63,6 +63,16 @@ v3d_job_free(struct drm_sched_job *sched_job) v3d_job_put(job); } +static void +v3d_switch_perfmon(struct v3d_dev *v3d, struct v3d_job *job) +{ + if (job->perfmon != v3d->active_perfmon) + v3d_perfmon_stop(v3d, v3d->active_perfmon, true); + + if (job->perfmon && v3d->active_perfmon != job->perfmon) + v3d_perfmon_start(v3d, job->perfmon); +} + /* * Returns the fences that the job depends on, one by one. * @@ -120,6 +130,8 @@ static struct dma_fence *v3d_bin_job_run(struct drm_sched_job *sched_job) trace_v3d_submit_cl(dev, false, to_v3d_fence(fence)->seqno, job->start, job->end); + v3d_switch_perfmon(v3d, &job->base); + /* Set the current and end address of the control list. * Writing the end register is what starts the job. 
*/ @@ -169,6 +181,8 @@ static struct dma_fence *v3d_render_job_run(struct drm_sched_job *sched_job) trace_v3d_submit_cl(dev, true, to_v3d_fence(fence)->seqno, job->start, job->end); + v3d_switch_perfmon(v3d, &job->base); + /* XXX: Set the QCFG */ /* Set the current and end address of the control list. @@ -240,6 +254,8 @@ v3d_csd_job_run(struct drm_sched_job *sched_job) trace_v3d_submit_csd(dev, to_v3d_fence(fence)->seqno); + v3d_switch_perfmon(v3d, &job->base); + for (i = 1; i <= 6; i++) V3D_CORE_WRITE(0, V3D_CSD_QUEUED_CFG0 + 4 * i, job->args.cfg[i]); /* CFG0 write kicks off the job. */ @@ -402,7 +418,7 @@ v3d_sched_init(struct v3d_dev *v3d) ret = drm_sched_init(&v3d->queue[V3D_BIN].sched, &v3d_bin_sched_ops, hw_jobs_limit, job_hang_limit, - msecs_to_jiffies(hang_limit_ms), + msecs_to_jiffies(hang_limit_ms), NULL, NULL, "v3d_bin"); if (ret) { dev_err(v3d->drm.dev, "Failed to create bin scheduler: %d.", ret); @@ -412,7 +428,7 @@ v3d_sched_init(struct v3d_dev *v3d) ret = drm_sched_init(&v3d->queue[V3D_RENDER].sched, &v3d_render_sched_ops, hw_jobs_limit, job_hang_limit, - msecs_to_jiffies(hang_limit_ms), + msecs_to_jiffies(hang_limit_ms), NULL, NULL, "v3d_render"); if (ret) { dev_err(v3d->drm.dev, "Failed to create render scheduler: %d.", @@ -424,7 +440,7 @@ v3d_sched_init(struct v3d_dev *v3d) ret = drm_sched_init(&v3d->queue[V3D_TFU].sched, &v3d_tfu_sched_ops, hw_jobs_limit, job_hang_limit, - msecs_to_jiffies(hang_limit_ms), + msecs_to_jiffies(hang_limit_ms), NULL, NULL, "v3d_tfu"); if (ret) { dev_err(v3d->drm.dev, "Failed to create TFU scheduler: %d.", @@ -437,7 +453,7 @@ v3d_sched_init(struct v3d_dev *v3d) ret = drm_sched_init(&v3d->queue[V3D_CSD].sched, &v3d_csd_sched_ops, hw_jobs_limit, job_hang_limit, - msecs_to_jiffies(hang_limit_ms), + msecs_to_jiffies(hang_limit_ms), NULL, NULL, "v3d_csd"); if (ret) { dev_err(v3d->drm.dev, "Failed to create CSD scheduler: %d.", @@ -449,7 +465,7 @@ v3d_sched_init(struct v3d_dev *v3d) ret = drm_sched_init(&v3d->queue[V3D_CACHE_CLEAN].sched, &v3d_cache_clean_sched_ops, hw_jobs_limit, job_hang_limit, - msecs_to_jiffies(hang_limit_ms), + msecs_to_jiffies(hang_limit_ms), NULL, NULL, "v3d_cache_clean"); if (ret) { dev_err(v3d->drm.dev, "Failed to create CACHE_CLEAN scheduler: %d.", diff --git a/drivers/gpu/drm/vboxvideo/vbox_drv.c b/drivers/gpu/drm/vboxvideo/vbox_drv.c index 6d4b32da9866..2b81cb259d23 100644 --- a/drivers/gpu/drm/vboxvideo/vbox_drv.c +++ b/drivers/gpu/drm/vboxvideo/vbox_drv.c @@ -43,7 +43,7 @@ static int vbox_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (!vbox_check_supported(VBE_DISPI_ID_HGSMI)) return -ENODEV; - ret = drm_aperture_remove_conflicting_pci_framebuffers(pdev, "vboxvideodrmfb"); + ret = drm_aperture_remove_conflicting_pci_framebuffers(pdev, &driver); if (ret) return ret; @@ -184,7 +184,6 @@ static const struct drm_driver driver = { .lastclose = drm_fb_helper_lastclose, .fops = &vbox_fops, - .irq_handler = vbox_irq_handler, .name = DRIVER_NAME, .desc = DRIVER_DESC, .date = DRIVER_DATE, diff --git a/drivers/gpu/drm/vboxvideo/vbox_drv.h b/drivers/gpu/drm/vboxvideo/vbox_drv.h index ac7c2effc46f..4903b91d7fe4 100644 --- a/drivers/gpu/drm/vboxvideo/vbox_drv.h +++ b/drivers/gpu/drm/vboxvideo/vbox_drv.h @@ -145,7 +145,6 @@ void vbox_mm_fini(struct vbox_private *vbox); int vbox_irq_init(struct vbox_private *vbox); void vbox_irq_fini(struct vbox_private *vbox); void vbox_report_hotplug(struct vbox_private *vbox); -irqreturn_t vbox_irq_handler(int irq, void *arg); /* vbox_hgsmi.c */ void *hgsmi_buffer_alloc(struct 
gen_pool *guest_pool, size_t size, diff --git a/drivers/gpu/drm/vboxvideo/vbox_irq.c b/drivers/gpu/drm/vboxvideo/vbox_irq.c index b3ded68603ba..903a6c48ee8b 100644 --- a/drivers/gpu/drm/vboxvideo/vbox_irq.c +++ b/drivers/gpu/drm/vboxvideo/vbox_irq.c @@ -10,7 +10,8 @@ */ #include <linux/pci.h> -#include <drm/drm_irq.h> + +#include <drm/drm_drv.h> #include <drm/drm_probe_helper.h> #include "vbox_drv.h" @@ -31,7 +32,7 @@ void vbox_report_hotplug(struct vbox_private *vbox) schedule_work(&vbox->hotplug_work); } -irqreturn_t vbox_irq_handler(int irq, void *arg) +static irqreturn_t vbox_irq_handler(int irq, void *arg) { struct drm_device *dev = (struct drm_device *)arg; struct vbox_private *vbox = to_vbox_dev(dev); @@ -170,16 +171,21 @@ static void vbox_hotplug_worker(struct work_struct *work) int vbox_irq_init(struct vbox_private *vbox) { - struct pci_dev *pdev = to_pci_dev(vbox->ddev.dev); + struct drm_device *dev = &vbox->ddev; + struct pci_dev *pdev = to_pci_dev(dev->dev); INIT_WORK(&vbox->hotplug_work, vbox_hotplug_worker); vbox_update_mode_hints(vbox); - return drm_irq_install(&vbox->ddev, pdev->irq); + /* PCI devices require shared interrupts. */ + return request_irq(pdev->irq, vbox_irq_handler, IRQF_SHARED, dev->driver->name, dev); } void vbox_irq_fini(struct vbox_private *vbox) { - drm_irq_uninstall(&vbox->ddev); + struct drm_device *dev = &vbox->ddev; + struct pci_dev *pdev = to_pci_dev(dev->dev); + + free_irq(pdev->irq, dev); flush_work(&vbox->hotplug_work); } diff --git a/drivers/gpu/drm/vboxvideo/vbox_mode.c b/drivers/gpu/drm/vboxvideo/vbox_mode.c index 964381d55fc1..4227a915b06a 100644 --- a/drivers/gpu/drm/vboxvideo/vbox_mode.c +++ b/drivers/gpu/drm/vboxvideo/vbox_mode.c @@ -398,7 +398,7 @@ static void vbox_cursor_atomic_update(struct drm_plane *plane, u32 height = new_state->crtc_h; struct drm_shadow_plane_state *shadow_plane_state = to_drm_shadow_plane_state(new_state); - struct dma_buf_map map = shadow_plane_state->map[0]; + struct dma_buf_map map = shadow_plane_state->data[0]; u8 *src = map.vaddr; /* TODO: Use mapping abstraction properly */ size_t data_size, mask_size; u32 flags; @@ -488,8 +488,7 @@ static const struct drm_plane_helper_funcs vbox_primary_helper_funcs = { .atomic_check = vbox_primary_atomic_check, .atomic_update = vbox_primary_atomic_update, .atomic_disable = vbox_primary_atomic_disable, - .prepare_fb = drm_gem_vram_plane_helper_prepare_fb, - .cleanup_fb = drm_gem_vram_plane_helper_cleanup_fb, + DRM_GEM_VRAM_PLANE_HELPER_FUNCS, }; static const struct drm_plane_funcs vbox_primary_plane_funcs = { diff --git a/drivers/gpu/drm/vc4/Kconfig b/drivers/gpu/drm/vc4/Kconfig index 118e8a426b1a..345a5570a3da 100644 --- a/drivers/gpu/drm/vc4/Kconfig +++ b/drivers/gpu/drm/vc4/Kconfig @@ -12,6 +12,7 @@ config DRM_VC4 select SND_PCM select SND_PCM_ELD select SND_SOC_GENERIC_DMAENGINE_PCM + select SND_SOC_HDMI_CODEC select DRM_MIPI_DSI help Choose this option if you have a system that has a Broadcom diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c index 8a60fb8ad370..f6c16c5aee68 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.c +++ b/drivers/gpu/drm/vc4/vc4_drv.c @@ -168,10 +168,6 @@ static struct drm_driver vc4_drm_driver = { DRIVER_SYNCOBJ), .open = vc4_open, .postclose = vc4_close, - .irq_handler = vc4_irq, - .irq_preinstall = vc4_irq_preinstall, - .irq_postinstall = vc4_irq_postinstall, - .irq_uninstall = vc4_irq_uninstall, #if defined(CONFIG_DEBUG_FS) .debugfs_init = vc4_debugfs_init, @@ -265,7 +261,7 @@ static int vc4_drm_bind(struct device *dev) if 
(ret) goto unbind_all; - ret = drm_aperture_remove_framebuffers(false, "vc4drmfb"); + ret = drm_aperture_remove_framebuffers(false, &vc4_drm_driver); if (ret) goto unbind_all; diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h index 5dceadc61600..ef73e0aaf726 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.h +++ b/drivers/gpu/drm/vc4/vc4_drv.h @@ -74,6 +74,8 @@ struct vc4_perfmon { struct vc4_dev { struct drm_device base; + unsigned int irq; + struct vc4_hvs *hvs; struct vc4_v3d *v3d; struct vc4_dpi *dpi; @@ -895,9 +897,9 @@ extern struct platform_driver vc4_vec_driver; extern struct platform_driver vc4_txp_driver; /* vc4_irq.c */ -irqreturn_t vc4_irq(int irq, void *arg); -void vc4_irq_preinstall(struct drm_device *dev); -int vc4_irq_postinstall(struct drm_device *dev); +void vc4_irq_enable(struct drm_device *dev); +void vc4_irq_disable(struct drm_device *dev); +int vc4_irq_install(struct drm_device *dev, int irq); void vc4_irq_uninstall(struct drm_device *dev); void vc4_irq_reset(struct drm_device *dev); diff --git a/drivers/gpu/drm/vc4/vc4_dsi.c b/drivers/gpu/drm/vc4/vc4_dsi.c index a55256ed0955..a185027911ce 100644 --- a/drivers/gpu/drm/vc4/vc4_dsi.c +++ b/drivers/gpu/drm/vc4/vc4_dsi.c @@ -1646,10 +1646,8 @@ static int vc4_dsi_bind(struct device *dev, struct device *master, void *data) drm_encoder_helper_add(dsi->encoder, &vc4_dsi_encoder_helper_funcs); ret = drm_bridge_attach(dsi->encoder, dsi->bridge, NULL, 0); - if (ret) { - dev_err(dev, "bridge attach failed: %d\n", ret); + if (ret) return ret; - } /* Disable the atomic helper calls into the bridge. We * manually call the bridge pre_enable / enable / etc. calls * from our driver, since we need to sequence them within the diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c index c2876731ee2d..b7dc32a0c9bb 100644 --- a/drivers/gpu/drm/vc4/vc4_hdmi.c +++ b/drivers/gpu/drm/vc4/vc4_hdmi.c @@ -46,6 +46,7 @@ #include <linux/rational.h> #include <linux/reset.h> #include <sound/dmaengine_pcm.h> +#include <sound/hdmi-codec.h> #include <sound/pcm_drm_eld.h> #include <sound/pcm_params.h> #include <sound/soc.h> @@ -435,7 +436,7 @@ static void vc4_hdmi_set_avi_infoframe(struct drm_encoder *encoder) struct vc4_hdmi_encoder *vc4_encoder = to_vc4_hdmi_encoder(encoder); struct drm_connector *connector = &vc4_hdmi->connector; struct drm_connector_state *cstate = connector->state; - struct drm_crtc *crtc = encoder->crtc; + struct drm_crtc *crtc = cstate->crtc; const struct drm_display_mode *mode = &crtc->state->adjusted_mode; union hdmi_infoframe frame; int ret; @@ -477,15 +478,10 @@ static void vc4_hdmi_set_spd_infoframe(struct drm_encoder *encoder) static void vc4_hdmi_set_audio_infoframe(struct drm_encoder *encoder) { struct vc4_hdmi *vc4_hdmi = encoder_to_vc4_hdmi(encoder); + struct hdmi_audio_infoframe *audio = &vc4_hdmi->audio.infoframe; union hdmi_infoframe frame; - hdmi_audio_infoframe_init(&frame.audio); - - frame.audio.coding_type = HDMI_AUDIO_CODING_TYPE_STREAM; - frame.audio.sample_frequency = HDMI_AUDIO_SAMPLE_FREQUENCY_STREAM; - frame.audio.sample_size = HDMI_AUDIO_SAMPLE_SIZE_STREAM; - frame.audio.channels = vc4_hdmi->audio.channels; - + memcpy(&frame.audio, audio, sizeof(*audio)); vc4_hdmi_write_infoframe(encoder, &frame); } @@ -545,8 +541,11 @@ static bool vc4_hdmi_supports_scrambling(struct drm_encoder *encoder, static void vc4_hdmi_enable_scrambling(struct drm_encoder *encoder) { - struct drm_display_mode *mode = &encoder->crtc->state->adjusted_mode; struct vc4_hdmi *vc4_hdmi = 
encoder_to_vc4_hdmi(encoder); + struct drm_connector *connector = &vc4_hdmi->connector; + struct drm_connector_state *cstate = connector->state; + struct drm_crtc *crtc = cstate->crtc; + struct drm_display_mode *mode = &crtc->state->adjusted_mode; if (!vc4_hdmi_supports_scrambling(encoder, mode)) return; @@ -567,17 +566,18 @@ static void vc4_hdmi_enable_scrambling(struct drm_encoder *encoder) static void vc4_hdmi_disable_scrambling(struct drm_encoder *encoder) { struct vc4_hdmi *vc4_hdmi = encoder_to_vc4_hdmi(encoder); - struct drm_crtc *crtc = encoder->crtc; + struct drm_connector *connector = &vc4_hdmi->connector; + struct drm_connector_state *cstate = connector->state; /* - * At boot, encoder->crtc will be NULL. Since we don't know the + * At boot, connector->state will be NULL. Since we don't know the * state of the scrambler and in order to avoid any * inconsistency, let's disable it all the time. */ - if (crtc && !vc4_hdmi_supports_scrambling(encoder, &crtc->mode)) + if (cstate && !vc4_hdmi_supports_scrambling(encoder, &cstate->crtc->mode)) return; - if (crtc && !vc4_hdmi_mode_needs_scrambling(&crtc->mode)) + if (cstate && !vc4_hdmi_mode_needs_scrambling(&cstate->crtc->mode)) return; if (delayed_work_pending(&vc4_hdmi->scrambling_work)) @@ -613,12 +613,12 @@ static void vc4_hdmi_encoder_post_crtc_disable(struct drm_encoder *encoder, HDMI_WRITE(HDMI_RAM_PACKET_CONFIG, 0); - HDMI_WRITE(HDMI_VID_CTL, HDMI_READ(HDMI_VID_CTL) | - VC4_HD_VID_CTL_CLRRGB | VC4_HD_VID_CTL_CLRSYNC); + HDMI_WRITE(HDMI_VID_CTL, HDMI_READ(HDMI_VID_CTL) | VC4_HD_VID_CTL_CLRRGB); - HDMI_WRITE(HDMI_VID_CTL, - HDMI_READ(HDMI_VID_CTL) | VC4_HD_VID_CTL_BLANKPIX); + mdelay(1); + HDMI_WRITE(HDMI_VID_CTL, + HDMI_READ(HDMI_VID_CTL) & ~VC4_HD_VID_CTL_ENABLE); vc4_hdmi_disable_scrambling(encoder); } @@ -628,12 +628,12 @@ static void vc4_hdmi_encoder_post_crtc_powerdown(struct drm_encoder *encoder, struct vc4_hdmi *vc4_hdmi = encoder_to_vc4_hdmi(encoder); int ret; + HDMI_WRITE(HDMI_VID_CTL, + HDMI_READ(HDMI_VID_CTL) | VC4_HD_VID_CTL_BLANKPIX); + if (vc4_hdmi->variant->phy_disable) vc4_hdmi->variant->phy_disable(vc4_hdmi); - HDMI_WRITE(HDMI_VID_CTL, - HDMI_READ(HDMI_VID_CTL) & ~VC4_HD_VID_CTL_ENABLE); - clk_disable_unprepare(vc4_hdmi->pixel_bvb_clock); clk_disable_unprepare(vc4_hdmi->pixel_clock); @@ -898,7 +898,9 @@ static void vc4_hdmi_encoder_pre_crtc_configure(struct drm_encoder *encoder, vc4_hdmi_encoder_get_connector_state(encoder, state); struct vc4_hdmi_connector_state *vc4_conn_state = conn_state_to_vc4_hdmi_conn_state(conn_state); - struct drm_display_mode *mode = &encoder->crtc->state->adjusted_mode; + struct drm_crtc_state *crtc_state = + drm_atomic_get_new_crtc_state(state, conn_state->crtc); + struct drm_display_mode *mode = &crtc_state->adjusted_mode; struct vc4_hdmi *vc4_hdmi = encoder_to_vc4_hdmi(encoder); unsigned long bvb_rate, pixel_rate, hsm_rate; int ret; @@ -983,7 +985,11 @@ static void vc4_hdmi_encoder_pre_crtc_configure(struct drm_encoder *encoder, static void vc4_hdmi_encoder_pre_crtc_enable(struct drm_encoder *encoder, struct drm_atomic_state *state) { - struct drm_display_mode *mode = &encoder->crtc->state->adjusted_mode; + struct drm_connector_state *conn_state = + vc4_hdmi_encoder_get_connector_state(encoder, state); + struct drm_crtc_state *crtc_state = + drm_atomic_get_new_crtc_state(state, conn_state->crtc); + struct drm_display_mode *mode = &crtc_state->adjusted_mode; struct vc4_hdmi_encoder *vc4_encoder = to_vc4_hdmi_encoder(encoder); struct vc4_hdmi *vc4_hdmi = encoder_to_vc4_hdmi(encoder); 
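The vc4_hdmi hunks above and below all replace uses of the legacy encoder->crtc pointer with a lookup through the connector's atomic state. A minimal sketch of that pattern, with an illustrative helper name (not part of this patch), assuming <drm/drm_atomic.h>:

/* Fetch the adjusted mode for an encoder from the atomic state being
 * committed, going through the connector instead of encoder->crtc. */
static const struct drm_display_mode *
example_get_adjusted_mode(struct drm_connector *connector,
			  struct drm_atomic_state *state)
{
	struct drm_connector_state *conn_state =
		drm_atomic_get_new_connector_state(state, connector);
	struct drm_crtc_state *crtc_state;

	if (!conn_state || !conn_state->crtc)
		return NULL;

	crtc_state = drm_atomic_get_new_crtc_state(state, conn_state->crtc);
	return crtc_state ? &crtc_state->adjusted_mode : NULL;
}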
@@ -1006,7 +1012,11 @@ static void vc4_hdmi_encoder_pre_crtc_enable(struct drm_encoder *encoder, static void vc4_hdmi_encoder_post_crtc_enable(struct drm_encoder *encoder, struct drm_atomic_state *state) { - struct drm_display_mode *mode = &encoder->crtc->state->adjusted_mode; + struct drm_connector_state *conn_state = + vc4_hdmi_encoder_get_connector_state(encoder, state); + struct drm_crtc_state *crtc_state = + drm_atomic_get_new_crtc_state(state, conn_state->crtc); + struct drm_display_mode *mode = &crtc_state->adjusted_mode; struct vc4_hdmi *vc4_hdmi = encoder_to_vc4_hdmi(encoder); struct vc4_hdmi_encoder *vc4_encoder = to_vc4_hdmi_encoder(encoder); bool hsync_pos = mode->flags & DRM_MODE_FLAG_PHSYNC; @@ -1015,6 +1025,7 @@ static void vc4_hdmi_encoder_post_crtc_enable(struct drm_encoder *encoder, HDMI_WRITE(HDMI_VID_CTL, VC4_HD_VID_CTL_ENABLE | + VC4_HD_VID_CTL_CLRRGB | VC4_HD_VID_CTL_UNDERFLOW_ENABLE | VC4_HD_VID_CTL_FRAME_COUNTER_RESET | (vsync_pos ? 0 : VC4_HD_VID_CTL_VSYNC_LOW) | @@ -1173,12 +1184,13 @@ static u32 vc5_hdmi_channel_map(struct vc4_hdmi *vc4_hdmi, u32 channel_mask) } /* HDMI audio codec callbacks */ -static void vc4_hdmi_audio_set_mai_clock(struct vc4_hdmi *vc4_hdmi) +static void vc4_hdmi_audio_set_mai_clock(struct vc4_hdmi *vc4_hdmi, + unsigned int samplerate) { u32 hsm_clock = clk_get_rate(vc4_hdmi->audio_clock); unsigned long n, m; - rational_best_approximation(hsm_clock, vc4_hdmi->audio.samplerate, + rational_best_approximation(hsm_clock, samplerate, VC4_HD_MAI_SMP_N_MASK >> VC4_HD_MAI_SMP_N_SHIFT, (VC4_HD_MAI_SMP_M_MASK >> @@ -1190,12 +1202,11 @@ static void vc4_hdmi_audio_set_mai_clock(struct vc4_hdmi *vc4_hdmi) VC4_SET_FIELD(m - 1, VC4_HD_MAI_SMP_M)); } -static void vc4_hdmi_set_n_cts(struct vc4_hdmi *vc4_hdmi) +static void vc4_hdmi_set_n_cts(struct vc4_hdmi *vc4_hdmi, unsigned int samplerate) { - struct drm_encoder *encoder = &vc4_hdmi->encoder.base.base; - struct drm_crtc *crtc = encoder->crtc; + struct drm_connector *connector = &vc4_hdmi->connector; + struct drm_crtc *crtc = connector->state->crtc; const struct drm_display_mode *mode = &crtc->state->adjusted_mode; - u32 samplerate = vc4_hdmi->audio.samplerate; u32 n, cts; u64 tmp; @@ -1224,36 +1235,31 @@ static inline struct vc4_hdmi *dai_to_hdmi(struct snd_soc_dai *dai) return snd_soc_card_get_drvdata(card); } -static int vc4_hdmi_audio_startup(struct snd_pcm_substream *substream, - struct snd_soc_dai *dai) +static int vc4_hdmi_audio_startup(struct device *dev, void *data) { - struct vc4_hdmi *vc4_hdmi = dai_to_hdmi(dai); - struct drm_encoder *encoder = &vc4_hdmi->encoder.base.base; + struct vc4_hdmi *vc4_hdmi = dev_get_drvdata(dev); struct drm_connector *connector = &vc4_hdmi->connector; - int ret; - - if (vc4_hdmi->audio.substream && vc4_hdmi->audio.substream != substream) - return -EINVAL; - - vc4_hdmi->audio.substream = substream; /* * If the HDMI encoder hasn't probed, or the encoder is * currently in DVI mode, treat the codec dai as missing. 
*/ - if (!encoder->crtc || !(HDMI_READ(HDMI_RAM_PACKET_CONFIG) & + if (!connector->state || !(HDMI_READ(HDMI_RAM_PACKET_CONFIG) & VC4_HDMI_RAM_PACKET_ENABLE)) return -ENODEV; - ret = snd_pcm_hw_constraint_eld(substream->runtime, connector->eld); - if (ret) - return ret; + vc4_hdmi->audio.streaming = true; - return 0; -} + HDMI_WRITE(HDMI_MAI_CTL, + VC4_HD_MAI_CTL_RESET | + VC4_HD_MAI_CTL_FLUSH | + VC4_HD_MAI_CTL_DLATE | + VC4_HD_MAI_CTL_ERRORE | + VC4_HD_MAI_CTL_ERRORF); + + if (vc4_hdmi->variant->phy_rng_enable) + vc4_hdmi->variant->phy_rng_enable(vc4_hdmi); -static int vc4_hdmi_audio_set_fmt(struct snd_soc_dai *dai, unsigned int fmt) -{ return 0; } @@ -1273,48 +1279,96 @@ static void vc4_hdmi_audio_reset(struct vc4_hdmi *vc4_hdmi) HDMI_WRITE(HDMI_MAI_CTL, VC4_HD_MAI_CTL_FLUSH); } -static void vc4_hdmi_audio_shutdown(struct snd_pcm_substream *substream, - struct snd_soc_dai *dai) +static void vc4_hdmi_audio_shutdown(struct device *dev, void *data) { - struct vc4_hdmi *vc4_hdmi = dai_to_hdmi(dai); + struct vc4_hdmi *vc4_hdmi = dev_get_drvdata(dev); - if (substream != vc4_hdmi->audio.substream) - return; + HDMI_WRITE(HDMI_MAI_CTL, + VC4_HD_MAI_CTL_DLATE | + VC4_HD_MAI_CTL_ERRORE | + VC4_HD_MAI_CTL_ERRORF); + + if (vc4_hdmi->variant->phy_rng_disable) + vc4_hdmi->variant->phy_rng_disable(vc4_hdmi); + vc4_hdmi->audio.streaming = false; vc4_hdmi_audio_reset(vc4_hdmi); +} - vc4_hdmi->audio.substream = NULL; +static int sample_rate_to_mai_fmt(int samplerate) +{ + switch (samplerate) { + case 8000: + return VC4_HDMI_MAI_SAMPLE_RATE_8000; + case 11025: + return VC4_HDMI_MAI_SAMPLE_RATE_11025; + case 12000: + return VC4_HDMI_MAI_SAMPLE_RATE_12000; + case 16000: + return VC4_HDMI_MAI_SAMPLE_RATE_16000; + case 22050: + return VC4_HDMI_MAI_SAMPLE_RATE_22050; + case 24000: + return VC4_HDMI_MAI_SAMPLE_RATE_24000; + case 32000: + return VC4_HDMI_MAI_SAMPLE_RATE_32000; + case 44100: + return VC4_HDMI_MAI_SAMPLE_RATE_44100; + case 48000: + return VC4_HDMI_MAI_SAMPLE_RATE_48000; + case 64000: + return VC4_HDMI_MAI_SAMPLE_RATE_64000; + case 88200: + return VC4_HDMI_MAI_SAMPLE_RATE_88200; + case 96000: + return VC4_HDMI_MAI_SAMPLE_RATE_96000; + case 128000: + return VC4_HDMI_MAI_SAMPLE_RATE_128000; + case 176400: + return VC4_HDMI_MAI_SAMPLE_RATE_176400; + case 192000: + return VC4_HDMI_MAI_SAMPLE_RATE_192000; + default: + return VC4_HDMI_MAI_SAMPLE_RATE_NOT_INDICATED; + } } /* HDMI audio codec callbacks */ -static int vc4_hdmi_audio_hw_params(struct snd_pcm_substream *substream, - struct snd_pcm_hw_params *params, - struct snd_soc_dai *dai) +static int vc4_hdmi_audio_prepare(struct device *dev, void *data, + struct hdmi_codec_daifmt *daifmt, + struct hdmi_codec_params *params) { - struct vc4_hdmi *vc4_hdmi = dai_to_hdmi(dai); + struct vc4_hdmi *vc4_hdmi = dev_get_drvdata(dev); struct drm_encoder *encoder = &vc4_hdmi->encoder.base.base; - struct device *dev = &vc4_hdmi->pdev->dev; + unsigned int sample_rate = params->sample_rate; + unsigned int channels = params->channels; u32 audio_packet_config, channel_mask; u32 channel_map; - - if (substream != vc4_hdmi->audio.substream) - return -EINVAL; + u32 mai_audio_format; + u32 mai_sample_rate; dev_dbg(dev, "%s: %u Hz, %d bit, %d channels\n", __func__, - params_rate(params), params_width(params), - params_channels(params)); - - vc4_hdmi->audio.channels = params_channels(params); - vc4_hdmi->audio.samplerate = params_rate(params); + sample_rate, params->sample_width, channels); HDMI_WRITE(HDMI_MAI_CTL, - VC4_HD_MAI_CTL_RESET | - VC4_HD_MAI_CTL_FLUSH | - 
VC4_HD_MAI_CTL_DLATE | - VC4_HD_MAI_CTL_ERRORE | - VC4_HD_MAI_CTL_ERRORF); + VC4_SET_FIELD(channels, VC4_HD_MAI_CTL_CHNUM) | + VC4_HD_MAI_CTL_WHOLSMP | + VC4_HD_MAI_CTL_CHALIGN | + VC4_HD_MAI_CTL_ENABLE); + + vc4_hdmi_audio_set_mai_clock(vc4_hdmi, sample_rate); - vc4_hdmi_audio_set_mai_clock(vc4_hdmi); + mai_sample_rate = sample_rate_to_mai_fmt(sample_rate); + if (params->iec.status[0] & IEC958_AES0_NONAUDIO && + params->channels == 8) + mai_audio_format = VC4_HDMI_MAI_FORMAT_HBR; + else + mai_audio_format = VC4_HDMI_MAI_FORMAT_PCM; + HDMI_WRITE(HDMI_MAI_FMT, + VC4_SET_FIELD(mai_sample_rate, + VC4_HDMI_MAI_FORMAT_SAMPLE_RATE) | + VC4_SET_FIELD(mai_audio_format, + VC4_HDMI_MAI_FORMAT_AUDIO_FORMAT)); /* The B frame identifier should match the value used by alsa-lib (8) */ audio_packet_config = @@ -1322,122 +1376,33 @@ static int vc4_hdmi_audio_hw_params(struct snd_pcm_substream *substream, VC4_HDMI_AUDIO_PACKET_ZERO_DATA_ON_INACTIVE_CHANNELS | VC4_SET_FIELD(0x8, VC4_HDMI_AUDIO_PACKET_B_FRAME_IDENTIFIER); - channel_mask = GENMASK(vc4_hdmi->audio.channels - 1, 0); + channel_mask = GENMASK(channels - 1, 0); audio_packet_config |= VC4_SET_FIELD(channel_mask, VC4_HDMI_AUDIO_PACKET_CEA_MASK); - /* Set the MAI threshold. This logic mimics the firmware's. */ - if (vc4_hdmi->audio.samplerate > 96000) { - HDMI_WRITE(HDMI_MAI_THR, - VC4_SET_FIELD(0x12, VC4_HD_MAI_THR_DREQHIGH) | - VC4_SET_FIELD(0x12, VC4_HD_MAI_THR_DREQLOW)); - } else if (vc4_hdmi->audio.samplerate > 48000) { - HDMI_WRITE(HDMI_MAI_THR, - VC4_SET_FIELD(0x14, VC4_HD_MAI_THR_DREQHIGH) | - VC4_SET_FIELD(0x12, VC4_HD_MAI_THR_DREQLOW)); - } else { - HDMI_WRITE(HDMI_MAI_THR, - VC4_SET_FIELD(0x10, VC4_HD_MAI_THR_PANICHIGH) | - VC4_SET_FIELD(0x10, VC4_HD_MAI_THR_PANICLOW) | - VC4_SET_FIELD(0x10, VC4_HD_MAI_THR_DREQHIGH) | - VC4_SET_FIELD(0x10, VC4_HD_MAI_THR_DREQLOW)); - } + /* Set the MAI threshold */ + HDMI_WRITE(HDMI_MAI_THR, + VC4_SET_FIELD(0x10, VC4_HD_MAI_THR_PANICHIGH) | + VC4_SET_FIELD(0x10, VC4_HD_MAI_THR_PANICLOW) | + VC4_SET_FIELD(0x10, VC4_HD_MAI_THR_DREQHIGH) | + VC4_SET_FIELD(0x10, VC4_HD_MAI_THR_DREQLOW)); HDMI_WRITE(HDMI_MAI_CONFIG, VC4_HDMI_MAI_CONFIG_BIT_REVERSE | + VC4_HDMI_MAI_CONFIG_FORMAT_REVERSE | VC4_SET_FIELD(channel_mask, VC4_HDMI_MAI_CHANNEL_MASK)); channel_map = vc4_hdmi->variant->channel_map(vc4_hdmi, channel_mask); HDMI_WRITE(HDMI_MAI_CHANNEL_MAP, channel_map); HDMI_WRITE(HDMI_AUDIO_PACKET_CONFIG, audio_packet_config); - vc4_hdmi_set_n_cts(vc4_hdmi); + vc4_hdmi_set_n_cts(vc4_hdmi, sample_rate); + memcpy(&vc4_hdmi->audio.infoframe, &params->cea, sizeof(params->cea)); vc4_hdmi_set_audio_infoframe(encoder); return 0; } -static int vc4_hdmi_audio_trigger(struct snd_pcm_substream *substream, int cmd, - struct snd_soc_dai *dai) -{ - struct vc4_hdmi *vc4_hdmi = dai_to_hdmi(dai); - - switch (cmd) { - case SNDRV_PCM_TRIGGER_START: - vc4_hdmi->audio.streaming = true; - - if (vc4_hdmi->variant->phy_rng_enable) - vc4_hdmi->variant->phy_rng_enable(vc4_hdmi); - - HDMI_WRITE(HDMI_MAI_CTL, - VC4_SET_FIELD(vc4_hdmi->audio.channels, - VC4_HD_MAI_CTL_CHNUM) | - VC4_HD_MAI_CTL_ENABLE); - break; - case SNDRV_PCM_TRIGGER_STOP: - HDMI_WRITE(HDMI_MAI_CTL, - VC4_HD_MAI_CTL_DLATE | - VC4_HD_MAI_CTL_ERRORE | - VC4_HD_MAI_CTL_ERRORF); - - if (vc4_hdmi->variant->phy_rng_disable) - vc4_hdmi->variant->phy_rng_disable(vc4_hdmi); - - vc4_hdmi->audio.streaming = false; - - break; - default: - break; - } - - return 0; -} - -static inline struct vc4_hdmi * -snd_component_to_hdmi(struct snd_soc_component *component) -{ - struct snd_soc_card *card = 
snd_soc_component_get_drvdata(component); - - return snd_soc_card_get_drvdata(card); -} - -static int vc4_hdmi_audio_eld_ctl_info(struct snd_kcontrol *kcontrol, - struct snd_ctl_elem_info *uinfo) -{ - struct snd_soc_component *component = snd_kcontrol_chip(kcontrol); - struct vc4_hdmi *vc4_hdmi = snd_component_to_hdmi(component); - struct drm_connector *connector = &vc4_hdmi->connector; - - uinfo->type = SNDRV_CTL_ELEM_TYPE_BYTES; - uinfo->count = sizeof(connector->eld); - - return 0; -} - -static int vc4_hdmi_audio_eld_ctl_get(struct snd_kcontrol *kcontrol, - struct snd_ctl_elem_value *ucontrol) -{ - struct snd_soc_component *component = snd_kcontrol_chip(kcontrol); - struct vc4_hdmi *vc4_hdmi = snd_component_to_hdmi(component); - struct drm_connector *connector = &vc4_hdmi->connector; - - memcpy(ucontrol->value.bytes.data, connector->eld, - sizeof(connector->eld)); - - return 0; -} - -static const struct snd_kcontrol_new vc4_hdmi_audio_controls[] = { - { - .access = SNDRV_CTL_ELEM_ACCESS_READ | - SNDRV_CTL_ELEM_ACCESS_VOLATILE, - .iface = SNDRV_CTL_ELEM_IFACE_PCM, - .name = "ELD", - .info = vc4_hdmi_audio_eld_ctl_info, - .get = vc4_hdmi_audio_eld_ctl_get, - }, -}; - static const struct snd_soc_dapm_widget vc4_hdmi_audio_widgets[] = { SND_SOC_DAPM_OUTPUT("TX"), }; @@ -1446,42 +1411,6 @@ static const struct snd_soc_dapm_route vc4_hdmi_audio_routes[] = { { "TX", NULL, "Playback" }, }; -static const struct snd_soc_component_driver vc4_hdmi_audio_component_drv = { - .name = "vc4-hdmi-codec-dai-component", - .controls = vc4_hdmi_audio_controls, - .num_controls = ARRAY_SIZE(vc4_hdmi_audio_controls), - .dapm_widgets = vc4_hdmi_audio_widgets, - .num_dapm_widgets = ARRAY_SIZE(vc4_hdmi_audio_widgets), - .dapm_routes = vc4_hdmi_audio_routes, - .num_dapm_routes = ARRAY_SIZE(vc4_hdmi_audio_routes), - .idle_bias_on = 1, - .use_pmdown_time = 1, - .endianness = 1, - .non_legacy_dai_naming = 1, -}; - -static const struct snd_soc_dai_ops vc4_hdmi_audio_dai_ops = { - .startup = vc4_hdmi_audio_startup, - .shutdown = vc4_hdmi_audio_shutdown, - .hw_params = vc4_hdmi_audio_hw_params, - .set_fmt = vc4_hdmi_audio_set_fmt, - .trigger = vc4_hdmi_audio_trigger, -}; - -static struct snd_soc_dai_driver vc4_hdmi_audio_codec_dai_drv = { - .name = "vc4-hdmi-hifi", - .playback = { - .stream_name = "Playback", - .channels_min = 2, - .channels_max = 8, - .rates = SNDRV_PCM_RATE_32000 | SNDRV_PCM_RATE_44100 | - SNDRV_PCM_RATE_48000 | SNDRV_PCM_RATE_88200 | - SNDRV_PCM_RATE_96000 | SNDRV_PCM_RATE_176400 | - SNDRV_PCM_RATE_192000, - .formats = SNDRV_PCM_FMTBIT_IEC958_SUBFRAME_LE, - }, -}; - static const struct snd_soc_component_driver vc4_hdmi_audio_cpu_dai_comp = { .name = "vc4-hdmi-cpu-dai-component", }; @@ -1508,7 +1437,6 @@ static struct snd_soc_dai_driver vc4_hdmi_audio_cpu_dai_drv = { SNDRV_PCM_RATE_192000, .formats = SNDRV_PCM_FMTBIT_IEC958_SUBFRAME_LE, }, - .ops = &vc4_hdmi_audio_dai_ops, }; static const struct snd_dmaengine_pcm_config pcm_conf = { @@ -1516,6 +1444,30 @@ static const struct snd_dmaengine_pcm_config pcm_conf = { .prepare_slave_config = snd_dmaengine_pcm_prepare_slave_config, }; +static int vc4_hdmi_audio_get_eld(struct device *dev, void *data, + uint8_t *buf, size_t len) +{ + struct vc4_hdmi *vc4_hdmi = dev_get_drvdata(dev); + struct drm_connector *connector = &vc4_hdmi->connector; + + memcpy(buf, connector->eld, min(sizeof(connector->eld), len)); + + return 0; +} + +static const struct hdmi_codec_ops vc4_hdmi_codec_ops = { + .get_eld = vc4_hdmi_audio_get_eld, + .prepare = vc4_hdmi_audio_prepare, + 
.audio_shutdown = vc4_hdmi_audio_shutdown, + .audio_startup = vc4_hdmi_audio_startup, +}; + +struct hdmi_codec_pdata vc4_hdmi_codec_pdata = { + .ops = &vc4_hdmi_codec_ops, + .max_i2s_channels = 8, + .i2s = 1, +}; + static int vc4_hdmi_audio_init(struct vc4_hdmi *vc4_hdmi) { const struct vc4_hdmi_register *mai_data = @@ -1523,6 +1475,7 @@ static int vc4_hdmi_audio_init(struct vc4_hdmi *vc4_hdmi) struct snd_soc_dai_link *dai_link = &vc4_hdmi->audio.link; struct snd_soc_card *card = &vc4_hdmi->audio.card; struct device *dev = &vc4_hdmi->pdev->dev; + struct platform_device *codec_pdev; const __be32 *addr; int index; int ret; @@ -1569,12 +1522,13 @@ static int vc4_hdmi_audio_init(struct vc4_hdmi *vc4_hdmi) return ret; } - /* register component and codec dai */ - ret = devm_snd_soc_register_component(dev, &vc4_hdmi_audio_component_drv, - &vc4_hdmi_audio_codec_dai_drv, 1); - if (ret) { - dev_err(dev, "Could not register component: %d\n", ret); - return ret; + codec_pdev = platform_device_register_data(dev, HDMI_CODEC_DRV_NAME, + PLATFORM_DEVID_AUTO, + &vc4_hdmi_codec_pdata, + sizeof(vc4_hdmi_codec_pdata)); + if (IS_ERR(codec_pdev)) { + dev_err(dev, "Couldn't register the HDMI codec: %ld\n", PTR_ERR(codec_pdev)); + return PTR_ERR(codec_pdev); } dai_link->cpus = &vc4_hdmi->audio.cpu; @@ -1587,9 +1541,9 @@ static int vc4_hdmi_audio_init(struct vc4_hdmi *vc4_hdmi) dai_link->name = "MAI"; dai_link->stream_name = "MAI PCM"; - dai_link->codecs->dai_name = vc4_hdmi_audio_codec_dai_drv.name; + dai_link->codecs->dai_name = "i2s-hifi"; dai_link->cpus->dai_name = dev_name(dev); - dai_link->codecs->name = dev_name(dev); + dai_link->codecs->name = dev_name(&codec_pdev->dev); dai_link->platforms->name = dev_name(dev); card->dai_link = dai_link; @@ -1609,12 +1563,65 @@ static int vc4_hdmi_audio_init(struct vc4_hdmi *vc4_hdmi) snd_soc_card_set_drvdata(card, vc4_hdmi); ret = devm_snd_soc_register_card(dev, card); if (ret) - dev_err(dev, "Could not register sound card: %d\n", ret); + dev_err_probe(dev, ret, "Could not register sound card\n"); return ret; } +static irqreturn_t vc4_hdmi_hpd_irq_thread(int irq, void *priv) +{ + struct vc4_hdmi *vc4_hdmi = priv; + struct drm_device *dev = vc4_hdmi->connector.dev; + + if (dev && dev->registered) + drm_kms_helper_hotplug_event(dev); + + return IRQ_HANDLED; +} + +static int vc4_hdmi_hotplug_init(struct vc4_hdmi *vc4_hdmi) +{ + struct drm_connector *connector = &vc4_hdmi->connector; + struct platform_device *pdev = vc4_hdmi->pdev; + int ret; + + if (vc4_hdmi->variant->external_irq_controller) { + unsigned int hpd_con = platform_get_irq_byname(pdev, "hpd-connected"); + unsigned int hpd_rm = platform_get_irq_byname(pdev, "hpd-removed"); + + ret = request_threaded_irq(hpd_con, + NULL, + vc4_hdmi_hpd_irq_thread, IRQF_ONESHOT, + "vc4 hdmi hpd connected", vc4_hdmi); + if (ret) + return ret; + + ret = request_threaded_irq(hpd_rm, + NULL, + vc4_hdmi_hpd_irq_thread, IRQF_ONESHOT, + "vc4 hdmi hpd disconnected", vc4_hdmi); + if (ret) { + free_irq(hpd_con, vc4_hdmi); + return ret; + } + + connector->polled = DRM_CONNECTOR_POLL_HPD; + } + + return 0; +} + +static void vc4_hdmi_hotplug_exit(struct vc4_hdmi *vc4_hdmi) +{ + struct platform_device *pdev = vc4_hdmi->pdev; + + if (vc4_hdmi->variant->external_irq_controller) { + free_irq(platform_get_irq_byname(pdev, "hpd-connected"), vc4_hdmi); + free_irq(platform_get_irq_byname(pdev, "hpd-removed"), vc4_hdmi); + } +} + #ifdef CONFIG_DRM_VC4_HDMI_CEC static irqreturn_t vc4_cec_irq_handler_rx_thread(int irq, void *priv) { @@ -2213,10 +2220,14 
@@ static int vc4_hdmi_bind(struct device *dev, struct device *master, void *data) if (ret) goto err_destroy_encoder; - ret = vc4_hdmi_cec_init(vc4_hdmi); + ret = vc4_hdmi_hotplug_init(vc4_hdmi); if (ret) goto err_destroy_conn; + ret = vc4_hdmi_cec_init(vc4_hdmi); + if (ret) + goto err_free_hotplug; + ret = vc4_hdmi_audio_init(vc4_hdmi); if (ret) goto err_free_cec; @@ -2229,6 +2240,8 @@ static int vc4_hdmi_bind(struct device *dev, struct device *master, void *data) err_free_cec: vc4_hdmi_cec_exit(vc4_hdmi); +err_free_hotplug: + vc4_hdmi_hotplug_exit(vc4_hdmi); err_destroy_conn: vc4_hdmi_connector_destroy(&vc4_hdmi->connector); err_destroy_encoder: @@ -2270,6 +2283,7 @@ static void vc4_hdmi_unbind(struct device *dev, struct device *master, kfree(vc4_hdmi->hd_regset.regs); vc4_hdmi_cec_exit(vc4_hdmi); + vc4_hdmi_hotplug_exit(vc4_hdmi); vc4_hdmi_connector_destroy(&vc4_hdmi->connector); drm_encoder_cleanup(&vc4_hdmi->encoder.base.base); diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.h b/drivers/gpu/drm/vc4/vc4_hdmi.h index 884d245507a9..33e9f665ab8e 100644 --- a/drivers/gpu/drm/vc4/vc4_hdmi.h +++ b/drivers/gpu/drm/vc4/vc4_hdmi.h @@ -111,11 +111,8 @@ struct vc4_hdmi_audio { struct snd_soc_dai_link_component cpu; struct snd_soc_dai_link_component codec; struct snd_soc_dai_link_component platform; - int samplerate; - int channels; struct snd_dmaengine_dai_dma_data dma_data; - struct snd_pcm_substream *substream; - + struct hdmi_audio_infoframe infoframe; bool streaming; }; diff --git a/drivers/gpu/drm/vc4/vc4_irq.c b/drivers/gpu/drm/vc4/vc4_irq.c index e226c24e543f..20fa8e34c20b 100644 --- a/drivers/gpu/drm/vc4/vc4_irq.c +++ b/drivers/gpu/drm/vc4/vc4_irq.c @@ -45,6 +45,10 @@ * current job can make progress. */ +#include <linux/platform_device.h> + +#include <drm/drm_drv.h> + #include "vc4_drv.h" #include "vc4_regs.h" @@ -192,7 +196,7 @@ vc4_irq_finish_render_job(struct drm_device *dev) schedule_work(&vc4->job_done_work); } -irqreturn_t +static irqreturn_t vc4_irq(int irq, void *arg) { struct drm_device *dev = arg; @@ -234,8 +238,8 @@ vc4_irq(int irq, void *arg) return status; } -void -vc4_irq_preinstall(struct drm_device *dev) +static void +vc4_irq_prepare(struct drm_device *dev) { struct vc4_dev *vc4 = to_vc4_dev(dev); @@ -251,24 +255,22 @@ vc4_irq_preinstall(struct drm_device *dev) V3D_WRITE(V3D_INTCTL, V3D_DRIVER_IRQS); } -int -vc4_irq_postinstall(struct drm_device *dev) +void +vc4_irq_enable(struct drm_device *dev) { struct vc4_dev *vc4 = to_vc4_dev(dev); if (!vc4->v3d) - return 0; + return; /* Enable the render done interrupts. The out-of-memory interrupt is * enabled as soon as we have a binner BO allocated. */ V3D_WRITE(V3D_INTENA, V3D_INT_FLDONE | V3D_INT_FRDONE); - - return 0; } void -vc4_irq_uninstall(struct drm_device *dev) +vc4_irq_disable(struct drm_device *dev) { struct vc4_dev *vc4 = to_vc4_dev(dev); @@ -282,11 +284,37 @@ vc4_irq_uninstall(struct drm_device *dev) V3D_WRITE(V3D_INTCTL, V3D_DRIVER_IRQS); /* Finish any interrupt handler still in flight. 
*/ - disable_irq(dev->irq); + disable_irq(vc4->irq); cancel_work_sync(&vc4->overflow_mem_work); } +int vc4_irq_install(struct drm_device *dev, int irq) +{ + int ret; + + if (irq == IRQ_NOTCONNECTED) + return -ENOTCONN; + + vc4_irq_prepare(dev); + + ret = request_irq(irq, vc4_irq, 0, dev->driver->name, dev); + if (ret) + return ret; + + vc4_irq_enable(dev); + + return 0; +} + +void vc4_irq_uninstall(struct drm_device *dev) +{ + struct vc4_dev *vc4 = to_vc4_dev(dev); + + vc4_irq_disable(dev); + free_irq(vc4->irq, dev); +} + /** Reinitializes interrupt registers when a GPU reset is performed. */ void vc4_irq_reset(struct drm_device *dev) { diff --git a/drivers/gpu/drm/vc4/vc4_kms.c b/drivers/gpu/drm/vc4/vc4_kms.c index 6a1a9e1d72ce..f0b3e4cf5bce 100644 --- a/drivers/gpu/drm/vc4/vc4_kms.c +++ b/drivers/gpu/drm/vc4/vc4_kms.c @@ -880,7 +880,6 @@ int vc4_kms_load(struct drm_device *dev) /* Set support for vblank irq fast disable, before drm_vblank_init() */ dev->vblank_disable_immediate = true; - dev->irq_enabled = true; ret = drm_vblank_init(dev, dev->mode_config.num_crtc); if (ret < 0) { dev_err(dev->dev, "failed to initialize vblank\n"); diff --git a/drivers/gpu/drm/vc4/vc4_regs.h b/drivers/gpu/drm/vc4/vc4_regs.h index be2c32a519b3..489f921ef44d 100644 --- a/drivers/gpu/drm/vc4/vc4_regs.h +++ b/drivers/gpu/drm/vc4/vc4_regs.h @@ -516,6 +516,36 @@ # define VC4_HDMI_AUDIO_PACKET_CEA_MASK_MASK VC4_MASK(7, 0) # define VC4_HDMI_AUDIO_PACKET_CEA_MASK_SHIFT 0 +# define VC4_HDMI_MAI_FORMAT_AUDIO_FORMAT_MASK VC4_MASK(23, 16) +# define VC4_HDMI_MAI_FORMAT_AUDIO_FORMAT_SHIFT 16 + +enum { + VC4_HDMI_MAI_FORMAT_PCM = 2, + VC4_HDMI_MAI_FORMAT_HBR = 200, +}; + +# define VC4_HDMI_MAI_FORMAT_SAMPLE_RATE_MASK VC4_MASK(15, 8) +# define VC4_HDMI_MAI_FORMAT_SAMPLE_RATE_SHIFT 8 + +enum { + VC4_HDMI_MAI_SAMPLE_RATE_NOT_INDICATED = 0, + VC4_HDMI_MAI_SAMPLE_RATE_8000 = 1, + VC4_HDMI_MAI_SAMPLE_RATE_11025 = 2, + VC4_HDMI_MAI_SAMPLE_RATE_12000 = 3, + VC4_HDMI_MAI_SAMPLE_RATE_16000 = 4, + VC4_HDMI_MAI_SAMPLE_RATE_22050 = 5, + VC4_HDMI_MAI_SAMPLE_RATE_24000 = 6, + VC4_HDMI_MAI_SAMPLE_RATE_32000 = 7, + VC4_HDMI_MAI_SAMPLE_RATE_44100 = 8, + VC4_HDMI_MAI_SAMPLE_RATE_48000 = 9, + VC4_HDMI_MAI_SAMPLE_RATE_64000 = 10, + VC4_HDMI_MAI_SAMPLE_RATE_88200 = 11, + VC4_HDMI_MAI_SAMPLE_RATE_96000 = 12, + VC4_HDMI_MAI_SAMPLE_RATE_128000 = 13, + VC4_HDMI_MAI_SAMPLE_RATE_176400 = 14, + VC4_HDMI_MAI_SAMPLE_RATE_192000 = 15, +}; + # define VC4_HDMI_RAM_PACKET_ENABLE BIT(16) /* When set, the CTS_PERIOD counts based on MAI bus sync pulse instead diff --git a/drivers/gpu/drm/vc4/vc4_v3d.c b/drivers/gpu/drm/vc4/vc4_v3d.c index 73d63d72575b..7bb3067f8425 100644 --- a/drivers/gpu/drm/vc4/vc4_v3d.c +++ b/drivers/gpu/drm/vc4/vc4_v3d.c @@ -10,8 +10,6 @@ #include <linux/platform_device.h> #include <linux/pm_runtime.h> -#include <drm/drm_irq.h> - #include "vc4_drv.h" #include "vc4_regs.h" @@ -361,7 +359,7 @@ static int vc4_v3d_runtime_suspend(struct device *dev) struct vc4_v3d *v3d = dev_get_drvdata(dev); struct vc4_dev *vc4 = v3d->vc4; - vc4_irq_uninstall(&vc4->base); + vc4_irq_disable(&vc4->base); clk_disable_unprepare(v3d->clk); @@ -381,8 +379,8 @@ static int vc4_v3d_runtime_resume(struct device *dev) vc4_v3d_init_hw(&vc4->base); /* We disabled the IRQ as part of vc4_irq_uninstall in suspend. 
*/ - enable_irq(vc4->base.irq); - vc4_irq_postinstall(&vc4->base); + enable_irq(vc4->irq); + vc4_irq_enable(&vc4->base); return 0; } @@ -448,7 +446,12 @@ static int vc4_v3d_bind(struct device *dev, struct device *master, void *data) vc4_v3d_init_hw(drm); - ret = drm_irq_install(drm, platform_get_irq(pdev, 0)); + ret = platform_get_irq(pdev, 0); + if (ret < 0) + return ret; + vc4->irq = ret; + + ret = vc4_irq_install(drm, vc4->irq); if (ret) { DRM_ERROR("Failed to install IRQ handler\n"); return ret; @@ -473,7 +476,7 @@ static void vc4_v3d_unbind(struct device *dev, struct device *master, pm_runtime_disable(dev); - drm_irq_uninstall(drm); + vc4_irq_uninstall(drm); /* Disable the binner's overflow memory address, so the next * driver probe (if any) doesn't try to reuse our old diff --git a/drivers/gpu/drm/via/via_mm.c b/drivers/gpu/drm/via/via_mm.c index dae1bacd86c1..c9afa1a51f23 100644 --- a/drivers/gpu/drm/via/via_mm.c +++ b/drivers/gpu/drm/via/via_mm.c @@ -29,7 +29,6 @@ #include <drm/drm_device.h> #include <drm/drm_file.h> -#include <drm/drm_irq.h> #include <drm/via_drm.h> #include "via_drv.h" @@ -86,7 +85,7 @@ int via_final_context(struct drm_device *dev, int context) /* Last context, perform cleanup */ if (list_is_singular(&dev->ctxlist)) { DRM_DEBUG("Last Context\n"); - drm_irq_uninstall(dev); + drm_legacy_irq_uninstall(dev); via_cleanup_futex(dev_priv); via_do_cleanup_map(dev); } diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.c b/drivers/gpu/drm/virtio/virtgpu_drv.c index ca77edbc5ea0..ed85a7863256 100644 --- a/drivers/gpu/drm/virtio/virtgpu_drv.c +++ b/drivers/gpu/drm/virtio/virtgpu_drv.c @@ -57,7 +57,7 @@ static int virtio_gpu_pci_quirk(struct drm_device *dev, struct virtio_device *vd vga ? "virtio-vga" : "virtio-gpu-pci", pname); if (vga) { - ret = drm_aperture_remove_conflicting_pci_framebuffers(pdev, "virtiodrmfb"); + ret = drm_aperture_remove_conflicting_pci_framebuffers(pdev, &driver); if (ret) return ret; } diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.h b/drivers/gpu/drm/virtio/virtgpu_drv.h index d9dbc4f258f3..d4e610a44e12 100644 --- a/drivers/gpu/drm/virtio/virtgpu_drv.h +++ b/drivers/gpu/drm/virtio/virtgpu_drv.h @@ -315,7 +315,9 @@ void virtio_gpu_cmd_transfer_to_host_2d(struct virtio_gpu_device *vgdev, void virtio_gpu_cmd_resource_flush(struct virtio_gpu_device *vgdev, uint32_t resource_id, uint32_t x, uint32_t y, - uint32_t width, uint32_t height); + uint32_t width, uint32_t height, + struct virtio_gpu_object_array *objs, + struct virtio_gpu_fence *fence); void virtio_gpu_cmd_set_scanout(struct virtio_gpu_device *vgdev, uint32_t scanout_id, uint32_t resource_id, uint32_t width, uint32_t height, diff --git a/drivers/gpu/drm/virtio/virtgpu_plane.c b/drivers/gpu/drm/virtio/virtgpu_plane.c index 4e1b17548007..a49fd9480381 100644 --- a/drivers/gpu/drm/virtio/virtgpu_plane.c +++ b/drivers/gpu/drm/virtio/virtgpu_plane.c @@ -129,6 +129,40 @@ static void virtio_gpu_update_dumb_bo(struct virtio_gpu_device *vgdev, objs, NULL); } +static void virtio_gpu_resource_flush(struct drm_plane *plane, + uint32_t x, uint32_t y, + uint32_t width, uint32_t height) +{ + struct drm_device *dev = plane->dev; + struct virtio_gpu_device *vgdev = dev->dev_private; + struct virtio_gpu_framebuffer *vgfb; + struct virtio_gpu_object *bo; + + vgfb = to_virtio_gpu_framebuffer(plane->state->fb); + bo = gem_to_virtio_gpu_obj(vgfb->base.obj[0]); + if (vgfb->fence) { + struct virtio_gpu_object_array *objs; + + objs = virtio_gpu_array_alloc(1); + if (!objs) + return; + virtio_gpu_array_add_obj(objs, 
vgfb->base.obj[0]); + virtio_gpu_array_lock_resv(objs); + virtio_gpu_cmd_resource_flush(vgdev, bo->hw_res_handle, x, y, + width, height, objs, vgfb->fence); + virtio_gpu_notify(vgdev); + + dma_fence_wait_timeout(&vgfb->fence->f, true, + msecs_to_jiffies(50)); + dma_fence_put(&vgfb->fence->f); + vgfb->fence = NULL; + } else { + virtio_gpu_cmd_resource_flush(vgdev, bo->hw_res_handle, x, y, + width, height, NULL, NULL); + virtio_gpu_notify(vgdev); + } +} + static void virtio_gpu_primary_plane_update(struct drm_plane *plane, struct drm_atomic_state *state) { @@ -198,16 +232,15 @@ static void virtio_gpu_primary_plane_update(struct drm_plane *plane, } } - virtio_gpu_cmd_resource_flush(vgdev, bo->hw_res_handle, - rect.x1, - rect.y1, - rect.x2 - rect.x1, - rect.y2 - rect.y1); - virtio_gpu_notify(vgdev); + virtio_gpu_resource_flush(plane, + rect.x1, + rect.y1, + rect.x2 - rect.x1, + rect.y2 - rect.y1); } -static int virtio_gpu_cursor_prepare_fb(struct drm_plane *plane, - struct drm_plane_state *new_state) +static int virtio_gpu_plane_prepare_fb(struct drm_plane *plane, + struct drm_plane_state *new_state) { struct drm_device *dev = plane->dev; struct virtio_gpu_device *vgdev = dev->dev_private; @@ -219,7 +252,10 @@ static int virtio_gpu_cursor_prepare_fb(struct drm_plane *plane, vgfb = to_virtio_gpu_framebuffer(new_state->fb); bo = gem_to_virtio_gpu_obj(vgfb->base.obj[0]); - if (bo && bo->dumb && (plane->state->fb != new_state->fb)) { + if (!bo || (plane->type == DRM_PLANE_TYPE_PRIMARY && !bo->guest_blob)) + return 0; + + if (bo->dumb && (plane->state->fb != new_state->fb)) { vgfb->fence = virtio_gpu_fence_alloc(vgdev); if (!vgfb->fence) return -ENOMEM; @@ -228,8 +264,8 @@ static int virtio_gpu_cursor_prepare_fb(struct drm_plane *plane, return 0; } -static void virtio_gpu_cursor_cleanup_fb(struct drm_plane *plane, - struct drm_plane_state *old_state) +static void virtio_gpu_plane_cleanup_fb(struct drm_plane *plane, + struct drm_plane_state *old_state) { struct virtio_gpu_framebuffer *vgfb; @@ -321,13 +357,15 @@ static void virtio_gpu_cursor_plane_update(struct drm_plane *plane, } static const struct drm_plane_helper_funcs virtio_gpu_primary_helper_funcs = { + .prepare_fb = virtio_gpu_plane_prepare_fb, + .cleanup_fb = virtio_gpu_plane_cleanup_fb, .atomic_check = virtio_gpu_plane_atomic_check, .atomic_update = virtio_gpu_primary_plane_update, }; static const struct drm_plane_helper_funcs virtio_gpu_cursor_helper_funcs = { - .prepare_fb = virtio_gpu_cursor_prepare_fb, - .cleanup_fb = virtio_gpu_cursor_cleanup_fb, + .prepare_fb = virtio_gpu_plane_prepare_fb, + .cleanup_fb = virtio_gpu_plane_cleanup_fb, .atomic_check = virtio_gpu_plane_atomic_check, .atomic_update = virtio_gpu_cursor_plane_update, }; diff --git a/drivers/gpu/drm/virtio/virtgpu_prime.c b/drivers/gpu/drm/virtio/virtgpu_prime.c index 807a27a16365..e45dbf14b307 100644 --- a/drivers/gpu/drm/virtio/virtgpu_prime.c +++ b/drivers/gpu/drm/virtio/virtgpu_prime.c @@ -98,6 +98,8 @@ struct dma_buf *virtgpu_gem_prime_export(struct drm_gem_object *obj, } else { bo->uuid_state = STATE_ERR; } + } else if (!(bo->blob_flags & VIRTGPU_BLOB_FLAG_USE_CROSS_DEVICE)) { + bo->uuid_state = STATE_ERR; } exp_info.ops = &virtgpu_dmabuf_ops.ops; diff --git a/drivers/gpu/drm/virtio/virtgpu_vq.c b/drivers/gpu/drm/virtio/virtgpu_vq.c index cf84d382dd41..2e71e91278b4 100644 --- a/drivers/gpu/drm/virtio/virtgpu_vq.c +++ b/drivers/gpu/drm/virtio/virtgpu_vq.c @@ -576,13 +576,16 @@ void virtio_gpu_cmd_set_scanout(struct virtio_gpu_device *vgdev, void 
virtio_gpu_cmd_resource_flush(struct virtio_gpu_device *vgdev, uint32_t resource_id, uint32_t x, uint32_t y, - uint32_t width, uint32_t height) + uint32_t width, uint32_t height, + struct virtio_gpu_object_array *objs, + struct virtio_gpu_fence *fence) { struct virtio_gpu_resource_flush *cmd_p; struct virtio_gpu_vbuffer *vbuf; cmd_p = virtio_gpu_alloc_cmd(vgdev, &vbuf, sizeof(*cmd_p)); memset(cmd_p, 0, sizeof(*cmd_p)); + vbuf->objs = objs; cmd_p->hdr.type = cpu_to_le32(VIRTIO_GPU_CMD_RESOURCE_FLUSH); cmd_p->resource_id = cpu_to_le32(resource_id); @@ -591,7 +594,7 @@ void virtio_gpu_cmd_resource_flush(struct virtio_gpu_device *vgdev, cmd_p->r.x = cpu_to_le32(x); cmd_p->r.y = cpu_to_le32(y); - virtio_gpu_queue_ctrl_buffer(vgdev, vbuf); + virtio_gpu_queue_fenced_ctrl_buffer(vgdev, vbuf, fence); } void virtio_gpu_cmd_transfer_to_host_2d(struct virtio_gpu_device *vgdev, diff --git a/drivers/gpu/drm/vkms/vkms_composer.c b/drivers/gpu/drm/vkms/vkms_composer.c index e49523866e1d..9e8204be9a14 100644 --- a/drivers/gpu/drm/vkms/vkms_composer.c +++ b/drivers/gpu/drm/vkms/vkms_composer.c @@ -6,7 +6,6 @@ #include <drm/drm_atomic_helper.h> #include <drm/drm_fourcc.h> #include <drm/drm_gem_framebuffer_helper.h> -#include <drm/drm_gem_shmem_helper.h> #include <drm/drm_vblank.h> #include "vkms_drv.h" @@ -154,24 +153,21 @@ static void compose_plane(struct vkms_composer *primary_composer, struct vkms_composer *plane_composer, void *vaddr_out) { - struct drm_gem_object *plane_obj; - struct drm_gem_shmem_object *plane_shmem_obj; struct drm_framebuffer *fb = &plane_composer->fb; + void *vaddr; void (*pixel_blend)(const u8 *p_src, u8 *p_dst); - plane_obj = drm_gem_fb_get_obj(&plane_composer->fb, 0); - plane_shmem_obj = to_drm_gem_shmem_obj(plane_obj); - - if (WARN_ON(!plane_shmem_obj->vaddr)) + if (WARN_ON(dma_buf_map_is_null(&primary_composer->map[0]))) return; + vaddr = plane_composer->map[0].vaddr; + if (fb->format->format == DRM_FORMAT_ARGB8888) pixel_blend = &alpha_blend; else pixel_blend = &x_blend; - blend(vaddr_out, plane_shmem_obj->vaddr, primary_composer, - plane_composer, pixel_blend); + blend(vaddr_out, vaddr, primary_composer, plane_composer, pixel_blend); } static int compose_active_planes(void **vaddr_out, @@ -180,21 +176,23 @@ static int compose_active_planes(void **vaddr_out, { struct drm_framebuffer *fb = &primary_composer->fb; struct drm_gem_object *gem_obj = drm_gem_fb_get_obj(fb, 0); - struct drm_gem_shmem_object *shmem_obj = to_drm_gem_shmem_obj(gem_obj); + const void *vaddr; int i; if (!*vaddr_out) { - *vaddr_out = kzalloc(shmem_obj->base.size, GFP_KERNEL); + *vaddr_out = kzalloc(gem_obj->size, GFP_KERNEL); if (!*vaddr_out) { DRM_ERROR("Cannot allocate memory for output frame."); return -ENOMEM; } } - if (WARN_ON(!shmem_obj->vaddr)) + if (WARN_ON(dma_buf_map_is_null(&primary_composer->map[0]))) return -EINVAL; - memcpy(*vaddr_out, shmem_obj->vaddr, shmem_obj->base.size); + vaddr = primary_composer->map[0].vaddr; + + memcpy(*vaddr_out, vaddr, gem_obj->size); /* If there are other planes besides primary, we consider the active * planes should be in z-order and compose them associatively: @@ -251,7 +249,7 @@ void vkms_composer_worker(struct work_struct *work) if (crtc_state->num_active_planes >= 1) { act_plane = crtc_state->active_planes[0]; - if (act_plane->base.plane->type == DRM_PLANE_TYPE_PRIMARY) + if (act_plane->base.base.plane->type == DRM_PLANE_TYPE_PRIMARY) primary_composer = act_plane->composer; } @@ -259,7 +257,7 @@ void vkms_composer_worker(struct work_struct *work) return; if 
(wb_pending) - vaddr_out = crtc_state->active_writeback; + vaddr_out = crtc_state->active_writeback->data[0].vaddr; ret = compose_active_planes(&vaddr_out, primary_composer, crtc_state); diff --git a/drivers/gpu/drm/vkms/vkms_drv.c b/drivers/gpu/drm/vkms/vkms_drv.c index 027ffe759440..0ffe5f0e33f7 100644 --- a/drivers/gpu/drm/vkms/vkms_drv.c +++ b/drivers/gpu/drm/vkms/vkms_drv.c @@ -28,6 +28,9 @@ #include "vkms_drv.h" +#include <drm/drm_print.h> +#include <drm/drm_debugfs.h> + #define DRIVER_NAME "vkms" #define DRIVER_DESC "Virtual Kernel Mode Setting" #define DRIVER_DATE "20180514" @@ -52,7 +55,7 @@ DEFINE_DRM_GEM_FOPS(vkms_driver_fops); static void vkms_release(struct drm_device *dev) { - struct vkms_device *vkms = container_of(dev, struct vkms_device, drm); + struct vkms_device *vkms = drm_device_to_vkms_device(dev); destroy_workqueue(vkms->output.composer_workq); } @@ -86,12 +89,37 @@ static void vkms_atomic_commit_tail(struct drm_atomic_state *old_state) drm_atomic_helper_cleanup_planes(dev, old_state); } +static int vkms_config_show(struct seq_file *m, void *data) +{ + struct drm_info_node *node = (struct drm_info_node *)m->private; + struct drm_device *dev = node->minor->dev; + struct vkms_device *vkmsdev = drm_device_to_vkms_device(dev); + + seq_printf(m, "writeback=%d\n", vkmsdev->config->writeback); + seq_printf(m, "cursor=%d\n", vkmsdev->config->cursor); + seq_printf(m, "overlay=%d\n", vkmsdev->config->overlay); + + return 0; +} + +static const struct drm_info_list vkms_config_debugfs_list[] = { + { "vkms_config", vkms_config_show, 0 }, +}; + +static void vkms_config_debugfs_init(struct drm_minor *minor) +{ + drm_debugfs_create_files(vkms_config_debugfs_list, ARRAY_SIZE(vkms_config_debugfs_list), + minor->debugfs_root, minor); +} + static const struct drm_driver vkms_driver = { .driver_features = DRIVER_MODESET | DRIVER_ATOMIC | DRIVER_GEM, .release = vkms_release, .fops = &vkms_driver_fops, DRM_GEM_SHMEM_DRIVER_OPS, + .debugfs_init = vkms_config_debugfs_init, + .name = DRIVER_NAME, .desc = DRIVER_DESC, .date = DRIVER_DATE, @@ -163,8 +191,6 @@ static int vkms_create(struct vkms_config *config) goto out_devres; } - vkms_device->drm.irq_enabled = true; - ret = drm_vblank_init(&vkms_device->drm, 1); if (ret) { DRM_ERROR("Failed to vblank\n"); diff --git a/drivers/gpu/drm/vkms/vkms_drv.h b/drivers/gpu/drm/vkms/vkms_drv.h index ac8c9c2fa4ed..d48c23d40ce5 100644 --- a/drivers/gpu/drm/vkms/vkms_drv.h +++ b/drivers/gpu/drm/vkms/vkms_drv.h @@ -7,6 +7,7 @@ #include <drm/drm.h> #include <drm/drm_gem.h> +#include <drm/drm_gem_atomic_helper.h> #include <drm/drm_encoder.h> #include <drm/drm_writeback.h> @@ -19,9 +20,15 @@ #define XRES_MAX 8192 #define YRES_MAX 8192 +struct vkms_writeback_job { + struct dma_buf_map map[DRM_FORMAT_MAX_PLANES]; + struct dma_buf_map data[DRM_FORMAT_MAX_PLANES]; +}; + struct vkms_composer { struct drm_framebuffer fb; struct drm_rect src, dst; + struct dma_buf_map map[4]; unsigned int offset; unsigned int pitch; unsigned int cpp; @@ -33,7 +40,7 @@ struct vkms_composer { * @composer: data required for composing computation */ struct vkms_plane_state { - struct drm_plane_state base; + struct drm_shadow_plane_state base; struct vkms_composer *composer; }; @@ -55,7 +62,7 @@ struct vkms_crtc_state { int num_active_planes; /* stack of active planes for crc computation, should be in z order */ struct vkms_plane_state **active_planes; - void *active_writeback; + struct vkms_writeback_job *active_writeback; /* below four are protected by vkms_output.composer_lock */ bool 
crc_pending; @@ -111,7 +118,7 @@ struct vkms_device { container_of(target, struct vkms_crtc_state, base) #define to_vkms_plane_state(target)\ - container_of(target, struct vkms_plane_state, base) + container_of(target, struct vkms_plane_state, base.base) /* CRTC */ int vkms_crtc_init(struct drm_device *dev, struct drm_crtc *crtc, diff --git a/drivers/gpu/drm/vkms/vkms_plane.c b/drivers/gpu/drm/vkms/vkms_plane.c index 107521ace597..32409e15244b 100644 --- a/drivers/gpu/drm/vkms/vkms_plane.c +++ b/drivers/gpu/drm/vkms/vkms_plane.c @@ -8,7 +8,6 @@ #include <drm/drm_gem_atomic_helper.h> #include <drm/drm_gem_framebuffer_helper.h> #include <drm/drm_plane_helper.h> -#include <drm/drm_gem_shmem_helper.h> #include "vkms_drv.h" @@ -40,17 +39,16 @@ vkms_plane_duplicate_state(struct drm_plane *plane) vkms_state->composer = composer; - __drm_atomic_helper_plane_duplicate_state(plane, - &vkms_state->base); + __drm_gem_duplicate_shadow_plane_state(plane, &vkms_state->base); - return &vkms_state->base; + return &vkms_state->base.base; } static void vkms_plane_destroy_state(struct drm_plane *plane, struct drm_plane_state *old_state) { struct vkms_plane_state *vkms_state = to_vkms_plane_state(old_state); - struct drm_crtc *crtc = vkms_state->base.crtc; + struct drm_crtc *crtc = vkms_state->base.base.crtc; if (crtc) { /* dropping the reference we acquired in @@ -63,7 +61,7 @@ static void vkms_plane_destroy_state(struct drm_plane *plane, kfree(vkms_state->composer); vkms_state->composer = NULL; - __drm_atomic_helper_plane_destroy_state(old_state); + __drm_gem_destroy_shadow_plane_state(&vkms_state->base); kfree(vkms_state); } @@ -71,8 +69,10 @@ static void vkms_plane_reset(struct drm_plane *plane) { struct vkms_plane_state *vkms_state; - if (plane->state) + if (plane->state) { vkms_plane_destroy_state(plane, plane->state); + plane->state = NULL; /* must be set to NULL here */ + } vkms_state = kzalloc(sizeof(*vkms_state), GFP_KERNEL); if (!vkms_state) { @@ -80,8 +80,7 @@ static void vkms_plane_reset(struct drm_plane *plane) return; } - plane->state = &vkms_state->base; - plane->state->plane = plane; + __drm_gem_reset_shadow_plane(plane, &vkms_state->base); } static const struct drm_plane_funcs vkms_plane_funcs = { @@ -98,6 +97,7 @@ static void vkms_plane_atomic_update(struct drm_plane *plane, struct drm_plane_state *new_state = drm_atomic_get_new_plane_state(state, plane); struct vkms_plane_state *vkms_plane_state; + struct drm_shadow_plane_state *shadow_plane_state; struct drm_framebuffer *fb = new_state->fb; struct vkms_composer *composer; @@ -105,11 +105,13 @@ static void vkms_plane_atomic_update(struct drm_plane *plane, return; vkms_plane_state = to_vkms_plane_state(new_state); + shadow_plane_state = &vkms_plane_state->base; composer = vkms_plane_state->composer; memcpy(&composer->src, &new_state->src, sizeof(struct drm_rect)); memcpy(&composer->dst, &new_state->dst, sizeof(struct drm_rect)); memcpy(&composer->fb, fb, sizeof(struct drm_framebuffer)); + memcpy(&composer->map, &shadow_plane_state->data, sizeof(composer->map)); drm_framebuffer_get(&composer->fb); composer->offset = fb->offsets[0]; composer->pitch = fb->pitches[0]; @@ -150,45 +152,10 @@ static int vkms_plane_atomic_check(struct drm_plane *plane, return 0; } -static int vkms_prepare_fb(struct drm_plane *plane, - struct drm_plane_state *state) -{ - struct drm_gem_object *gem_obj; - struct dma_buf_map map; - int ret; - - if (!state->fb) - return 0; - - gem_obj = drm_gem_fb_get_obj(state->fb, 0); - ret = drm_gem_shmem_vmap(gem_obj, &map); - if 
(ret) - DRM_ERROR("vmap failed: %d\n", ret); - - return drm_gem_plane_helper_prepare_fb(plane, state); -} - -static void vkms_cleanup_fb(struct drm_plane *plane, - struct drm_plane_state *old_state) -{ - struct drm_gem_object *gem_obj; - struct drm_gem_shmem_object *shmem_obj; - struct dma_buf_map map; - - if (!old_state->fb) - return; - - gem_obj = drm_gem_fb_get_obj(old_state->fb, 0); - shmem_obj = to_drm_gem_shmem_obj(drm_gem_fb_get_obj(old_state->fb, 0)); - dma_buf_map_set_vaddr(&map, shmem_obj->vaddr); - drm_gem_shmem_vunmap(gem_obj, &map); -} - static const struct drm_plane_helper_funcs vkms_primary_helper_funcs = { .atomic_update = vkms_plane_atomic_update, .atomic_check = vkms_plane_atomic_check, - .prepare_fb = vkms_prepare_fb, - .cleanup_fb = vkms_cleanup_fb, + DRM_GEM_SHADOW_PLANE_HELPER_FUNCS, }; struct vkms_plane *vkms_plane_init(struct vkms_device *vkmsdev, diff --git a/drivers/gpu/drm/vkms/vkms_writeback.c b/drivers/gpu/drm/vkms/vkms_writeback.c index 0935686475a0..8694227f555f 100644 --- a/drivers/gpu/drm/vkms/vkms_writeback.c +++ b/drivers/gpu/drm/vkms/vkms_writeback.c @@ -65,41 +65,45 @@ static int vkms_wb_connector_get_modes(struct drm_connector *connector) static int vkms_wb_prepare_job(struct drm_writeback_connector *wb_connector, struct drm_writeback_job *job) { - struct drm_gem_object *gem_obj; - struct dma_buf_map map; + struct vkms_writeback_job *vkmsjob; int ret; if (!job->fb) return 0; - gem_obj = drm_gem_fb_get_obj(job->fb, 0); - ret = drm_gem_shmem_vmap(gem_obj, &map); + vkmsjob = kzalloc(sizeof(*vkmsjob), GFP_KERNEL); + if (!vkmsjob) + return -ENOMEM; + + ret = drm_gem_fb_vmap(job->fb, vkmsjob->map, vkmsjob->data); if (ret) { DRM_ERROR("vmap failed: %d\n", ret); - return ret; + goto err_kfree; } - job->priv = map.vaddr; + job->priv = vkmsjob; return 0; + +err_kfree: + kfree(vkmsjob); + return ret; } static void vkms_wb_cleanup_job(struct drm_writeback_connector *connector, struct drm_writeback_job *job) { - struct drm_gem_object *gem_obj; + struct vkms_writeback_job *vkmsjob = job->priv; struct vkms_device *vkmsdev; - struct dma_buf_map map; if (!job->fb) return; - gem_obj = drm_gem_fb_get_obj(job->fb, 0); - dma_buf_map_set_vaddr(&map, job->priv); - drm_gem_shmem_vunmap(gem_obj, &map); + drm_gem_fb_vunmap(job->fb, vkmsjob->map); - vkmsdev = drm_device_to_vkms_device(gem_obj->dev); + vkmsdev = drm_device_to_vkms_device(job->fb->dev); vkms_set_composer(&vkmsdev->output, false); + kfree(vkmsjob); } static void vkms_wb_atomic_commit(struct drm_connector *conn, diff --git a/drivers/gpu/drm/vmwgfx/Kconfig b/drivers/gpu/drm/vmwgfx/Kconfig index 0060ef842b5a..c9ce47c448e0 100644 --- a/drivers/gpu/drm/vmwgfx/Kconfig +++ b/drivers/gpu/drm/vmwgfx/Kconfig @@ -22,3 +22,11 @@ config DRM_VMWGFX_FBCON Choose this option if you are shipping a new vmwgfx userspace driver that supports using the kernel driver. +config DRM_VMWGFX_MKSSTATS + bool "Enable mksGuestStats instrumentation of vmwgfx by default" + depends on DRM_VMWGFX + depends on X86 + default n + help + Choose this option to instrument the kernel driver for mksGuestStats. 
+ diff --git a/drivers/gpu/drm/vmwgfx/Makefile b/drivers/gpu/drm/vmwgfx/Makefile index 09f6dcac768b..bc323f7d4032 100644 --- a/drivers/gpu/drm/vmwgfx/Makefile +++ b/drivers/gpu/drm/vmwgfx/Makefile @@ -9,7 +9,7 @@ vmwgfx-y := vmwgfx_execbuf.o vmwgfx_gmr.o vmwgfx_kms.o vmwgfx_drv.o \ vmwgfx_cotable.o vmwgfx_so.o vmwgfx_binding.o vmwgfx_msg.o \ vmwgfx_simple_resource.o vmwgfx_va.o vmwgfx_blit.o \ vmwgfx_validation.o vmwgfx_page_dirty.o vmwgfx_streamoutput.o \ - ttm_object.o ttm_memory.o + vmwgfx_devcaps.o ttm_object.o ttm_memory.o vmwgfx-$(CONFIG_DRM_FBDEV_EMULATION) += vmwgfx_fb.o vmwgfx-$(CONFIG_TRANSPARENT_HUGEPAGE) += vmwgfx_thp.o diff --git a/drivers/gpu/drm/vmwgfx/device_include/includeCheck.h b/drivers/gpu/drm/vmwgfx/device_include/includeCheck.h deleted file mode 100644 index 8cce7f15b6eb..000000000000 --- a/drivers/gpu/drm/vmwgfx/device_include/includeCheck.h +++ /dev/null @@ -1,3 +0,0 @@ -/* - * Intentionally empty file. - */ diff --git a/drivers/gpu/drm/vmwgfx/device_include/svga3d_caps.h b/drivers/gpu/drm/vmwgfx/device_include/svga3d_caps.h deleted file mode 100644 index 69c4253fbfbb..000000000000 --- a/drivers/gpu/drm/vmwgfx/device_include/svga3d_caps.h +++ /dev/null @@ -1,111 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 OR MIT */ -/********************************************************** - * Copyright 2007-2015 VMware, Inc. - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - **********************************************************/ - -/* - * svga3d_caps.h -- - * - * Definitions for SVGA3D hardware capabilities. Capabilities - * are used to query for optional rendering features during - * driver initialization. The capability data is stored as very - * basic key/value dictionary within the "FIFO register" memory - * area at the beginning of BAR2. - * - * Note that these definitions are only for 3D capabilities. - * The SVGA device also has "device capabilities" and "FIFO - * capabilities", which are non-3D-specific and are stored as - * bitfields rather than key/value pairs. - */ - -#ifndef _SVGA3D_CAPS_H_ -#define _SVGA3D_CAPS_H_ - -#define INCLUDE_ALLOW_MODULE -#define INCLUDE_ALLOW_USERLEVEL - -#include "includeCheck.h" - -#include "svga_reg.h" - -#define SVGA_FIFO_3D_CAPS_SIZE (SVGA_FIFO_3D_CAPS_LAST - \ - SVGA_FIFO_3D_CAPS + 1) - - -/* - * SVGA3dCapsRecordType - * - * Record types that can be found in the caps block. 
- * Related record types are grouped together numerically so that - * SVGA3dCaps_FindRecord() can be applied on a range of record - * types. - */ - -typedef enum { - SVGA3DCAPS_RECORD_UNKNOWN = 0, - SVGA3DCAPS_RECORD_DEVCAPS_MIN = 0x100, - SVGA3DCAPS_RECORD_DEVCAPS = 0x100, - SVGA3DCAPS_RECORD_DEVCAPS_MAX = 0x1ff, -} SVGA3dCapsRecordType; - - -/* - * SVGA3dCapsRecordHeader - * - * Header field leading each caps block record. Contains the offset (in - * register words, NOT bytes) to the next caps block record (or the end - * of caps block records which will be a zero word) and the record type - * as defined above. - */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCapsRecordHeader { - uint32 length; - SVGA3dCapsRecordType type; -} -#include "vmware_pack_end.h" -SVGA3dCapsRecordHeader; - - -/* - * SVGA3dCapsRecord - * - * Caps block record; "data" is a placeholder for the actual data structure - * contained within the record; - */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCapsRecord { - SVGA3dCapsRecordHeader header; - uint32 data[1]; -} -#include "vmware_pack_end.h" -SVGA3dCapsRecord; - - -typedef uint32 SVGA3dCapPair[2]; - -#endif diff --git a/drivers/gpu/drm/vmwgfx/device_include/svga3d_cmd.h b/drivers/gpu/drm/vmwgfx/device_include/svga3d_cmd.h index 799bc0963f7a..945c84b27e81 100644 --- a/drivers/gpu/drm/vmwgfx/device_include/svga3d_cmd.h +++ b/drivers/gpu/drm/vmwgfx/device_include/svga3d_cmd.h @@ -1,6 +1,6 @@ -/* SPDX-License-Identifier: GPL-2.0 OR MIT */ /********************************************************** - * Copyright 1998-2020 VMware, Inc. + * Copyright 2012-2021 VMware, Inc. + * SPDX-License-Identifier: GPL-2.0 OR MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation @@ -27,2249 +27,1487 @@ /* * svga3d_cmd.h -- * - * SVGA 3d hardware cmd definitions + * SVGA 3d hardware cmd definitions */ + + #ifndef _SVGA3D_CMD_H_ #define _SVGA3D_CMD_H_ -#define INCLUDE_ALLOW_MODULE -#define INCLUDE_ALLOW_USERLEVEL -#define INCLUDE_ALLOW_VMCORE - -#include "includeCheck.h" #include "svga3d_types.h" - -/* - * Identifiers for commands in the command FIFO. - * - * IDs between 1000 and 1039 (inclusive) were used by obsolete versions of - * the SVGA3D protocol and remain reserved; they should not be used in the - * future. - * - * IDs between 1040 and 2999 (inclusive) are available for use by the - * current SVGA3D protocol. - * - * FIFO clients other than SVGA3D should stay below 1000, or at 3000 - * and up. 
- */ - -typedef enum { - SVGA_3D_CMD_LEGACY_BASE = 1000, - SVGA_3D_CMD_BASE = 1040, - - SVGA_3D_CMD_SURFACE_DEFINE = 1040, - SVGA_3D_CMD_SURFACE_DESTROY = 1041, - SVGA_3D_CMD_SURFACE_COPY = 1042, - SVGA_3D_CMD_SURFACE_STRETCHBLT = 1043, - SVGA_3D_CMD_SURFACE_DMA = 1044, - SVGA_3D_CMD_CONTEXT_DEFINE = 1045, - SVGA_3D_CMD_CONTEXT_DESTROY = 1046, - SVGA_3D_CMD_SETTRANSFORM = 1047, - SVGA_3D_CMD_SETZRANGE = 1048, - SVGA_3D_CMD_SETRENDERSTATE = 1049, - SVGA_3D_CMD_SETRENDERTARGET = 1050, - SVGA_3D_CMD_SETTEXTURESTATE = 1051, - SVGA_3D_CMD_SETMATERIAL = 1052, - SVGA_3D_CMD_SETLIGHTDATA = 1053, - SVGA_3D_CMD_SETLIGHTENABLED = 1054, - SVGA_3D_CMD_SETVIEWPORT = 1055, - SVGA_3D_CMD_SETCLIPPLANE = 1056, - SVGA_3D_CMD_CLEAR = 1057, - SVGA_3D_CMD_PRESENT = 1058, - SVGA_3D_CMD_SHADER_DEFINE = 1059, - SVGA_3D_CMD_SHADER_DESTROY = 1060, - SVGA_3D_CMD_SET_SHADER = 1061, - SVGA_3D_CMD_SET_SHADER_CONST = 1062, - SVGA_3D_CMD_DRAW_PRIMITIVES = 1063, - SVGA_3D_CMD_SETSCISSORRECT = 1064, - SVGA_3D_CMD_BEGIN_QUERY = 1065, - SVGA_3D_CMD_END_QUERY = 1066, - SVGA_3D_CMD_WAIT_FOR_QUERY = 1067, - SVGA_3D_CMD_PRESENT_READBACK = 1068, - SVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN = 1069, - SVGA_3D_CMD_SURFACE_DEFINE_V2 = 1070, - SVGA_3D_CMD_GENERATE_MIPMAPS = 1071, - SVGA_3D_CMD_DEAD4 = 1072, - SVGA_3D_CMD_DEAD5 = 1073, - SVGA_3D_CMD_DEAD6 = 1074, - SVGA_3D_CMD_DEAD7 = 1075, - SVGA_3D_CMD_DEAD8 = 1076, - SVGA_3D_CMD_DEAD9 = 1077, - SVGA_3D_CMD_DEAD10 = 1078, - SVGA_3D_CMD_DEAD11 = 1079, - SVGA_3D_CMD_ACTIVATE_SURFACE = 1080, - SVGA_3D_CMD_DEACTIVATE_SURFACE = 1081, - SVGA_3D_CMD_SCREEN_DMA = 1082, - SVGA_3D_CMD_DEAD1 = 1083, - SVGA_3D_CMD_DEAD2 = 1084, - - SVGA_3D_CMD_DEAD12 = 1085, - SVGA_3D_CMD_DEAD13 = 1086, - SVGA_3D_CMD_DEAD14 = 1087, - SVGA_3D_CMD_DEAD15 = 1088, - SVGA_3D_CMD_DEAD16 = 1089, - SVGA_3D_CMD_DEAD17 = 1090, - - SVGA_3D_CMD_SET_OTABLE_BASE = 1091, - SVGA_3D_CMD_READBACK_OTABLE = 1092, - - SVGA_3D_CMD_DEFINE_GB_MOB = 1093, - SVGA_3D_CMD_DESTROY_GB_MOB = 1094, - SVGA_3D_CMD_DEAD3 = 1095, - SVGA_3D_CMD_UPDATE_GB_MOB_MAPPING = 1096, - - SVGA_3D_CMD_DEFINE_GB_SURFACE = 1097, - SVGA_3D_CMD_DESTROY_GB_SURFACE = 1098, - SVGA_3D_CMD_BIND_GB_SURFACE = 1099, - SVGA_3D_CMD_COND_BIND_GB_SURFACE = 1100, - SVGA_3D_CMD_UPDATE_GB_IMAGE = 1101, - SVGA_3D_CMD_UPDATE_GB_SURFACE = 1102, - SVGA_3D_CMD_READBACK_GB_IMAGE = 1103, - SVGA_3D_CMD_READBACK_GB_SURFACE = 1104, - SVGA_3D_CMD_INVALIDATE_GB_IMAGE = 1105, - SVGA_3D_CMD_INVALIDATE_GB_SURFACE = 1106, - - SVGA_3D_CMD_DEFINE_GB_CONTEXT = 1107, - SVGA_3D_CMD_DESTROY_GB_CONTEXT = 1108, - SVGA_3D_CMD_BIND_GB_CONTEXT = 1109, - SVGA_3D_CMD_READBACK_GB_CONTEXT = 1110, - SVGA_3D_CMD_INVALIDATE_GB_CONTEXT = 1111, - - SVGA_3D_CMD_DEFINE_GB_SHADER = 1112, - SVGA_3D_CMD_DESTROY_GB_SHADER = 1113, - SVGA_3D_CMD_BIND_GB_SHADER = 1114, - - SVGA_3D_CMD_SET_OTABLE_BASE64 = 1115, - - SVGA_3D_CMD_BEGIN_GB_QUERY = 1116, - SVGA_3D_CMD_END_GB_QUERY = 1117, - SVGA_3D_CMD_WAIT_FOR_GB_QUERY = 1118, - - SVGA_3D_CMD_NOP = 1119, - - SVGA_3D_CMD_ENABLE_GART = 1120, - SVGA_3D_CMD_DISABLE_GART = 1121, - SVGA_3D_CMD_MAP_MOB_INTO_GART = 1122, - SVGA_3D_CMD_UNMAP_GART_RANGE = 1123, - - SVGA_3D_CMD_DEFINE_GB_SCREENTARGET = 1124, - SVGA_3D_CMD_DESTROY_GB_SCREENTARGET = 1125, - SVGA_3D_CMD_BIND_GB_SCREENTARGET = 1126, - SVGA_3D_CMD_UPDATE_GB_SCREENTARGET = 1127, - - SVGA_3D_CMD_READBACK_GB_IMAGE_PARTIAL = 1128, - SVGA_3D_CMD_INVALIDATE_GB_IMAGE_PARTIAL = 1129, - - SVGA_3D_CMD_SET_GB_SHADERCONSTS_INLINE = 1130, - - SVGA_3D_CMD_GB_SCREEN_DMA = 1131, - SVGA_3D_CMD_BIND_GB_SURFACE_WITH_PITCH = 1132, - 
SVGA_3D_CMD_GB_MOB_FENCE = 1133, - SVGA_3D_CMD_DEFINE_GB_SURFACE_V2 = 1134, - SVGA_3D_CMD_DEFINE_GB_MOB64 = 1135, - SVGA_3D_CMD_REDEFINE_GB_MOB64 = 1136, - SVGA_3D_CMD_NOP_ERROR = 1137, - - SVGA_3D_CMD_SET_VERTEX_STREAMS = 1138, - SVGA_3D_CMD_SET_VERTEX_DECLS = 1139, - SVGA_3D_CMD_SET_VERTEX_DIVISORS = 1140, - SVGA_3D_CMD_DRAW = 1141, - SVGA_3D_CMD_DRAW_INDEXED = 1142, - - /* - * DX10 Commands - */ - SVGA_3D_CMD_DX_MIN = 1143, - SVGA_3D_CMD_DX_DEFINE_CONTEXT = 1143, - SVGA_3D_CMD_DX_DESTROY_CONTEXT = 1144, - SVGA_3D_CMD_DX_BIND_CONTEXT = 1145, - SVGA_3D_CMD_DX_READBACK_CONTEXT = 1146, - SVGA_3D_CMD_DX_INVALIDATE_CONTEXT = 1147, - SVGA_3D_CMD_DX_SET_SINGLE_CONSTANT_BUFFER = 1148, - SVGA_3D_CMD_DX_SET_SHADER_RESOURCES = 1149, - SVGA_3D_CMD_DX_SET_SHADER = 1150, - SVGA_3D_CMD_DX_SET_SAMPLERS = 1151, - SVGA_3D_CMD_DX_DRAW = 1152, - SVGA_3D_CMD_DX_DRAW_INDEXED = 1153, - SVGA_3D_CMD_DX_DRAW_INSTANCED = 1154, - SVGA_3D_CMD_DX_DRAW_INDEXED_INSTANCED = 1155, - SVGA_3D_CMD_DX_DRAW_AUTO = 1156, - SVGA_3D_CMD_DX_SET_INPUT_LAYOUT = 1157, - SVGA_3D_CMD_DX_SET_VERTEX_BUFFERS = 1158, - SVGA_3D_CMD_DX_SET_INDEX_BUFFER = 1159, - SVGA_3D_CMD_DX_SET_TOPOLOGY = 1160, - SVGA_3D_CMD_DX_SET_RENDERTARGETS = 1161, - SVGA_3D_CMD_DX_SET_BLEND_STATE = 1162, - SVGA_3D_CMD_DX_SET_DEPTHSTENCIL_STATE = 1163, - SVGA_3D_CMD_DX_SET_RASTERIZER_STATE = 1164, - SVGA_3D_CMD_DX_DEFINE_QUERY = 1165, - SVGA_3D_CMD_DX_DESTROY_QUERY = 1166, - SVGA_3D_CMD_DX_BIND_QUERY = 1167, - SVGA_3D_CMD_DX_SET_QUERY_OFFSET = 1168, - SVGA_3D_CMD_DX_BEGIN_QUERY = 1169, - SVGA_3D_CMD_DX_END_QUERY = 1170, - SVGA_3D_CMD_DX_READBACK_QUERY = 1171, - SVGA_3D_CMD_DX_SET_PREDICATION = 1172, - SVGA_3D_CMD_DX_SET_SOTARGETS = 1173, - SVGA_3D_CMD_DX_SET_VIEWPORTS = 1174, - SVGA_3D_CMD_DX_SET_SCISSORRECTS = 1175, - SVGA_3D_CMD_DX_CLEAR_RENDERTARGET_VIEW = 1176, - SVGA_3D_CMD_DX_CLEAR_DEPTHSTENCIL_VIEW = 1177, - SVGA_3D_CMD_DX_PRED_COPY_REGION = 1178, - SVGA_3D_CMD_DX_PRED_COPY = 1179, - SVGA_3D_CMD_DX_PRESENTBLT = 1180, - SVGA_3D_CMD_DX_GENMIPS = 1181, - SVGA_3D_CMD_DX_UPDATE_SUBRESOURCE = 1182, - SVGA_3D_CMD_DX_READBACK_SUBRESOURCE = 1183, - SVGA_3D_CMD_DX_INVALIDATE_SUBRESOURCE = 1184, - SVGA_3D_CMD_DX_DEFINE_SHADERRESOURCE_VIEW = 1185, - SVGA_3D_CMD_DX_DESTROY_SHADERRESOURCE_VIEW = 1186, - SVGA_3D_CMD_DX_DEFINE_RENDERTARGET_VIEW = 1187, - SVGA_3D_CMD_DX_DESTROY_RENDERTARGET_VIEW = 1188, - SVGA_3D_CMD_DX_DEFINE_DEPTHSTENCIL_VIEW = 1189, - SVGA_3D_CMD_DX_DESTROY_DEPTHSTENCIL_VIEW = 1190, - SVGA_3D_CMD_DX_DEFINE_ELEMENTLAYOUT = 1191, - SVGA_3D_CMD_DX_DESTROY_ELEMENTLAYOUT = 1192, - SVGA_3D_CMD_DX_DEFINE_BLEND_STATE = 1193, - SVGA_3D_CMD_DX_DESTROY_BLEND_STATE = 1194, - SVGA_3D_CMD_DX_DEFINE_DEPTHSTENCIL_STATE = 1195, - SVGA_3D_CMD_DX_DESTROY_DEPTHSTENCIL_STATE = 1196, - SVGA_3D_CMD_DX_DEFINE_RASTERIZER_STATE = 1197, - SVGA_3D_CMD_DX_DESTROY_RASTERIZER_STATE = 1198, - SVGA_3D_CMD_DX_DEFINE_SAMPLER_STATE = 1199, - SVGA_3D_CMD_DX_DESTROY_SAMPLER_STATE = 1200, - SVGA_3D_CMD_DX_DEFINE_SHADER = 1201, - SVGA_3D_CMD_DX_DESTROY_SHADER = 1202, - SVGA_3D_CMD_DX_BIND_SHADER = 1203, - SVGA_3D_CMD_DX_DEFINE_STREAMOUTPUT = 1204, - SVGA_3D_CMD_DX_DESTROY_STREAMOUTPUT = 1205, - SVGA_3D_CMD_DX_SET_STREAMOUTPUT = 1206, - SVGA_3D_CMD_DX_SET_COTABLE = 1207, - SVGA_3D_CMD_DX_READBACK_COTABLE = 1208, - SVGA_3D_CMD_DX_BUFFER_COPY = 1209, - SVGA_3D_CMD_DX_TRANSFER_FROM_BUFFER = 1210, - SVGA_3D_CMD_DX_SURFACE_COPY_AND_READBACK = 1211, - SVGA_3D_CMD_DX_MOVE_QUERY = 1212, - SVGA_3D_CMD_DX_BIND_ALL_QUERY = 1213, - SVGA_3D_CMD_DX_READBACK_ALL_QUERY = 1214, - 
SVGA_3D_CMD_DX_PRED_TRANSFER_FROM_BUFFER = 1215, - SVGA_3D_CMD_DX_MOB_FENCE_64 = 1216, - SVGA_3D_CMD_DX_BIND_ALL_SHADER = 1217, - SVGA_3D_CMD_DX_HINT = 1218, - SVGA_3D_CMD_DX_BUFFER_UPDATE = 1219, - SVGA_3D_CMD_DX_SET_VS_CONSTANT_BUFFER_OFFSET = 1220, - SVGA_3D_CMD_DX_SET_PS_CONSTANT_BUFFER_OFFSET = 1221, - SVGA_3D_CMD_DX_SET_GS_CONSTANT_BUFFER_OFFSET = 1222, - SVGA_3D_CMD_DX_SET_HS_CONSTANT_BUFFER_OFFSET = 1223, - SVGA_3D_CMD_DX_SET_DS_CONSTANT_BUFFER_OFFSET = 1224, - SVGA_3D_CMD_DX_SET_CS_CONSTANT_BUFFER_OFFSET = 1225, - - SVGA_3D_CMD_DX_COND_BIND_ALL_SHADER = 1226, - SVGA_3D_CMD_DX_MAX = 1227, - - SVGA_3D_CMD_SCREEN_COPY = 1227, - - SVGA_3D_CMD_RESERVED1 = 1228, - SVGA_3D_CMD_RESERVED2 = 1229, - SVGA_3D_CMD_RESERVED3 = 1230, - SVGA_3D_CMD_RESERVED4 = 1231, - SVGA_3D_CMD_RESERVED5 = 1232, - SVGA_3D_CMD_RESERVED6 = 1233, - SVGA_3D_CMD_RESERVED7 = 1234, - SVGA_3D_CMD_RESERVED8 = 1235, - - SVGA_3D_CMD_GROW_OTABLE = 1236, - SVGA_3D_CMD_DX_GROW_COTABLE = 1237, - SVGA_3D_CMD_INTRA_SURFACE_COPY = 1238, - - SVGA_3D_CMD_DEFINE_GB_SURFACE_V3 = 1239, - - SVGA_3D_CMD_DX_RESOLVE_COPY = 1240, - SVGA_3D_CMD_DX_PRED_RESOLVE_COPY = 1241, - SVGA_3D_CMD_DX_PRED_CONVERT_REGION = 1242, - SVGA_3D_CMD_DX_PRED_CONVERT = 1243, - SVGA_3D_CMD_WHOLE_SURFACE_COPY = 1244, - - SVGA_3D_CMD_DX_DEFINE_UA_VIEW = 1245, - SVGA_3D_CMD_DX_DESTROY_UA_VIEW = 1246, - SVGA_3D_CMD_DX_CLEAR_UA_VIEW_UINT = 1247, - SVGA_3D_CMD_DX_CLEAR_UA_VIEW_FLOAT = 1248, - SVGA_3D_CMD_DX_COPY_STRUCTURE_COUNT = 1249, - SVGA_3D_CMD_DX_SET_UA_VIEWS = 1250, - - SVGA_3D_CMD_DX_DRAW_INDEXED_INSTANCED_INDIRECT = 1251, - SVGA_3D_CMD_DX_DRAW_INSTANCED_INDIRECT = 1252, - SVGA_3D_CMD_DX_DISPATCH = 1253, - SVGA_3D_CMD_DX_DISPATCH_INDIRECT = 1254, - - SVGA_3D_CMD_WRITE_ZERO_SURFACE = 1255, - SVGA_3D_CMD_HINT_ZERO_SURFACE = 1256, - SVGA_3D_CMD_DX_TRANSFER_TO_BUFFER = 1257, - SVGA_3D_CMD_DX_SET_STRUCTURE_COUNT = 1258, - - SVGA_3D_CMD_LOGICOPS_BITBLT = 1259, - SVGA_3D_CMD_LOGICOPS_TRANSBLT = 1260, - SVGA_3D_CMD_LOGICOPS_STRETCHBLT = 1261, - SVGA_3D_CMD_LOGICOPS_COLORFILL = 1262, - SVGA_3D_CMD_LOGICOPS_ALPHABLEND = 1263, - SVGA_3D_CMD_LOGICOPS_CLEARTYPEBLEND = 1264, - - SVGA_3D_CMD_RESERVED2_1 = 1265, - - SVGA_3D_CMD_RESERVED2_2 = 1266, - SVGA_3D_CMD_DEFINE_GB_SURFACE_V4 = 1267, - SVGA_3D_CMD_DX_SET_CS_UA_VIEWS = 1268, - SVGA_3D_CMD_DX_SET_MIN_LOD = 1269, - SVGA_3D_CMD_RESERVED2_3 = 1270, - SVGA_3D_CMD_RESERVED2_4 = 1271, - SVGA_3D_CMD_DX_DEFINE_DEPTHSTENCIL_VIEW_V2 = 1272, - SVGA_3D_CMD_DX_DEFINE_STREAMOUTPUT_WITH_MOB = 1273, - SVGA_3D_CMD_DX_SET_SHADER_IFACE = 1274, - SVGA_3D_CMD_DX_BIND_STREAMOUTPUT = 1275, - SVGA_3D_CMD_SURFACE_STRETCHBLT_NON_MS_TO_MS = 1276, - SVGA_3D_CMD_DX_BIND_SHADER_IFACE = 1277, - - SVGA_3D_CMD_MAX = 1278, - SVGA_3D_CMD_FUTURE_MAX = 3000 +#include "svga3d_limits.h" +#include "svga_reg.h" + +typedef enum SVGAFifo3dCmdId { + SVGA_3D_CMD_LEGACY_BASE = 1000, + SVGA_3D_CMD_BASE = 1040, + + SVGA_3D_CMD_SURFACE_DEFINE = 1040, + SVGA_3D_CMD_SURFACE_DESTROY = 1041, + SVGA_3D_CMD_SURFACE_COPY = 1042, + SVGA_3D_CMD_SURFACE_STRETCHBLT = 1043, + SVGA_3D_CMD_SURFACE_DMA = 1044, + SVGA_3D_CMD_CONTEXT_DEFINE = 1045, + SVGA_3D_CMD_CONTEXT_DESTROY = 1046, + SVGA_3D_CMD_SETTRANSFORM = 1047, + SVGA_3D_CMD_SETZRANGE = 1048, + SVGA_3D_CMD_SETRENDERSTATE = 1049, + SVGA_3D_CMD_SETRENDERTARGET = 1050, + SVGA_3D_CMD_SETTEXTURESTATE = 1051, + SVGA_3D_CMD_SETMATERIAL = 1052, + SVGA_3D_CMD_SETLIGHTDATA = 1053, + SVGA_3D_CMD_SETLIGHTENABLED = 1054, + SVGA_3D_CMD_SETVIEWPORT = 1055, + SVGA_3D_CMD_SETCLIPPLANE = 1056, + SVGA_3D_CMD_CLEAR = 1057, + SVGA_3D_CMD_PRESENT = 
1058, + SVGA_3D_CMD_SHADER_DEFINE = 1059, + SVGA_3D_CMD_SHADER_DESTROY = 1060, + SVGA_3D_CMD_SET_SHADER = 1061, + SVGA_3D_CMD_SET_SHADER_CONST = 1062, + SVGA_3D_CMD_DRAW_PRIMITIVES = 1063, + SVGA_3D_CMD_SETSCISSORRECT = 1064, + SVGA_3D_CMD_BEGIN_QUERY = 1065, + SVGA_3D_CMD_END_QUERY = 1066, + SVGA_3D_CMD_WAIT_FOR_QUERY = 1067, + SVGA_3D_CMD_PRESENT_READBACK = 1068, + SVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN = 1069, + SVGA_3D_CMD_SURFACE_DEFINE_V2 = 1070, + SVGA_3D_CMD_GENERATE_MIPMAPS = 1071, + SVGA_3D_CMD_DEAD4 = 1072, + SVGA_3D_CMD_DEAD5 = 1073, + SVGA_3D_CMD_DEAD6 = 1074, + SVGA_3D_CMD_DEAD7 = 1075, + SVGA_3D_CMD_DEAD8 = 1076, + SVGA_3D_CMD_DEAD9 = 1077, + SVGA_3D_CMD_DEAD10 = 1078, + SVGA_3D_CMD_DEAD11 = 1079, + SVGA_3D_CMD_ACTIVATE_SURFACE = 1080, + SVGA_3D_CMD_DEACTIVATE_SURFACE = 1081, + SVGA_3D_CMD_SCREEN_DMA = 1082, + SVGA_3D_CMD_DEAD1 = 1083, + SVGA_3D_CMD_DEAD2 = 1084, + + SVGA_3D_CMD_DEAD12 = 1085, + SVGA_3D_CMD_DEAD13 = 1086, + SVGA_3D_CMD_DEAD14 = 1087, + SVGA_3D_CMD_DEAD15 = 1088, + SVGA_3D_CMD_DEAD16 = 1089, + SVGA_3D_CMD_DEAD17 = 1090, + + SVGA_3D_CMD_SET_OTABLE_BASE = 1091, + SVGA_3D_CMD_READBACK_OTABLE = 1092, + + SVGA_3D_CMD_DEFINE_GB_MOB = 1093, + SVGA_3D_CMD_DESTROY_GB_MOB = 1094, + SVGA_3D_CMD_DEAD3 = 1095, + SVGA_3D_CMD_UPDATE_GB_MOB_MAPPING = 1096, + + SVGA_3D_CMD_DEFINE_GB_SURFACE = 1097, + SVGA_3D_CMD_DESTROY_GB_SURFACE = 1098, + SVGA_3D_CMD_BIND_GB_SURFACE = 1099, + SVGA_3D_CMD_COND_BIND_GB_SURFACE = 1100, + SVGA_3D_CMD_UPDATE_GB_IMAGE = 1101, + SVGA_3D_CMD_UPDATE_GB_SURFACE = 1102, + SVGA_3D_CMD_READBACK_GB_IMAGE = 1103, + SVGA_3D_CMD_READBACK_GB_SURFACE = 1104, + SVGA_3D_CMD_INVALIDATE_GB_IMAGE = 1105, + SVGA_3D_CMD_INVALIDATE_GB_SURFACE = 1106, + + SVGA_3D_CMD_DEFINE_GB_CONTEXT = 1107, + SVGA_3D_CMD_DESTROY_GB_CONTEXT = 1108, + SVGA_3D_CMD_BIND_GB_CONTEXT = 1109, + SVGA_3D_CMD_READBACK_GB_CONTEXT = 1110, + SVGA_3D_CMD_INVALIDATE_GB_CONTEXT = 1111, + + SVGA_3D_CMD_DEFINE_GB_SHADER = 1112, + SVGA_3D_CMD_DESTROY_GB_SHADER = 1113, + SVGA_3D_CMD_BIND_GB_SHADER = 1114, + + SVGA_3D_CMD_SET_OTABLE_BASE64 = 1115, + + SVGA_3D_CMD_BEGIN_GB_QUERY = 1116, + SVGA_3D_CMD_END_GB_QUERY = 1117, + SVGA_3D_CMD_WAIT_FOR_GB_QUERY = 1118, + + SVGA_3D_CMD_NOP = 1119, + + SVGA_3D_CMD_ENABLE_GART = 1120, + SVGA_3D_CMD_DISABLE_GART = 1121, + SVGA_3D_CMD_MAP_MOB_INTO_GART = 1122, + SVGA_3D_CMD_UNMAP_GART_RANGE = 1123, + + SVGA_3D_CMD_DEFINE_GB_SCREENTARGET = 1124, + SVGA_3D_CMD_DESTROY_GB_SCREENTARGET = 1125, + SVGA_3D_CMD_BIND_GB_SCREENTARGET = 1126, + SVGA_3D_CMD_UPDATE_GB_SCREENTARGET = 1127, + + SVGA_3D_CMD_READBACK_GB_IMAGE_PARTIAL = 1128, + SVGA_3D_CMD_INVALIDATE_GB_IMAGE_PARTIAL = 1129, + + SVGA_3D_CMD_SET_GB_SHADERCONSTS_INLINE = 1130, + + SVGA_3D_CMD_GB_SCREEN_DMA = 1131, + SVGA_3D_CMD_BIND_GB_SURFACE_WITH_PITCH = 1132, + SVGA_3D_CMD_GB_MOB_FENCE = 1133, + SVGA_3D_CMD_DEFINE_GB_SURFACE_V2 = 1134, + SVGA_3D_CMD_DEFINE_GB_MOB64 = 1135, + SVGA_3D_CMD_REDEFINE_GB_MOB64 = 1136, + SVGA_3D_CMD_NOP_ERROR = 1137, + + SVGA_3D_CMD_SET_VERTEX_STREAMS = 1138, + SVGA_3D_CMD_SET_VERTEX_DECLS = 1139, + SVGA_3D_CMD_SET_VERTEX_DIVISORS = 1140, + SVGA_3D_CMD_DRAW = 1141, + SVGA_3D_CMD_DRAW_INDEXED = 1142, + + SVGA_3D_CMD_DX_MIN = 1143, + SVGA_3D_CMD_DX_DEFINE_CONTEXT = 1143, + SVGA_3D_CMD_DX_DESTROY_CONTEXT = 1144, + SVGA_3D_CMD_DX_BIND_CONTEXT = 1145, + SVGA_3D_CMD_DX_READBACK_CONTEXT = 1146, + SVGA_3D_CMD_DX_INVALIDATE_CONTEXT = 1147, + SVGA_3D_CMD_DX_SET_SINGLE_CONSTANT_BUFFER = 1148, + SVGA_3D_CMD_DX_SET_SHADER_RESOURCES = 1149, + SVGA_3D_CMD_DX_SET_SHADER = 1150, + SVGA_3D_CMD_DX_SET_SAMPLERS = 
1151, + SVGA_3D_CMD_DX_DRAW = 1152, + SVGA_3D_CMD_DX_DRAW_INDEXED = 1153, + SVGA_3D_CMD_DX_DRAW_INSTANCED = 1154, + SVGA_3D_CMD_DX_DRAW_INDEXED_INSTANCED = 1155, + SVGA_3D_CMD_DX_DRAW_AUTO = 1156, + SVGA_3D_CMD_DX_SET_INPUT_LAYOUT = 1157, + SVGA_3D_CMD_DX_SET_VERTEX_BUFFERS = 1158, + SVGA_3D_CMD_DX_SET_INDEX_BUFFER = 1159, + SVGA_3D_CMD_DX_SET_TOPOLOGY = 1160, + SVGA_3D_CMD_DX_SET_RENDERTARGETS = 1161, + SVGA_3D_CMD_DX_SET_BLEND_STATE = 1162, + SVGA_3D_CMD_DX_SET_DEPTHSTENCIL_STATE = 1163, + SVGA_3D_CMD_DX_SET_RASTERIZER_STATE = 1164, + SVGA_3D_CMD_DX_DEFINE_QUERY = 1165, + SVGA_3D_CMD_DX_DESTROY_QUERY = 1166, + SVGA_3D_CMD_DX_BIND_QUERY = 1167, + SVGA_3D_CMD_DX_SET_QUERY_OFFSET = 1168, + SVGA_3D_CMD_DX_BEGIN_QUERY = 1169, + SVGA_3D_CMD_DX_END_QUERY = 1170, + SVGA_3D_CMD_DX_READBACK_QUERY = 1171, + SVGA_3D_CMD_DX_SET_PREDICATION = 1172, + SVGA_3D_CMD_DX_SET_SOTARGETS = 1173, + SVGA_3D_CMD_DX_SET_VIEWPORTS = 1174, + SVGA_3D_CMD_DX_SET_SCISSORRECTS = 1175, + SVGA_3D_CMD_DX_CLEAR_RENDERTARGET_VIEW = 1176, + SVGA_3D_CMD_DX_CLEAR_DEPTHSTENCIL_VIEW = 1177, + SVGA_3D_CMD_DX_PRED_COPY_REGION = 1178, + SVGA_3D_CMD_DX_PRED_COPY = 1179, + SVGA_3D_CMD_DX_PRESENTBLT = 1180, + SVGA_3D_CMD_DX_GENMIPS = 1181, + SVGA_3D_CMD_DX_UPDATE_SUBRESOURCE = 1182, + SVGA_3D_CMD_DX_READBACK_SUBRESOURCE = 1183, + SVGA_3D_CMD_DX_INVALIDATE_SUBRESOURCE = 1184, + SVGA_3D_CMD_DX_DEFINE_SHADERRESOURCE_VIEW = 1185, + SVGA_3D_CMD_DX_DESTROY_SHADERRESOURCE_VIEW = 1186, + SVGA_3D_CMD_DX_DEFINE_RENDERTARGET_VIEW = 1187, + SVGA_3D_CMD_DX_DESTROY_RENDERTARGET_VIEW = 1188, + SVGA_3D_CMD_DX_DEFINE_DEPTHSTENCIL_VIEW = 1189, + SVGA_3D_CMD_DX_DESTROY_DEPTHSTENCIL_VIEW = 1190, + SVGA_3D_CMD_DX_DEFINE_ELEMENTLAYOUT = 1191, + SVGA_3D_CMD_DX_DESTROY_ELEMENTLAYOUT = 1192, + SVGA_3D_CMD_DX_DEFINE_BLEND_STATE = 1193, + SVGA_3D_CMD_DX_DESTROY_BLEND_STATE = 1194, + SVGA_3D_CMD_DX_DEFINE_DEPTHSTENCIL_STATE = 1195, + SVGA_3D_CMD_DX_DESTROY_DEPTHSTENCIL_STATE = 1196, + SVGA_3D_CMD_DX_DEFINE_RASTERIZER_STATE = 1197, + SVGA_3D_CMD_DX_DESTROY_RASTERIZER_STATE = 1198, + SVGA_3D_CMD_DX_DEFINE_SAMPLER_STATE = 1199, + SVGA_3D_CMD_DX_DESTROY_SAMPLER_STATE = 1200, + SVGA_3D_CMD_DX_DEFINE_SHADER = 1201, + SVGA_3D_CMD_DX_DESTROY_SHADER = 1202, + SVGA_3D_CMD_DX_BIND_SHADER = 1203, + SVGA_3D_CMD_DX_DEFINE_STREAMOUTPUT = 1204, + SVGA_3D_CMD_DX_DESTROY_STREAMOUTPUT = 1205, + SVGA_3D_CMD_DX_SET_STREAMOUTPUT = 1206, + SVGA_3D_CMD_DX_SET_COTABLE = 1207, + SVGA_3D_CMD_DX_READBACK_COTABLE = 1208, + SVGA_3D_CMD_DX_BUFFER_COPY = 1209, + SVGA_3D_CMD_DX_TRANSFER_FROM_BUFFER = 1210, + SVGA_3D_CMD_DX_SURFACE_COPY_AND_READBACK = 1211, + SVGA_3D_CMD_DX_MOVE_QUERY = 1212, + SVGA_3D_CMD_DX_BIND_ALL_QUERY = 1213, + SVGA_3D_CMD_DX_READBACK_ALL_QUERY = 1214, + SVGA_3D_CMD_DX_PRED_TRANSFER_FROM_BUFFER = 1215, + SVGA_3D_CMD_DX_MOB_FENCE_64 = 1216, + SVGA_3D_CMD_DX_BIND_ALL_SHADER = 1217, + SVGA_3D_CMD_DX_HINT = 1218, + SVGA_3D_CMD_DX_BUFFER_UPDATE = 1219, + SVGA_3D_CMD_DX_SET_VS_CONSTANT_BUFFER_OFFSET = 1220, + SVGA_3D_CMD_DX_SET_PS_CONSTANT_BUFFER_OFFSET = 1221, + SVGA_3D_CMD_DX_SET_GS_CONSTANT_BUFFER_OFFSET = 1222, + SVGA_3D_CMD_DX_SET_HS_CONSTANT_BUFFER_OFFSET = 1223, + SVGA_3D_CMD_DX_SET_DS_CONSTANT_BUFFER_OFFSET = 1224, + SVGA_3D_CMD_DX_SET_CS_CONSTANT_BUFFER_OFFSET = 1225, + + SVGA_3D_CMD_DX_COND_BIND_ALL_SHADER = 1226, + SVGA_3D_CMD_DX_MAX = 1227, + + SVGA_3D_CMD_SCREEN_COPY = 1227, + + SVGA_3D_CMD_RESERVED1 = 1228, + SVGA_3D_CMD_RESERVED2 = 1229, + SVGA_3D_CMD_RESERVED3 = 1230, + SVGA_3D_CMD_RESERVED4 = 1231, + SVGA_3D_CMD_RESERVED5 = 1232, + SVGA_3D_CMD_RESERVED6 = 1233, + 
SVGA_3D_CMD_RESERVED7 = 1234, + SVGA_3D_CMD_RESERVED8 = 1235, + + SVGA_3D_CMD_GROW_OTABLE = 1236, + SVGA_3D_CMD_DX_GROW_COTABLE = 1237, + SVGA_3D_CMD_INTRA_SURFACE_COPY = 1238, + + SVGA_3D_CMD_DEFINE_GB_SURFACE_V3 = 1239, + + SVGA_3D_CMD_DX_RESOLVE_COPY = 1240, + SVGA_3D_CMD_DX_PRED_RESOLVE_COPY = 1241, + SVGA_3D_CMD_DX_PRED_CONVERT_REGION = 1242, + SVGA_3D_CMD_DX_PRED_CONVERT = 1243, + SVGA_3D_CMD_WHOLE_SURFACE_COPY = 1244, + + SVGA_3D_CMD_DX_DEFINE_UA_VIEW = 1245, + SVGA_3D_CMD_DX_DESTROY_UA_VIEW = 1246, + SVGA_3D_CMD_DX_CLEAR_UA_VIEW_UINT = 1247, + SVGA_3D_CMD_DX_CLEAR_UA_VIEW_FLOAT = 1248, + SVGA_3D_CMD_DX_COPY_STRUCTURE_COUNT = 1249, + SVGA_3D_CMD_DX_SET_UA_VIEWS = 1250, + + SVGA_3D_CMD_DX_DRAW_INDEXED_INSTANCED_INDIRECT = 1251, + SVGA_3D_CMD_DX_DRAW_INSTANCED_INDIRECT = 1252, + SVGA_3D_CMD_DX_DISPATCH = 1253, + SVGA_3D_CMD_DX_DISPATCH_INDIRECT = 1254, + + SVGA_3D_CMD_WRITE_ZERO_SURFACE = 1255, + SVGA_3D_CMD_UPDATE_ZERO_SURFACE = 1256, + SVGA_3D_CMD_DX_TRANSFER_TO_BUFFER = 1257, + SVGA_3D_CMD_DX_SET_STRUCTURE_COUNT = 1258, + + SVGA_3D_CMD_LOGICOPS_BITBLT = 1259, + SVGA_3D_CMD_LOGICOPS_TRANSBLT = 1260, + SVGA_3D_CMD_LOGICOPS_STRETCHBLT = 1261, + SVGA_3D_CMD_LOGICOPS_COLORFILL = 1262, + SVGA_3D_CMD_LOGICOPS_ALPHABLEND = 1263, + SVGA_3D_CMD_LOGICOPS_CLEARTYPEBLEND = 1264, + + SVGA_3D_CMD_DX_COPY_COTABLE_INTO_MOB = 1265, + + SVGA_3D_CMD_UPDATE_GB_SCREENTARGET_V2 = 1266, + + SVGA_3D_CMD_DEFINE_GB_SURFACE_V4 = 1267, + SVGA_3D_CMD_DX_SET_CS_UA_VIEWS = 1268, + SVGA_3D_CMD_DX_SET_MIN_LOD = 1269, + + SVGA_3D_CMD_DX_DEFINE_DEPTHSTENCIL_VIEW_V2 = 1272, + SVGA_3D_CMD_DX_DEFINE_STREAMOUTPUT_WITH_MOB = 1273, + SVGA_3D_CMD_DX_SET_SHADER_IFACE = 1274, + SVGA_3D_CMD_DX_BIND_STREAMOUTPUT = 1275, + SVGA_3D_CMD_SURFACE_STRETCHBLT_NON_MS_TO_MS = 1276, + SVGA_3D_CMD_DX_BIND_SHADER_IFACE = 1277, + + SVGA_3D_CMD_UPDATE_GB_SCREENTARGET_MOVE = 1278, + + SVGA_3D_CMD_DX_PRED_STAGING_COPY = 1281, + SVGA_3D_CMD_DX_STAGING_COPY = 1282, + SVGA_3D_CMD_DX_PRED_STAGING_COPY_REGION = 1283, + SVGA_3D_CMD_DX_SET_VERTEX_BUFFERS_V2 = 1284, + SVGA_3D_CMD_DX_SET_INDEX_BUFFER_V2 = 1285, + SVGA_3D_CMD_DX_SET_VERTEX_BUFFERS_OFFSET_AND_SIZE = 1286, + SVGA_3D_CMD_DX_SET_INDEX_BUFFER_OFFSET_AND_SIZE = 1287, + SVGA_3D_CMD_DX_DEFINE_RASTERIZER_STATE_V2 = 1288, + SVGA_3D_CMD_DX_PRED_STAGING_CONVERT_REGION = 1289, + SVGA_3D_CMD_DX_PRED_STAGING_CONVERT = 1290, + SVGA_3D_CMD_DX_STAGING_BUFFER_COPY = 1291, + + SVGA_3D_CMD_MAX = 1303, + SVGA_3D_CMD_FUTURE_MAX = 3000 } SVGAFifo3dCmdId; #define SVGA_NUM_3D_CMD (SVGA_3D_CMD_MAX - SVGA_3D_CMD_BASE) -/* - * FIFO command format definitions: - */ - -/* - * The data size header following cmdNum for every 3d command - */ -typedef -#include "vmware_pack_begin.h" -struct { - uint32 id; - uint32 size; -} -#include "vmware_pack_end.h" -SVGA3dCmdHeader; - -typedef -#include "vmware_pack_begin.h" -struct { - uint32 numMipLevels; -} -#include "vmware_pack_end.h" -SVGA3dSurfaceFace; - -typedef -#include "vmware_pack_begin.h" -struct { - uint32 sid; - SVGA3dSurface1Flags surfaceFlags; - SVGA3dSurfaceFormat format; - - /* - * If surfaceFlags has SVGA3D_SURFACE_CUBEMAP bit set, all SVGA3dSurfaceFace - * structures must have the same value of numMipLevels field. - * Otherwise, all but the first SVGA3dSurfaceFace structures must have the - * numMipLevels set to 0. - */ - SVGA3dSurfaceFace face[SVGA3D_MAX_SURFACE_FACES]; - - /* - * Followed by an SVGA3dSize structure for each mip level in each face. 
- * - * A note on surface sizes: Sizes are always specified in pixels, - * even if the true surface size is not a multiple of the minimum - * block size of the surface's format. For example, a 3x3x1 DXT1 - * compressed texture would actually be stored as a 4x4x1 image in - * memory. - */ -} -#include "vmware_pack_end.h" -SVGA3dCmdDefineSurface; /* SVGA_3D_CMD_SURFACE_DEFINE */ - -typedef -#include "vmware_pack_begin.h" -struct { - uint32 sid; - SVGA3dSurface1Flags surfaceFlags; - SVGA3dSurfaceFormat format; - - /* - * If surfaceFlags has SVGA3D_SURFACE_CUBEMAP bit set, all SVGA3dSurfaceFace - * structures must have the same value of numMipLevels field. - * Otherwise, all but the first SVGA3dSurfaceFace structures must have the - * numMipLevels set to 0. - */ - SVGA3dSurfaceFace face[SVGA3D_MAX_SURFACE_FACES]; - uint32 multisampleCount; - SVGA3dTextureFilter autogenFilter; - - /* - * Followed by an SVGA3dSize structure for each mip level in each face. - * - * A note on surface sizes: Sizes are always specified in pixels, - * even if the true surface size is not a multiple of the minimum - * block size of the surface's format. For example, a 3x3x1 DXT1 - * compressed texture would actually be stored as a 4x4x1 image in - * memory. - */ -} -#include "vmware_pack_end.h" -SVGA3dCmdDefineSurface_v2; /* SVGA_3D_CMD_SURFACE_DEFINE_V2 */ - -typedef -#include "vmware_pack_begin.h" -struct { - uint32 sid; -} -#include "vmware_pack_end.h" -SVGA3dCmdDestroySurface; /* SVGA_3D_CMD_SURFACE_DESTROY */ - -typedef -#include "vmware_pack_begin.h" -struct { - uint32 cid; -} -#include "vmware_pack_end.h" -SVGA3dCmdDefineContext; /* SVGA_3D_CMD_CONTEXT_DEFINE */ - -typedef -#include "vmware_pack_begin.h" -struct { - uint32 cid; -} -#include "vmware_pack_end.h" -SVGA3dCmdDestroyContext; /* SVGA_3D_CMD_CONTEXT_DESTROY */ - -typedef -#include "vmware_pack_begin.h" -struct { - uint32 cid; - SVGA3dClearFlag clearFlag; - uint32 color; - float depth; - uint32 stencil; - /* Followed by variable number of SVGA3dRect structures */ -} -#include "vmware_pack_end.h" -SVGA3dCmdClear; /* SVGA_3D_CMD_CLEAR */ - -typedef -#include "vmware_pack_begin.h" -struct { - SVGA3dLightType type; - SVGA3dBool inWorldSpace; - float diffuse[4]; - float specular[4]; - float ambient[4]; - float position[4]; - float direction[4]; - float range; - float falloff; - float attenuation0; - float attenuation1; - float attenuation2; - float theta; - float phi; -} -#include "vmware_pack_end.h" -SVGA3dLightData; - -typedef -#include "vmware_pack_begin.h" -struct { - uint32 sid; - /* Followed by variable number of SVGA3dCopyRect structures */ -} -#include "vmware_pack_end.h" -SVGA3dCmdPresent; /* SVGA_3D_CMD_PRESENT */ - -typedef -#include "vmware_pack_begin.h" -struct { - SVGA3dRenderStateName state; - union { - uint32 uintValue; - float floatValue; - }; -} -#include "vmware_pack_end.h" -SVGA3dRenderState; - -typedef -#include "vmware_pack_begin.h" -struct { - uint32 cid; - /* Followed by variable number of SVGA3dRenderState structures */ -} -#include "vmware_pack_end.h" -SVGA3dCmdSetRenderState; /* SVGA_3D_CMD_SETRENDERSTATE */ - -typedef -#include "vmware_pack_begin.h" -struct { - uint32 cid; - SVGA3dRenderTargetType type; - SVGA3dSurfaceImageId target; -} -#include "vmware_pack_end.h" -SVGA3dCmdSetRenderTarget; /* SVGA_3D_CMD_SETRENDERTARGET */ - -typedef -#include "vmware_pack_begin.h" -struct { - SVGA3dSurfaceImageId src; - SVGA3dSurfaceImageId dest; - /* Followed by variable number of SVGA3dCopyBox structures */ -} -#include "vmware_pack_end.h" 
-SVGA3dCmdSurfaceCopy; /* SVGA_3D_CMD_SURFACE_COPY */ +#pragma pack(push, 1) +typedef struct { + uint32 id; + uint32 size; +} SVGA3dCmdHeader; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint32 numMipLevels; +} SVGA3dSurfaceFace; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint32 sid; + SVGA3dSurface1Flags surfaceFlags; + SVGA3dSurfaceFormat format; + + SVGA3dSurfaceFace face[SVGA3D_MAX_SURFACE_FACES]; + +} SVGA3dCmdDefineSurface; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint32 sid; + SVGA3dSurface1Flags surfaceFlags; + SVGA3dSurfaceFormat format; + + SVGA3dSurfaceFace face[SVGA3D_MAX_SURFACE_FACES]; + uint32 multisampleCount; + SVGA3dTextureFilter autogenFilter; + +} SVGA3dCmdDefineSurface_v2; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint32 sid; +} SVGA3dCmdDestroySurface; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint32 cid; +} SVGA3dCmdDefineContext; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint32 cid; +} SVGA3dCmdDestroyContext; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint32 cid; + SVGA3dClearFlag clearFlag; + uint32 color; + float depth; + uint32 stencil; + +} SVGA3dCmdClear; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + SVGA3dLightType type; + SVGA3dBool inWorldSpace; + float diffuse[4]; + float specular[4]; + float ambient[4]; + float position[4]; + float direction[4]; + float range; + float falloff; + float attenuation0; + float attenuation1; + float attenuation2; + float theta; + float phi; +} SVGA3dLightData; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint32 sid; + +} SVGA3dCmdPresent; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + SVGA3dRenderStateName state; + union { + uint32 uintValue; + float floatValue; + }; +} SVGA3dRenderState; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint32 cid; + +} SVGA3dCmdSetRenderState; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint32 cid; + SVGA3dRenderTargetType type; + SVGA3dSurfaceImageId target; +} SVGA3dCmdSetRenderTarget; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + SVGA3dSurfaceImageId src; + SVGA3dSurfaceImageId dest; + +} SVGA3dCmdSurfaceCopy; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + SVGA3dSurfaceImageId surface; + SVGA3dCopyBox box; +} SVGA3dCmdIntraSurfaceCopy; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint32 srcSid; + uint32 destSid; +} SVGA3dCmdWholeSurfaceCopy; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + SVGA3dSurfaceImageId src; + SVGA3dSurfaceImageId dest; + SVGA3dBox boxSrc; + SVGA3dBox boxDest; +} SVGA3dCmdSurfaceStretchBltNonMSToMS; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + SVGA3dSurfaceImageId src; + SVGA3dSurfaceImageId dest; + SVGA3dBox boxSrc; + SVGA3dBox boxDest; + SVGA3dStretchBltMode mode; +} SVGA3dCmdSurfaceStretchBlt; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint32 discard : 1; + + uint32 unsynchronized : 1; + + uint32 reserved : 30; +} SVGA3dSurfaceDMAFlags; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + SVGAGuestImage guest; + SVGA3dSurfaceImageId host; + SVGA3dTransferType transfer; + +} SVGA3dCmdSurfaceDMA; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint32 suffixSize; + + uint32 maximumOffset; + + SVGA3dSurfaceDMAFlags flags; +} SVGA3dCmdSurfaceDMASuffix; +#pragma pack(pop) + +#pragma pack(push, 1) 
+typedef struct { + uint32 first; + uint32 last; +} SVGA3dArrayRangeHint; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint32 surfaceId; + uint32 offset; + uint32 stride; +} SVGA3dArray; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + SVGA3dDeclType type; + SVGA3dDeclMethod method; + SVGA3dDeclUsage usage; + uint32 usageIndex; +} SVGA3dVertexArrayIdentity; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dVertexDecl { + SVGA3dVertexArrayIdentity identity; + SVGA3dArray array; + SVGA3dArrayRangeHint rangeHint; +} SVGA3dVertexDecl; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dPrimitiveRange { + SVGA3dPrimitiveType primType; + uint32 primitiveCount; + + SVGA3dArray indexArray; + uint32 indexWidth; + + int32 indexBias; +} SVGA3dPrimitiveRange; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint32 cid; + uint32 numVertexDecls; + uint32 numRanges; + +} SVGA3dCmdDrawPrimitives; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint32 cid; + + uint32 primitiveCount; + uint32 startVertexLocation; + + uint8 primitiveType; + uint8 padding[3]; +} SVGA3dCmdDraw; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint32 cid; + + uint8 primitiveType; + + uint32 indexBufferSid; + uint32 indexBufferOffset; + + uint8 indexBufferStride; + + int32 baseVertexLocation; + + uint32 primitiveCount; + uint32 pad0; + uint16 pad1; +} SVGA3dCmdDrawIndexed; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint16 streamOffset; + uint8 stream; + uint8 type; + uint8 method; + uint8 usage; + uint8 usageIndex; + uint8 padding; + +} SVGA3dVertexElement; +#pragma pack(pop) -/* - * Perform a surface copy within the same image. - * The src/dest boxes are allowed to overlap. - */ -typedef -#include "vmware_pack_begin.h" -struct { - SVGA3dSurfaceImageId surface; - SVGA3dCopyBox box; -} -#include "vmware_pack_end.h" -SVGA3dCmdIntraSurfaceCopy; /* SVGA_3D_CMD_INTRA_SURFACE_COPY */ - -typedef -#include "vmware_pack_begin.h" -struct { - uint32 srcSid; - uint32 destSid; -} -#include "vmware_pack_end.h" -SVGA3dCmdWholeSurfaceCopy; /* SVGA_3D_CMD_WHOLE_SURFACE_COPY */ - -typedef -#include "vmware_pack_begin.h" -struct { - SVGA3dSurfaceImageId src; - SVGA3dSurfaceImageId dest; - SVGA3dBox boxSrc; - SVGA3dBox boxDest; -} -#include "vmware_pack_end.h" -SVGA3dCmdSurfaceStretchBltNonMSToMS; -/* SVGA_3D_CMD_SURFACE_STRETCHBLT_NON_MS_TO_MS */ - -typedef -#include "vmware_pack_begin.h" -struct { - SVGA3dSurfaceImageId src; - SVGA3dSurfaceImageId dest; - SVGA3dBox boxSrc; - SVGA3dBox boxDest; - SVGA3dStretchBltMode mode; -} -#include "vmware_pack_end.h" -SVGA3dCmdSurfaceStretchBlt; /* SVGA_3D_CMD_SURFACE_STRETCHBLT */ - -typedef -#include "vmware_pack_begin.h" -struct { - /* - * If the discard flag is present in a surface DMA operation, the host may - * discard the contents of the current mipmap level and face of the target - * surface before applying the surface DMA contents. - */ - uint32 discard : 1; - - /* - * If the unsynchronized flag is present, the host may perform this upload - * without syncing to pending reads on this surface. - */ - uint32 unsynchronized : 1; - - /* - * Guests *MUST* set the reserved bits to 0 before submitting the command - * suffix as future flags may occupy these bits. 
- */ - uint32 reserved : 30; -} -#include "vmware_pack_end.h" -SVGA3dSurfaceDMAFlags; - -typedef -#include "vmware_pack_begin.h" -struct { - SVGAGuestImage guest; - SVGA3dSurfaceImageId host; - SVGA3dTransferType transfer; - - /* - * Followed by variable number of SVGA3dCopyBox structures. For consistency - * in all clipping logic and coordinate translation, we define the - * "source" in each copyBox as the guest image and the - * "destination" as the host image, regardless of transfer - * direction. - * - * For efficiency, the SVGA3D device is free to copy more data than - * specified. For example, it may round copy boxes outwards such - * that they lie on particular alignment boundaries. - */ -} -#include "vmware_pack_end.h" -SVGA3dCmdSurfaceDMA; /* SVGA_3D_CMD_SURFACE_DMA */ - -/* - * SVGA3dCmdSurfaceDMASuffix -- - * - * This is a command suffix that will appear after a SurfaceDMA command in - * the FIFO. It contains some extra information that hosts may use to - * optimize performance or protect the guest. This suffix exists to preserve - * backwards compatibility while also allowing for new functionality to be - * implemented. - */ - -typedef -#include "vmware_pack_begin.h" -struct { - uint32 suffixSize; - - /* - * The maximum offset is used to determine the maximum offset from the - * guestPtr base address that will be accessed or written to during this - * surfaceDMA. If the suffix is supported, the host will respect this - * boundary while performing surface DMAs. - * - * Defaults to MAX_UINT32 - */ - uint32 maximumOffset; - - /* - * A set of flags that describes optimizations that the host may perform - * while performing this surface DMA operation. The guest should never rely - * on behaviour that is different when these flags are set for correctness. - * - * Defaults to 0 - */ - SVGA3dSurfaceDMAFlags flags; -} -#include "vmware_pack_end.h" -SVGA3dCmdSurfaceDMASuffix; - -/* - * SVGA_3D_CMD_DRAW_PRIMITIVES -- - * - * This command is the SVGA3D device's generic drawing entry point. - * It can draw multiple ranges of primitives, optionally using an - * index buffer, using an arbitrary collection of vertex buffers. - * - * Each SVGA3dVertexDecl defines a distinct vertex array to bind - * during this draw call. The declarations specify which surface - * the vertex data lives in, what that vertex data is used for, - * and how to interpret it. - * - * Each SVGA3dPrimitiveRange defines a collection of primitives - * to render using the same vertex arrays. An index buffer is - * optional. - */ - -typedef -#include "vmware_pack_begin.h" -struct { - /* - * A range hint is an optional specification for the range of indices - * in an SVGA3dArray that will be used. If 'last' is zero, it is assumed - * that the entire array will be used. - * - * These are only hints. The SVGA3D device may use them for - * performance optimization if possible, but it's also allowed to - * ignore these values. - */ - uint32 first; - uint32 last; -} -#include "vmware_pack_end.h" -SVGA3dArrayRangeHint; - -typedef -#include "vmware_pack_begin.h" -struct { - /* - * Define the origin and shape of a vertex or index array. Both - * 'offset' and 'stride' are in bytes. The provided surface will be - * reinterpreted as a flat array of bytes in the same format used - * by surface DMA operations. To avoid unnecessary conversions, the - * surface should be created with the SVGA3D_BUFFER format. - * - * Index 0 in the array starts 'offset' bytes into the surface. - * Index 1 begins at byte 'offset + stride', etc. 
Array indices may - * not be negative. - */ - uint32 surfaceId; - uint32 offset; - uint32 stride; -} -#include "vmware_pack_end.h" -SVGA3dArray; - -typedef -#include "vmware_pack_begin.h" -struct { - /* - * Describe a vertex array's data type, and define how it is to be - * used by the fixed function pipeline or the vertex shader. It - * isn't useful to have two VertexDecls with the same - * VertexArrayIdentity in one draw call. - */ - SVGA3dDeclType type; - SVGA3dDeclMethod method; - SVGA3dDeclUsage usage; - uint32 usageIndex; -} -#include "vmware_pack_end.h" -SVGA3dVertexArrayIdentity; - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dVertexDecl { - SVGA3dVertexArrayIdentity identity; - SVGA3dArray array; - SVGA3dArrayRangeHint rangeHint; -} -#include "vmware_pack_end.h" -SVGA3dVertexDecl; - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dPrimitiveRange { - /* - * Define a group of primitives to render, from sequential indices. - * - * The value of 'primitiveType' and 'primitiveCount' imply the - * total number of vertices that will be rendered. - */ - SVGA3dPrimitiveType primType; - uint32 primitiveCount; - - /* - * Optional index buffer. If indexArray.surfaceId is - * SVGA3D_INVALID_ID, we render without an index buffer. Rendering - * without an index buffer is identical to rendering with an index - * buffer containing the sequence [0, 1, 2, 3, ...]. - * - * If an index buffer is in use, indexWidth specifies the width in - * bytes of each index value. It must be less than or equal to - * indexArray.stride. - * - * (Currently, the SVGA3D device requires index buffers to be tightly - * packed. In other words, indexWidth == indexArray.stride) - */ - SVGA3dArray indexArray; - uint32 indexWidth; - - /* - * Optional index bias. This number is added to all indices from - * indexArray before they are used as vertex array indices. This - * can be used in multiple ways: - * - * - When not using an indexArray, this bias can be used to - * specify where in the vertex arrays to begin rendering. - * - * - A positive number here is equivalent to increasing the - * offset in each vertex array. - * - * - A negative number can be used to render using a small - * vertex array and an index buffer that contains large - * values. This may be used by some applications that - * crop a vertex buffer without modifying their index - * buffer. - * - * Note that rendering with a negative bias value may be slower and - * use more memory than rendering with a positive or zero bias. - */ - int32 indexBias; -} -#include "vmware_pack_end.h" -SVGA3dPrimitiveRange; - -typedef -#include "vmware_pack_begin.h" -struct { - uint32 cid; - uint32 numVertexDecls; - uint32 numRanges; - - /* - * There are two variable size arrays after the - * SVGA3dCmdDrawPrimitives structure. In order, - * they are: - * - * 1. SVGA3dVertexDecl, quantity 'numVertexDecls', but no more than - * SVGA3D_MAX_VERTEX_ARRAYS; - * 2. SVGA3dPrimitiveRange, quantity 'numRanges', but no more than - * SVGA3D_MAX_DRAW_PRIMITIVE_RANGES; - * 3. Optionally, SVGA3dVertexDivisor, quantity 'numVertexDecls' (contains - * the frequency divisor for the corresponding vertex decl). - */ -} -#include "vmware_pack_end.h" -SVGA3dCmdDrawPrimitives; /* SVGA_3D_CMD_DRAWPRIMITIVES */ - -typedef -#include "vmware_pack_begin.h" -struct { - uint32 cid; - - uint32 primitiveCount; /* How many primitives to render */ - uint32 startVertexLocation; /* Which vertex do we start rendering at. 
*/ - - uint8 primitiveType; /* SVGA3dPrimitiveType */ - uint8 padding[3]; -} -#include "vmware_pack_end.h" -SVGA3dCmdDraw; - -typedef -#include "vmware_pack_begin.h" -struct { - uint32 cid; - - uint8 primitiveType; /* SVGA3dPrimitiveType */ - - uint32 indexBufferSid; /* Valid index buffer sid. */ - uint32 indexBufferOffset; /* Byte offset into the vertex buffer, almost */ - /* always 0 for pre SM guests, non-zero for OpenGL */ - /* guests. We can't represent non-multiple of */ - /* stride offsets in D3D9Renderer... */ - uint8 indexBufferStride; /* Allowable values = 1, 2, or 4 */ - - int32 baseVertexLocation; /* Bias applied to the index when selecting a */ - /* vertex from the streams, may be negative */ - - uint32 primitiveCount; /* How many primitives to render */ - uint32 pad0; - uint16 pad1; -} -#include "vmware_pack_end.h" -SVGA3dCmdDrawIndexed; - -typedef -#include "vmware_pack_begin.h" -struct { - /* - * Describe a vertex array's data type, and define how it is to be - * used by the fixed function pipeline or the vertex shader. It - * isn't useful to have two VertexDecls with the same - * VertexArrayIdentity in one draw call. - */ - uint16 streamOffset; - uint8 stream; - uint8 type; /* SVGA3dDeclType */ - uint8 method; /* SVGA3dDeclMethod */ - uint8 usage; /* SVGA3dDeclUsage */ - uint8 usageIndex; - uint8 padding; - -} -#include "vmware_pack_end.h" -SVGA3dVertexElement; - -/* - * Should the vertex element respect the stream value? The high bit of the - * stream should be set to indicate that the stream should be respected. If - * the high bit is not set, the stream will be ignored and replaced by the index - * of the position of the currently considered vertex element. - * - * All guests should set this bit and correctly specify the stream going - * forward. - */ #define SVGA3D_VERTEX_ELEMENT_RESPECT_STREAM (1 << 7) -typedef -#include "vmware_pack_begin.h" -struct { - uint32 cid; - - uint32 numElements; - - /* - * Followed by numElements SVGA3dVertexElement structures. - * - * If numElements < SVGA3D_MAX_VERTEX_ARRAYS, the remaining elements - * are cleared and will not be used by following draws. - */ -} -#include "vmware_pack_end.h" -SVGA3dCmdSetVertexDecls; - -typedef -#include "vmware_pack_begin.h" -struct { - uint32 sid; - uint32 stride; - uint32 offset; -} -#include "vmware_pack_end.h" -SVGA3dVertexStream; - -typedef -#include "vmware_pack_begin.h" -struct { - uint32 cid; - - uint32 numStreams; - /* - * Followed by numStream SVGA3dVertexStream structures. - * - * If numStreams < SVGA3D_MAX_VERTEX_ARRAYS, the remaining streams - * are cleared and will not be used by following draws. 
- */ -} -#include "vmware_pack_end.h" -SVGA3dCmdSetVertexStreams; - -typedef -#include "vmware_pack_begin.h" -struct { - uint32 cid; - uint32 numDivisors; -} -#include "vmware_pack_end.h" -SVGA3dCmdSetVertexDivisors; - -typedef -#include "vmware_pack_begin.h" -struct { - uint32 stage; - SVGA3dTextureStateName name; - union { - uint32 value; - float floatValue; - }; -} -#include "vmware_pack_end.h" -SVGA3dTextureState; - -typedef -#include "vmware_pack_begin.h" -struct { - uint32 cid; - /* Followed by variable number of SVGA3dTextureState structures */ -} -#include "vmware_pack_end.h" -SVGA3dCmdSetTextureState; /* SVGA_3D_CMD_SETTEXTURESTATE */ - -typedef -#include "vmware_pack_begin.h" -struct { - uint32 cid; - SVGA3dTransformType type; - float matrix[16]; -} -#include "vmware_pack_end.h" -SVGA3dCmdSetTransform; /* SVGA_3D_CMD_SETTRANSFORM */ - -typedef -#include "vmware_pack_begin.h" -struct { - float min; - float max; -} -#include "vmware_pack_end.h" -SVGA3dZRange; - -typedef -#include "vmware_pack_begin.h" -struct { - uint32 cid; - SVGA3dZRange zRange; -} -#include "vmware_pack_end.h" -SVGA3dCmdSetZRange; /* SVGA_3D_CMD_SETZRANGE */ - -typedef -#include "vmware_pack_begin.h" -struct { - float diffuse[4]; - float ambient[4]; - float specular[4]; - float emissive[4]; - float shininess; -} -#include "vmware_pack_end.h" -SVGA3dMaterial; - -typedef -#include "vmware_pack_begin.h" -struct { - uint32 cid; - SVGA3dFace face; - SVGA3dMaterial material; -} -#include "vmware_pack_end.h" -SVGA3dCmdSetMaterial; /* SVGA_3D_CMD_SETMATERIAL */ - -typedef -#include "vmware_pack_begin.h" -struct { - uint32 cid; - uint32 index; - SVGA3dLightData data; -} -#include "vmware_pack_end.h" -SVGA3dCmdSetLightData; /* SVGA_3D_CMD_SETLIGHTDATA */ - -typedef -#include "vmware_pack_begin.h" -struct { - uint32 cid; - uint32 index; - uint32 enabled; -} -#include "vmware_pack_end.h" -SVGA3dCmdSetLightEnabled; /* SVGA_3D_CMD_SETLIGHTENABLED */ - -typedef -#include "vmware_pack_begin.h" -struct { - uint32 cid; - SVGA3dRect rect; -} -#include "vmware_pack_end.h" -SVGA3dCmdSetViewport; /* SVGA_3D_CMD_SETVIEWPORT */ - -typedef -#include "vmware_pack_begin.h" -struct { - uint32 cid; - SVGA3dRect rect; -} -#include "vmware_pack_end.h" -SVGA3dCmdSetScissorRect; /* SVGA_3D_CMD_SETSCISSORRECT */ - -typedef -#include "vmware_pack_begin.h" -struct { - uint32 cid; - uint32 index; - float plane[4]; -} -#include "vmware_pack_end.h" -SVGA3dCmdSetClipPlane; /* SVGA_3D_CMD_SETCLIPPLANE */ - -typedef -#include "vmware_pack_begin.h" -struct { - uint32 cid; - uint32 shid; - SVGA3dShaderType type; - /* Followed by variable number of DWORDs for shader bycode */ -} -#include "vmware_pack_end.h" -SVGA3dCmdDefineShader; /* SVGA_3D_CMD_SHADER_DEFINE */ - -typedef -#include "vmware_pack_begin.h" -struct { - uint32 cid; - uint32 shid; - SVGA3dShaderType type; -} -#include "vmware_pack_end.h" -SVGA3dCmdDestroyShader; /* SVGA_3D_CMD_SHADER_DESTROY */ - -typedef -#include "vmware_pack_begin.h" -struct { - uint32 cid; - uint32 reg; /* register number */ - SVGA3dShaderType type; - SVGA3dShaderConstType ctype; - uint32 values[4]; - - /* - * Followed by a variable number of additional values. 
- */ -} -#include "vmware_pack_end.h" -SVGA3dCmdSetShaderConst; /* SVGA_3D_CMD_SET_SHADER_CONST */ - -typedef -#include "vmware_pack_begin.h" -struct { - uint32 cid; - SVGA3dShaderType type; - uint32 shid; -} -#include "vmware_pack_end.h" -SVGA3dCmdSetShader; /* SVGA_3D_CMD_SET_SHADER */ - -typedef -#include "vmware_pack_begin.h" -struct { - uint32 cid; - SVGA3dQueryType type; -} -#include "vmware_pack_end.h" -SVGA3dCmdBeginQuery; /* SVGA_3D_CMD_BEGIN_QUERY */ - -typedef -#include "vmware_pack_begin.h" -struct { - uint32 cid; - SVGA3dQueryType type; - SVGAGuestPtr guestResult; /* Points to an SVGA3dQueryResult structure */ -} -#include "vmware_pack_end.h" -SVGA3dCmdEndQuery; /* SVGA_3D_CMD_END_QUERY */ - - -/* - * SVGA3D_CMD_WAIT_FOR_QUERY -- - * - * Will read the SVGA3dQueryResult structure pointed to by guestResult, - * and if the state member is set to anything else than - * SVGA3D_QUERYSTATE_PENDING, this command will always be a no-op. - * - * Otherwise, in addition to the query explicitly waited for, - * All queries with the same type and issued with the same cid, for which - * an SVGA_3D_CMD_END_QUERY command has previously been sent, will - * be finished after execution of this command. - * - * A query will be identified by the gmrId and offset of the guestResult - * member. If the device can't find an SVGA_3D_CMD_END_QUERY that has - * been sent previously with an indentical gmrId and offset, it will - * effectively end all queries with an identical type issued with the - * same cid, and the SVGA3dQueryResult structure pointed to by - * guestResult will not be written to. This property can be used to - * implement a query barrier for a given cid and query type. - */ - -typedef -#include "vmware_pack_begin.h" -struct { - uint32 cid; /* Same parameters passed to END_QUERY */ - SVGA3dQueryType type; - SVGAGuestPtr guestResult; -} -#include "vmware_pack_end.h" -SVGA3dCmdWaitForQuery; /* SVGA_3D_CMD_WAIT_FOR_QUERY */ - -typedef -#include "vmware_pack_begin.h" -struct { - uint32 totalSize; /* Set by guest before query is ended. */ - SVGA3dQueryState state; /* Set by host or guest. See SVGA3dQueryState. */ - union { /* Set by host on exit from PENDING state */ - uint32 result32; - uint32 queryCookie; /* May be used to identify which QueryGetData this - result corresponds to. */ - }; -} -#include "vmware_pack_end.h" -SVGA3dQueryResult; - - -/* - * SVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN -- - * - * This is a blit from an SVGA3D surface to a Screen Object. - * This blit must be directed at a specific screen. - * - * The blit copies from a rectangular region of an SVGA3D surface - * image to a rectangular region of a screen. - * - * This command takes an optional variable-length list of clipping - * rectangles after the body of the command. If no rectangles are - * specified, there is no clipping region. The entire destRect is - * drawn to. If one or more rectangles are included, they describe - * a clipping region. The clip rectangle coordinates are measured - * relative to the top-left corner of destRect. - * - * The srcImage must be from mip=0 face=0. - * - * This supports scaling if the src and dest are of different sizes. 
- * - * Availability: - * SVGA_FIFO_CAP_SCREEN_OBJECT - */ - -typedef -#include "vmware_pack_begin.h" -struct { - SVGA3dSurfaceImageId srcImage; - SVGASignedRect srcRect; - uint32 destScreenId; /* Screen Object ID */ - SVGASignedRect destRect; - /* Clipping: zero or more SVGASignedRects follow */ -} -#include "vmware_pack_end.h" -SVGA3dCmdBlitSurfaceToScreen; /* SVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN */ - -typedef -#include "vmware_pack_begin.h" -struct { - uint32 sid; - SVGA3dTextureFilter filter; -} -#include "vmware_pack_end.h" -SVGA3dCmdGenerateMipmaps; /* SVGA_3D_CMD_GENERATE_MIPMAPS */ - -typedef -#include "vmware_pack_begin.h" -struct { - uint32 sid; -} -#include "vmware_pack_end.h" -SVGA3dCmdActivateSurface; /* SVGA_3D_CMD_ACTIVATE_SURFACE */ - -typedef -#include "vmware_pack_begin.h" -struct { - uint32 sid; -} -#include "vmware_pack_end.h" -SVGA3dCmdDeactivateSurface; /* SVGA_3D_CMD_DEACTIVATE_SURFACE */ - -/* - * Screen DMA command - * - * Available with SVGA_FIFO_CAP_SCREEN_OBJECT_2. The SVGA_CAP_3D device - * cap bit is not required. - * - * - refBuffer and destBuffer are 32bit BGRX; refBuffer and destBuffer could - * be different, but it is required that guest makes sure refBuffer has - * exactly the same contents that were written to when last time screen DMA - * command is received by host. - * - * - changemap is generated by lib/blit, and it has the changes from last - * received screen DMA or more. - */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdScreenDMA { - uint32 screenId; - SVGAGuestImage refBuffer; - SVGAGuestImage destBuffer; - SVGAGuestImage changeMap; -} -#include "vmware_pack_end.h" -SVGA3dCmdScreenDMA; /* SVGA_3D_CMD_SCREEN_DMA */ - -/* - * Logic ops - */ - -#define SVGA3D_LOTRANSBLT_HONORALPHA (0x01) -#define SVGA3D_LOSTRETCHBLT_MIRRORX (0x01) -#define SVGA3D_LOSTRETCHBLT_MIRRORY (0x02) -#define SVGA3D_LOALPHABLEND_SRCHASALPHA (0x01) - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdLogicOpsBitBlt { - /* - * All LogicOps surfaces are one-level - * surfaces so mipmap & face should always - * be zero. - */ - SVGA3dSurfaceImageId src; - SVGA3dSurfaceImageId dst; - SVGA3dLogicOp logicOp; - SVGA3dLogicOpRop3 logicOpRop3; - /* Followed by variable number of SVGA3dCopyBox structures */ -} -#include "vmware_pack_end.h" -SVGA3dCmdLogicOpsBitBlt; /* SVGA_3D_CMD_LOGICOPS_BITBLT */ - - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdLogicOpsTransBlt { - /* - * All LogicOps surfaces are one-level - * surfaces so mipmap & face should always - * be zero. - */ - SVGA3dSurfaceImageId src; - SVGA3dSurfaceImageId dst; - uint32 color; - uint32 flags; - SVGA3dBox srcBox; - SVGA3dSignedBox dstBox; - SVGA3dBox clipBox; -} -#include "vmware_pack_end.h" -SVGA3dCmdLogicOpsTransBlt; /* SVGA_3D_CMD_LOGICOPS_TRANSBLT */ - - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdLogicOpsStretchBlt { - /* - * All LogicOps surfaces are one-level - * surfaces so mipmap & face should always - * be zero. - */ - SVGA3dSurfaceImageId src; - SVGA3dSurfaceImageId dst; - uint16 mode; - uint16 flags; - SVGA3dBox srcBox; - SVGA3dSignedBox dstBox; - SVGA3dBox clipBox; -} -#include "vmware_pack_end.h" -SVGA3dCmdLogicOpsStretchBlt; /* SVGA_3D_CMD_LOGICOPS_STRETCHBLT */ - - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdLogicOpsColorFill { - /* - * All LogicOps surfaces are one-level - * surfaces so mipmap & face should always - * be zero. 
- */ - SVGA3dSurfaceImageId dst; - uint32 color; - SVGA3dLogicOp logicOp; - SVGA3dLogicOpRop3 logicOpRop3; - /* Followed by variable number of SVGA3dRect structures. */ -} -#include "vmware_pack_end.h" -SVGA3dCmdLogicOpsColorFill; /* SVGA_3D_CMD_LOGICOPS_COLORFILL */ - - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdLogicOpsAlphaBlend { - /* - * All LogicOps surfaces are one-level - * surfaces so mipmap & face should always - * be zero. - */ - SVGA3dSurfaceImageId src; - SVGA3dSurfaceImageId dst; - uint32 alphaVal; - uint32 flags; - SVGA3dBox srcBox; - SVGA3dSignedBox dstBox; - SVGA3dBox clipBox; -} -#include "vmware_pack_end.h" -SVGA3dCmdLogicOpsAlphaBlend; /* SVGA_3D_CMD_LOGICOPS_ALPHABLEND */ +#pragma pack(push, 1) +typedef struct { + uint32 cid; + + uint32 numElements; + +} SVGA3dCmdSetVertexDecls; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint32 sid; + uint32 stride; + uint32 offset; +} SVGA3dVertexStream; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint32 cid; + + uint32 numStreams; + +} SVGA3dCmdSetVertexStreams; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint32 cid; + uint32 numDivisors; +} SVGA3dCmdSetVertexDivisors; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint32 stage; + SVGA3dTextureStateName name; + union { + uint32 value; + float floatValue; + }; +} SVGA3dTextureState; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint32 cid; + +} SVGA3dCmdSetTextureState; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint32 cid; + SVGA3dTransformType type; + float matrix[16]; +} SVGA3dCmdSetTransform; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + float min; + float max; +} SVGA3dZRange; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint32 cid; + SVGA3dZRange zRange; +} SVGA3dCmdSetZRange; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + float diffuse[4]; + float ambient[4]; + float specular[4]; + float emissive[4]; + float shininess; +} SVGA3dMaterial; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint32 cid; + SVGA3dFace face; + SVGA3dMaterial material; +} SVGA3dCmdSetMaterial; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint32 cid; + uint32 index; + SVGA3dLightData data; +} SVGA3dCmdSetLightData; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint32 cid; + uint32 index; + uint32 enabled; +} SVGA3dCmdSetLightEnabled; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint32 cid; + SVGA3dRect rect; +} SVGA3dCmdSetViewport; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint32 cid; + SVGA3dRect rect; +} SVGA3dCmdSetScissorRect; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint32 cid; + uint32 index; + float plane[4]; +} SVGA3dCmdSetClipPlane; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint32 cid; + uint32 shid; + SVGA3dShaderType type; + +} SVGA3dCmdDefineShader; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint32 cid; + uint32 shid; + SVGA3dShaderType type; +} SVGA3dCmdDestroyShader; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint32 cid; + uint32 reg; + SVGA3dShaderType type; + SVGA3dShaderConstType ctype; + uint32 values[4]; + +} SVGA3dCmdSetShaderConst; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint32 cid; + SVGA3dShaderType type; + uint32 shid; +} SVGA3dCmdSetShader; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef 
struct { + uint32 cid; + SVGA3dQueryType type; +} SVGA3dCmdBeginQuery; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint32 cid; + SVGA3dQueryType type; + SVGAGuestPtr guestResult; +} SVGA3dCmdEndQuery; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint32 cid; + SVGA3dQueryType type; + SVGAGuestPtr guestResult; +} SVGA3dCmdWaitForQuery; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint32 totalSize; + SVGA3dQueryState state; + union { + uint32 result32; + uint32 queryCookie; + }; +} SVGA3dQueryResult; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + SVGA3dSurfaceImageId srcImage; + SVGASignedRect srcRect; + uint32 destScreenId; + SVGASignedRect destRect; + +} SVGA3dCmdBlitSurfaceToScreen; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint32 sid; + SVGA3dTextureFilter filter; +} SVGA3dCmdGenerateMipmaps; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint32 sid; +} SVGA3dCmdActivateSurface; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint32 sid; +} SVGA3dCmdDeactivateSurface; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdScreenDMA { + uint32 screenId; + SVGAGuestImage refBuffer; + SVGAGuestImage destBuffer; + SVGAGuestImage changeMap; +} SVGA3dCmdScreenDMA; +#pragma pack(pop) + +#define SVGA3D_LOTRANSBLT_HONORALPHA (0x01) +#define SVGA3D_LOSTRETCHBLT_MIRRORX (0x01) +#define SVGA3D_LOSTRETCHBLT_MIRRORY (0x02) +#define SVGA3D_LOALPHABLEND_SRCHASALPHA (0x01) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdLogicOpsBitBlt { + SVGA3dSurfaceImageId src; + SVGA3dSurfaceImageId dst; + SVGA3dLogicOp logicOp; + SVGA3dLogicOpRop3 logicOpRop3; + +} SVGA3dCmdLogicOpsBitBlt; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdLogicOpsTransBlt { + SVGA3dSurfaceImageId src; + SVGA3dSurfaceImageId dst; + uint32 color; + uint32 flags; + SVGA3dBox srcBox; + SVGA3dSignedBox dstBox; + SVGA3dBox clipBox; +} SVGA3dCmdLogicOpsTransBlt; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdLogicOpsStretchBlt { + SVGA3dSurfaceImageId src; + SVGA3dSurfaceImageId dst; + uint16 mode; + uint16 flags; + SVGA3dBox srcBox; + SVGA3dSignedBox dstBox; + SVGA3dBox clipBox; +} SVGA3dCmdLogicOpsStretchBlt; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdLogicOpsColorFill { + SVGA3dSurfaceImageId dst; + uint32 color; + SVGA3dLogicOp logicOp; + SVGA3dLogicOpRop3 logicOpRop3; + +} SVGA3dCmdLogicOpsColorFill; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdLogicOpsAlphaBlend { + SVGA3dSurfaceImageId src; + SVGA3dSurfaceImageId dst; + uint32 alphaVal; + uint32 flags; + SVGA3dBox srcBox; + SVGA3dSignedBox dstBox; + SVGA3dBox clipBox; +} SVGA3dCmdLogicOpsAlphaBlend; +#pragma pack(pop) #define SVGA3D_CLEARTYPE_INVALID_GAMMA_INDEX 0xFFFFFFFF -#define SVGA3D_CLEARTYPE_GAMMA_WIDTH 512 +#define SVGA3D_CLEARTYPE_GAMMA_WIDTH 512 #define SVGA3D_CLEARTYPE_GAMMA_HEIGHT 16 -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdLogicOpsClearTypeBlend { - /* - * All LogicOps surfaces are one-level - * surfaces so mipmap & face should always - * be zero. 
- */ - SVGA3dSurfaceImageId tmp; - SVGA3dSurfaceImageId dst; - SVGA3dSurfaceImageId gammaSurf; - SVGA3dSurfaceImageId alphaSurf; - uint32 gamma; - uint32 color; - uint32 color2; - int32 alphaOffsetX; - int32 alphaOffsetY; - /* Followed by variable number of SVGA3dBox structures */ -} -#include "vmware_pack_end.h" -SVGA3dCmdLogicOpsClearTypeBlend; /* SVGA_3D_CMD_LOGICOPS_CLEARTYPEBLEND */ - - -/* - * Guest-backed objects definitions. - */ - -typedef -#include "vmware_pack_begin.h" -struct { - SVGAMobFormat ptDepth; - uint32 sizeInBytes; - PPN64 base; -} -#include "vmware_pack_end.h" -SVGAOTableMobEntry; -#define SVGA3D_OTABLE_MOB_ENTRY_SIZE (sizeof(SVGAOTableMobEntry)) - -typedef -#include "vmware_pack_begin.h" -struct { - SVGA3dSurfaceFormat format; - SVGA3dSurface1Flags surface1Flags; - uint32 numMipLevels; - uint32 multisampleCount; - SVGA3dTextureFilter autogenFilter; - SVGA3dSize size; - SVGAMobId mobid; - uint32 arraySize; - uint32 mobPitch; - SVGA3dSurface2Flags surface2Flags; - uint8 multisamplePattern; - uint8 qualityLevel; - uint16 bufferByteStride; - float minLOD; - uint32 pad0[2]; -} -#include "vmware_pack_end.h" -SVGAOTableSurfaceEntry; -#define SVGA3D_OTABLE_SURFACE_ENTRY_SIZE (sizeof(SVGAOTableSurfaceEntry)) - -typedef -#include "vmware_pack_begin.h" -struct { - uint32 cid; - SVGAMobId mobid; -} -#include "vmware_pack_end.h" -SVGAOTableContextEntry; -#define SVGA3D_OTABLE_CONTEXT_ENTRY_SIZE (sizeof(SVGAOTableContextEntry)) - -typedef -#include "vmware_pack_begin.h" -struct { - SVGA3dShaderType type; - uint32 sizeInBytes; - uint32 offsetInBytes; - SVGAMobId mobid; -} -#include "vmware_pack_end.h" -SVGAOTableShaderEntry; -#define SVGA3D_OTABLE_SHADER_ENTRY_SIZE (sizeof(SVGAOTableShaderEntry)) - -#define SVGA_STFLAG_PRIMARY (1 << 0) -#define SVGA_STFLAG_RESERVED (1 << 1) /* Added with cap SVGA_CAP_HP_CMD_QUEUE */ +#pragma pack(push, 1) +typedef struct SVGA3dCmdLogicOpsClearTypeBlend { + SVGA3dSurfaceImageId tmp; + SVGA3dSurfaceImageId dst; + SVGA3dSurfaceImageId gammaSurf; + SVGA3dSurfaceImageId alphaSurf; + uint32 gamma; + uint32 color; + uint32 color2; + int32 alphaOffsetX; + int32 alphaOffsetY; + +} SVGA3dCmdLogicOpsClearTypeBlend; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + SVGAMobFormat ptDepth; + uint32 sizeInBytes; + PPN64 base; +} SVGAOTableMobEntry; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + SVGA3dSurfaceFormat format; + SVGA3dSurface1Flags surface1Flags; + uint32 numMipLevels; + uint32 multisampleCount; + SVGA3dTextureFilter autogenFilter; + SVGA3dSize size; + SVGAMobId mobid; + uint32 arraySize; + uint32 mobPitch; + SVGA3dSurface2Flags surface2Flags; + uint8 multisamplePattern; + uint8 qualityLevel; + uint16 bufferByteStride; + float minLOD; + uint32 pad0[2]; +} SVGAOTableSurfaceEntry; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint32 cid; + SVGAMobId mobid; +} SVGAOTableContextEntry; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + SVGA3dShaderType type; + uint32 sizeInBytes; + uint32 offsetInBytes; + SVGAMobId mobid; +} SVGAOTableShaderEntry; +#pragma pack(pop) + +#define SVGA_STFLAG_PRIMARY (1 << 0) +#define SVGA_STFLAG_RESERVED (1 << 1) typedef uint32 SVGAScreenTargetFlags; -typedef -#include "vmware_pack_begin.h" -struct { - SVGA3dSurfaceImageId image; - uint32 width; - uint32 height; - int32 xRoot; - int32 yRoot; - SVGAScreenTargetFlags flags; - uint32 dpi; - uint32 pad[7]; -} -#include "vmware_pack_end.h" -SVGAOTableScreenTargetEntry; -#define SVGA3D_OTABLE_SCREEN_TARGET_ENTRY_SIZE \ - 
(sizeof(SVGAOTableScreenTargetEntry)) - -typedef -#include "vmware_pack_begin.h" -struct { - float value[4]; -} -#include "vmware_pack_end.h" -SVGA3dShaderConstFloat; - -typedef -#include "vmware_pack_begin.h" -struct { - int32 value[4]; -} -#include "vmware_pack_end.h" -SVGA3dShaderConstInt; - -typedef -#include "vmware_pack_begin.h" -struct { - uint32 value; -} -#include "vmware_pack_end.h" -SVGA3dShaderConstBool; - -typedef -#include "vmware_pack_begin.h" -struct { - uint16 streamOffset; - uint8 stream; - uint8 type; - uint8 methodUsage; - uint8 usageIndex; -} -#include "vmware_pack_end.h" -SVGAGBVertexElement; - -typedef -#include "vmware_pack_begin.h" -struct { - uint32 sid; - uint16 stride; - uint32 offset; -} -#include "vmware_pack_end.h" -SVGAGBVertexStream; -typedef -#include "vmware_pack_begin.h" -struct { - SVGA3dRect viewport; - SVGA3dRect scissorRect; - SVGA3dZRange zRange; - - SVGA3dSurfaceImageId renderTargets[SVGA3D_RT_MAX]; - SVGAGBVertexElement decl1[4]; - - uint32 renderStates[SVGA3D_RS_MAX]; - SVGAGBVertexElement decl2[18]; - uint32 pad0[2]; - - struct { - SVGA3dFace face; - SVGA3dMaterial material; - } material; - - float clipPlanes[SVGA3D_NUM_CLIPPLANES][4]; - float matrices[SVGA3D_TRANSFORM_MAX][16]; - - SVGA3dBool lightEnabled[SVGA3D_NUM_LIGHTS]; - SVGA3dLightData lightData[SVGA3D_NUM_LIGHTS]; - - /* - * Shaders currently bound - */ - uint32 shaders[SVGA3D_NUM_SHADERTYPE_PREDX]; - SVGAGBVertexElement decl3[10]; - uint32 pad1[3]; - - uint32 occQueryActive; - uint32 occQueryValue; - - /* - * Int/Bool Shader constants - */ - SVGA3dShaderConstInt pShaderIValues[SVGA3D_CONSTINTREG_MAX]; - SVGA3dShaderConstInt vShaderIValues[SVGA3D_CONSTINTREG_MAX]; - uint16 pShaderBValues; - uint16 vShaderBValues; - - - SVGAGBVertexStream streams[SVGA3D_MAX_VERTEX_ARRAYS]; - SVGA3dVertexDivisor divisors[SVGA3D_MAX_VERTEX_ARRAYS]; - uint32 numVertexDecls; - uint32 numVertexStreams; - uint32 numVertexDivisors; - uint32 pad2[30]; - - /* - * Texture Stages - * - * SVGA3D_TS_INVALID through SVGA3D_TS_CONSTANT are in the - * textureStages array. - * SVGA3D_TS_COLOR_KEY is in tsColorKey. - */ - uint32 tsColorKey[SVGA3D_NUM_TEXTURE_UNITS]; - uint32 textureStages[SVGA3D_NUM_TEXTURE_UNITS][SVGA3D_TS_CONSTANT + 1]; - uint32 tsColorKeyEnable[SVGA3D_NUM_TEXTURE_UNITS]; - - /* - * Float Shader constants. - */ - SVGA3dShaderConstFloat pShaderFValues[SVGA3D_CONSTREG_MAX]; - SVGA3dShaderConstFloat vShaderFValues[SVGA3D_CONSTREG_MAX]; -} -#include "vmware_pack_end.h" -SVGAGBContextData; -#define SVGA3D_CONTEXT_DATA_SIZE (sizeof(SVGAGBContextData)) - -/* - * SVGA3dCmdSetOTableBase -- - * - * This command allows the guest to specify the base PPN of the - * specified object table. - */ - -typedef -#include "vmware_pack_begin.h" -struct { - SVGAOTableType type; - PPN32 baseAddress; - uint32 sizeInBytes; - uint32 validSizeInBytes; - SVGAMobFormat ptDepth; -} -#include "vmware_pack_end.h" -SVGA3dCmdSetOTableBase; /* SVGA_3D_CMD_SET_OTABLE_BASE */ - -typedef -#include "vmware_pack_begin.h" -struct { - SVGAOTableType type; - PPN64 baseAddress; - uint32 sizeInBytes; - uint32 validSizeInBytes; - SVGAMobFormat ptDepth; -} -#include "vmware_pack_end.h" -SVGA3dCmdSetOTableBase64; /* SVGA_3D_CMD_SET_OTABLE_BASE64 */ - -/* - * Guests using SVGA_3D_CMD_GROW_OTABLE are promising that - * the new OTable contains the same contents as the old one, except possibly - * for some new invalid entries at the end. - * - * (Otherwise, guests should use one of the SetOTableBase commands.) 
- */ -typedef -#include "vmware_pack_begin.h" -struct { - SVGAOTableType type; - PPN64 baseAddress; - uint32 sizeInBytes; - uint32 validSizeInBytes; - SVGAMobFormat ptDepth; -} -#include "vmware_pack_end.h" -SVGA3dCmdGrowOTable; /* SVGA_3D_CMD_GROW_OTABLE */ - -typedef -#include "vmware_pack_begin.h" -struct { - SVGAOTableType type; -} -#include "vmware_pack_end.h" -SVGA3dCmdReadbackOTable; /* SVGA_3D_CMD_READBACK_OTABLE */ - -/* - * Define a memory object (Mob) in the OTable. - */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDefineGBMob { - SVGAMobId mobid; - SVGAMobFormat ptDepth; - PPN32 base; - uint32 sizeInBytes; -} -#include "vmware_pack_end.h" -SVGA3dCmdDefineGBMob; /* SVGA_3D_CMD_DEFINE_GB_MOB */ - - -/* - * Destroys an object in the OTable. - */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDestroyGBMob { - SVGAMobId mobid; -} -#include "vmware_pack_end.h" -SVGA3dCmdDestroyGBMob; /* SVGA_3D_CMD_DESTROY_GB_MOB */ - -/* - * Define a memory object (Mob) in the OTable with a PPN64 base. - */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDefineGBMob64 { - SVGAMobId mobid; - SVGAMobFormat ptDepth; - PPN64 base; - uint32 sizeInBytes; -} -#include "vmware_pack_end.h" -SVGA3dCmdDefineGBMob64; /* SVGA_3D_CMD_DEFINE_GB_MOB64 */ - -/* - * Redefine an object in the OTable with PPN64 base. - */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdRedefineGBMob64 { - SVGAMobId mobid; - SVGAMobFormat ptDepth; - PPN64 base; - uint32 sizeInBytes; -} -#include "vmware_pack_end.h" -SVGA3dCmdRedefineGBMob64; /* SVGA_3D_CMD_REDEFINE_GB_MOB64 */ - -/* - * Notification that the page tables have been modified. - */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdUpdateGBMobMapping { - SVGAMobId mobid; -} -#include "vmware_pack_end.h" -SVGA3dCmdUpdateGBMobMapping; /* SVGA_3D_CMD_UPDATE_GB_MOB_MAPPING */ - -/* - * Define a guest-backed surface. - */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDefineGBSurface { - uint32 sid; - SVGA3dSurface1Flags surfaceFlags; - SVGA3dSurfaceFormat format; - uint32 numMipLevels; - uint32 multisampleCount; - SVGA3dTextureFilter autogenFilter; - SVGA3dSize size; -} -#include "vmware_pack_end.h" -SVGA3dCmdDefineGBSurface; /* SVGA_3D_CMD_DEFINE_GB_SURFACE */ - -/* - * Defines a guest-backed surface, adding the arraySize field. - */ -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDefineGBSurface_v2 { - uint32 sid; - SVGA3dSurface1Flags surfaceFlags; - SVGA3dSurfaceFormat format; - uint32 numMipLevels; - uint32 multisampleCount; - SVGA3dTextureFilter autogenFilter; - SVGA3dSize size; - uint32 arraySize; - uint32 pad; -} -#include "vmware_pack_end.h" -SVGA3dCmdDefineGBSurface_v2; /* SVGA_3D_CMD_DEFINE_GB_SURFACE_V2 */ - -/* - * Defines a guest-backed surface, adding the larger flags. - */ -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDefineGBSurface_v3 { - uint32 sid; - SVGA3dSurfaceAllFlags surfaceFlags; - SVGA3dSurfaceFormat format; - uint32 numMipLevels; - uint32 multisampleCount; - SVGA3dMSPattern multisamplePattern; - SVGA3dMSQualityLevel qualityLevel; - SVGA3dTextureFilter autogenFilter; - SVGA3dSize size; - uint32 arraySize; -} -#include "vmware_pack_end.h" -SVGA3dCmdDefineGBSurface_v3; /* SVGA_3D_CMD_DEFINE_GB_SURFACE_V3 */ - -/* - * Defines a guest-backed surface, adding buffer byte stride. 
- */ -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDefineGBSurface_v4 { - uint32 sid; - SVGA3dSurfaceAllFlags surfaceFlags; - SVGA3dSurfaceFormat format; - uint32 numMipLevels; - uint32 multisampleCount; - SVGA3dMSPattern multisamplePattern; - SVGA3dMSQualityLevel qualityLevel; - SVGA3dTextureFilter autogenFilter; - SVGA3dSize size; - uint32 arraySize; - uint32 bufferByteStride; -} -#include "vmware_pack_end.h" -SVGA3dCmdDefineGBSurface_v4; /* SVGA_3D_CMD_DEFINE_GB_SURFACE_V4 */ - -/* - * Destroy a guest-backed surface. - */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDestroyGBSurface { - uint32 sid; -} -#include "vmware_pack_end.h" -SVGA3dCmdDestroyGBSurface; /* SVGA_3D_CMD_DESTROY_GB_SURFACE */ - -/* - * Bind a guest-backed surface to a mob. - */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdBindGBSurface { - uint32 sid; - SVGAMobId mobid; -} -#include "vmware_pack_end.h" -SVGA3dCmdBindGBSurface; /* SVGA_3D_CMD_BIND_GB_SURFACE */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdBindGBSurfaceWithPitch { - uint32 sid; - SVGAMobId mobid; - uint32 baseLevelPitch; -} -#include "vmware_pack_end.h" -SVGA3dCmdBindGBSurfaceWithPitch; /* SVGA_3D_CMD_BIND_GB_SURFACE_WITH_PITCH */ - -/* - * Conditionally bind a mob to a guest-backed surface if testMobid - * matches the currently bound mob. Optionally issue a - * readback/update on the surface while it is still bound to the old - * mobid if the mobid is changed by this command. - */ +#pragma pack(push, 1) +typedef struct { + SVGA3dSurfaceImageId image; + uint32 width; + uint32 height; + int32 xRoot; + int32 yRoot; + SVGAScreenTargetFlags flags; + uint32 dpi; + uint32 pad[7]; +} SVGAOTableScreenTargetEntry; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + float value[4]; +} SVGA3dShaderConstFloat; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + int32 value[4]; +} SVGA3dShaderConstInt; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint32 value; +} SVGA3dShaderConstBool; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint16 streamOffset; + uint8 stream; + uint8 type; + uint8 methodUsage; + uint8 usageIndex; +} SVGAGBVertexElement; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint32 sid; + uint16 stride; + uint32 offset; +} SVGAGBVertexStream; +#pragma pack(pop) +#pragma pack(push, 1) +typedef struct { + SVGA3dRect viewport; + SVGA3dRect scissorRect; + SVGA3dZRange zRange; + + SVGA3dSurfaceImageId renderTargets[SVGA3D_RT_MAX]; + SVGAGBVertexElement decl1[4]; + + uint32 renderStates[SVGA3D_RS_MAX]; + SVGAGBVertexElement decl2[18]; + uint32 pad0[2]; + + struct { + SVGA3dFace face; + SVGA3dMaterial material; + } material; + + float clipPlanes[SVGA3D_MAX_CLIP_PLANES][4]; + float matrices[SVGA3D_TRANSFORM_MAX][16]; + + SVGA3dBool lightEnabled[SVGA3D_NUM_LIGHTS]; + SVGA3dLightData lightData[SVGA3D_NUM_LIGHTS]; + + uint32 shaders[SVGA3D_NUM_SHADERTYPE_PREDX]; + SVGAGBVertexElement decl3[10]; + uint32 pad1[3]; + + uint32 occQueryActive; + uint32 occQueryValue; + + SVGA3dShaderConstInt pShaderIValues[SVGA3D_CONSTINTREG_MAX]; + SVGA3dShaderConstInt vShaderIValues[SVGA3D_CONSTINTREG_MAX]; + uint16 pShaderBValues; + uint16 vShaderBValues; + + SVGAGBVertexStream streams[SVGA3D_MAX_VERTEX_ARRAYS]; + SVGA3dVertexDivisor divisors[SVGA3D_MAX_VERTEX_ARRAYS]; + uint32 numVertexDecls; + uint32 numVertexStreams; + uint32 numVertexDivisors; + uint32 pad2[30]; + + uint32 tsColorKey[SVGA3D_NUM_TEXTURE_UNITS]; + uint32 
textureStages[SVGA3D_NUM_TEXTURE_UNITS][SVGA3D_TS_CONSTANT + 1]; + uint32 tsColorKeyEnable[SVGA3D_NUM_TEXTURE_UNITS]; + + SVGA3dShaderConstFloat pShaderFValues[SVGA3D_CONSTREG_MAX]; + SVGA3dShaderConstFloat vShaderFValues[SVGA3D_CONSTREG_MAX]; +} SVGAGBContextData; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + SVGAOTableType type; + PPN32 baseAddress; + uint32 sizeInBytes; + uint32 validSizeInBytes; + SVGAMobFormat ptDepth; +} SVGA3dCmdSetOTableBase; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + SVGAOTableType type; + PPN64 baseAddress; + uint32 sizeInBytes; + uint32 validSizeInBytes; + SVGAMobFormat ptDepth; +} SVGA3dCmdSetOTableBase64; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + SVGAOTableType type; + PPN64 baseAddress; + uint32 sizeInBytes; + uint32 validSizeInBytes; + SVGAMobFormat ptDepth; +} SVGA3dCmdGrowOTable; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + SVGAOTableType type; +} SVGA3dCmdReadbackOTable; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDefineGBMob { + SVGAMobId mobid; + SVGAMobFormat ptDepth; + PPN32 base; + uint32 sizeInBytes; +} SVGA3dCmdDefineGBMob; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDestroyGBMob { + SVGAMobId mobid; +} SVGA3dCmdDestroyGBMob; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDefineGBMob64 { + SVGAMobId mobid; + SVGAMobFormat ptDepth; + PPN64 base; + uint32 sizeInBytes; +} SVGA3dCmdDefineGBMob64; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdRedefineGBMob64 { + SVGAMobId mobid; + SVGAMobFormat ptDepth; + PPN64 base; + uint32 sizeInBytes; +} SVGA3dCmdRedefineGBMob64; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdUpdateGBMobMapping { + SVGAMobId mobid; +} SVGA3dCmdUpdateGBMobMapping; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDefineGBSurface { + uint32 sid; + SVGA3dSurface1Flags surfaceFlags; + SVGA3dSurfaceFormat format; + uint32 numMipLevels; + uint32 multisampleCount; + SVGA3dTextureFilter autogenFilter; + SVGA3dSize size; +} SVGA3dCmdDefineGBSurface; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDefineGBSurface_v2 { + uint32 sid; + SVGA3dSurface1Flags surfaceFlags; + SVGA3dSurfaceFormat format; + uint32 numMipLevels; + uint32 multisampleCount; + SVGA3dTextureFilter autogenFilter; + SVGA3dSize size; + uint32 arraySize; + uint32 pad; +} SVGA3dCmdDefineGBSurface_v2; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDefineGBSurface_v3 { + uint32 sid; + SVGA3dSurfaceAllFlags surfaceFlags; + SVGA3dSurfaceFormat format; + uint32 numMipLevels; + uint32 multisampleCount; + SVGA3dMSPattern multisamplePattern; + SVGA3dMSQualityLevel qualityLevel; + SVGA3dTextureFilter autogenFilter; + SVGA3dSize size; + uint32 arraySize; +} SVGA3dCmdDefineGBSurface_v3; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDefineGBSurface_v4 { + uint32 sid; + SVGA3dSurfaceAllFlags surfaceFlags; + SVGA3dSurfaceFormat format; + uint32 numMipLevels; + uint32 multisampleCount; + SVGA3dMSPattern multisamplePattern; + SVGA3dMSQualityLevel qualityLevel; + SVGA3dTextureFilter autogenFilter; + SVGA3dSize size; + uint32 arraySize; + uint32 bufferByteStride; +} SVGA3dCmdDefineGBSurface_v4; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDestroyGBSurface { + uint32 sid; +} SVGA3dCmdDestroyGBSurface; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdBindGBSurface { + uint32 
sid; + SVGAMobId mobid; +} SVGA3dCmdBindGBSurface; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdBindGBSurfaceWithPitch { + uint32 sid; + SVGAMobId mobid; + uint32 baseLevelPitch; +} SVGA3dCmdBindGBSurfaceWithPitch; +#pragma pack(pop) #define SVGA3D_COND_BIND_GB_SURFACE_FLAG_READBACK (1 << 0) -#define SVGA3D_COND_BIND_GB_SURFACE_FLAG_UPDATE (1 << 1) - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdCondBindGBSurface { - uint32 sid; - SVGAMobId testMobid; - SVGAMobId mobid; - uint32 flags; -} -#include "vmware_pack_end.h" -SVGA3dCmdCondBindGBSurface; /* SVGA_3D_CMD_COND_BIND_GB_SURFACE */ - -/* - * Update an image in a guest-backed surface. - * (Inform the device that the guest-contents have been updated.) - */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdUpdateGBImage { - SVGA3dSurfaceImageId image; - SVGA3dBox box; -} -#include "vmware_pack_end.h" -SVGA3dCmdUpdateGBImage; /* SVGA_3D_CMD_UPDATE_GB_IMAGE */ - -/* - * Update an entire guest-backed surface. - * (Inform the device that the guest-contents have been updated.) - */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdUpdateGBSurface { - uint32 sid; -} -#include "vmware_pack_end.h" -SVGA3dCmdUpdateGBSurface; /* SVGA_3D_CMD_UPDATE_GB_SURFACE */ - -/* - * Readback an image in a guest-backed surface. - * (Request the device to flush the dirty contents into the guest.) - */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdReadbackGBImage { - SVGA3dSurfaceImageId image; -} -#include "vmware_pack_end.h" -SVGA3dCmdReadbackGBImage; /* SVGA_3D_CMD_READBACK_GB_IMAGE */ - -/* - * Readback an entire guest-backed surface. - * (Request the device to flush the dirty contents into the guest.) - */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdReadbackGBSurface { - uint32 sid; -} -#include "vmware_pack_end.h" -SVGA3dCmdReadbackGBSurface; /* SVGA_3D_CMD_READBACK_GB_SURFACE */ - -/* - * Readback a sub rect of an image in a guest-backed surface. After - * issuing this command the driver is required to issue an update call - * of the same region before issuing any other commands that reference - * this surface or rendering is not guaranteed. - */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdReadbackGBImagePartial { - SVGA3dSurfaceImageId image; - SVGA3dBox box; - uint32 invertBox; -} -#include "vmware_pack_end.h" -SVGA3dCmdReadbackGBImagePartial; /* SVGA_3D_CMD_READBACK_GB_IMAGE_PARTIAL */ - - -/* - * Invalidate an image in a guest-backed surface. - * (Notify the device that the contents can be lost.) - */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdInvalidateGBImage { - SVGA3dSurfaceImageId image; -} -#include "vmware_pack_end.h" -SVGA3dCmdInvalidateGBImage; /* SVGA_3D_CMD_INVALIDATE_GB_IMAGE */ - -/* - * Invalidate an entire guest-backed surface. - * (Notify the device that the contents if all images can be lost.) - */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdInvalidateGBSurface { - uint32 sid; -} -#include "vmware_pack_end.h" -SVGA3dCmdInvalidateGBSurface; /* SVGA_3D_CMD_INVALIDATE_GB_SURFACE */ - -/* - * Invalidate a sub rect of an image in a guest-backed surface. After - * issuing this command the driver is required to issue an update call - * of the same region before issuing any other commands that reference - * this surface or rendering is not guaranteed. 
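Editor's note, not part of the patch: the hunks above swap the old vmware_pack_begin.h / vmware_pack_end.h include pair for #pragma pack(push, 1) / #pragma pack(pop) around each command structure. A minimal sketch of what that packing buys, modeled loosely on the SVGAGBVertexStream layout introduced above; the struct and assertion below are illustrative only and are not part of the header:

#include <stdint.h>

#pragma pack(push, 1)
struct packed_stream {          /* shaped like SVGAGBVertexStream: u32, u16, u32 */
	uint32_t sid;
	uint16_t stride;
	uint32_t offset;
};
#pragma pack(pop)

/* With 1-byte packing nothing is inserted after 'stride', so the struct is
 * 4 + 2 + 4 = 10 bytes on the wire; without it most ABIs would pad to 12. */
_Static_assert(sizeof(struct packed_stream) == 10, "device expects 10 bytes");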
- */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdInvalidateGBImagePartial { - SVGA3dSurfaceImageId image; - SVGA3dBox box; - uint32 invertBox; -} -#include "vmware_pack_end.h" -SVGA3dCmdInvalidateGBImagePartial; /* SVGA_3D_CMD_INVALIDATE_GB_IMAGE_PARTIAL */ - - -/* - * Define a guest-backed context. - */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDefineGBContext { - uint32 cid; -} -#include "vmware_pack_end.h" -SVGA3dCmdDefineGBContext; /* SVGA_3D_CMD_DEFINE_GB_CONTEXT */ - -/* - * Destroy a guest-backed context. - */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDestroyGBContext { - uint32 cid; -} -#include "vmware_pack_end.h" -SVGA3dCmdDestroyGBContext; /* SVGA_3D_CMD_DESTROY_GB_CONTEXT */ - -/* - * Bind a guest-backed context. - * - * validContents should be set to 0 for new contexts, - * and 1 if this is an old context which is getting paged - * back on to the device. - * - * For new contexts, it is recommended that the driver - * issue commands to initialize all interesting state - * prior to rendering. - */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdBindGBContext { - uint32 cid; - SVGAMobId mobid; - uint32 validContents; -} -#include "vmware_pack_end.h" -SVGA3dCmdBindGBContext; /* SVGA_3D_CMD_BIND_GB_CONTEXT */ - -/* - * Readback a guest-backed context. - * (Request that the device flush the contents back into guest memory.) - */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdReadbackGBContext { - uint32 cid; -} -#include "vmware_pack_end.h" -SVGA3dCmdReadbackGBContext; /* SVGA_3D_CMD_READBACK_GB_CONTEXT */ - -/* - * Invalidate a guest-backed context. - */ -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdInvalidateGBContext { - uint32 cid; -} -#include "vmware_pack_end.h" -SVGA3dCmdInvalidateGBContext; /* SVGA_3D_CMD_INVALIDATE_GB_CONTEXT */ - -/* - * Define a guest-backed shader. - */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDefineGBShader { - uint32 shid; - SVGA3dShaderType type; - uint32 sizeInBytes; -} -#include "vmware_pack_end.h" -SVGA3dCmdDefineGBShader; /* SVGA_3D_CMD_DEFINE_GB_SHADER */ - -/* - * Bind a guest-backed shader. - */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdBindGBShader { - uint32 shid; - SVGAMobId mobid; - uint32 offsetInBytes; -} -#include "vmware_pack_end.h" -SVGA3dCmdBindGBShader; /* SVGA_3D_CMD_BIND_GB_SHADER */ - -/* - * Destroy a guest-backed shader. - */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDestroyGBShader { - uint32 shid; -} -#include "vmware_pack_end.h" -SVGA3dCmdDestroyGBShader; /* SVGA_3D_CMD_DESTROY_GB_SHADER */ - -typedef -#include "vmware_pack_begin.h" -struct { - uint32 cid; - uint32 regStart; - SVGA3dShaderType shaderType; - SVGA3dShaderConstType constType; - - /* - * Followed by a variable number of shader constants. - * - * Note that FLOAT and INT constants are 4-dwords in length, while - * BOOL constants are 1-dword in length. 
- */ -} -#include "vmware_pack_end.h" -SVGA3dCmdSetGBShaderConstInline; /* SVGA_3D_CMD_SET_GB_SHADERCONSTS_INLINE */ - - -typedef -#include "vmware_pack_begin.h" -struct { - uint32 cid; - SVGA3dQueryType type; -} -#include "vmware_pack_end.h" -SVGA3dCmdBeginGBQuery; /* SVGA_3D_CMD_BEGIN_GB_QUERY */ - -typedef -#include "vmware_pack_begin.h" -struct { - uint32 cid; - SVGA3dQueryType type; - SVGAMobId mobid; - uint32 offset; -} -#include "vmware_pack_end.h" -SVGA3dCmdEndGBQuery; /* SVGA_3D_CMD_END_GB_QUERY */ - - -/* - * SVGA_3D_CMD_WAIT_FOR_GB_QUERY -- - * - * The semantics of this command are identical to the - * SVGA_3D_CMD_WAIT_FOR_QUERY except that the results are written - * to a Mob instead of a GMR. - */ - -typedef -#include "vmware_pack_begin.h" -struct { - uint32 cid; - SVGA3dQueryType type; - SVGAMobId mobid; - uint32 offset; -} -#include "vmware_pack_end.h" -SVGA3dCmdWaitForGBQuery; /* SVGA_3D_CMD_WAIT_FOR_GB_QUERY */ - - -typedef -#include "vmware_pack_begin.h" -struct { - SVGAMobId mobid; - uint32 mustBeZero; - uint32 initialized; -} -#include "vmware_pack_end.h" -SVGA3dCmdEnableGart; /* SVGA_3D_CMD_ENABLE_GART */ - -typedef -#include "vmware_pack_begin.h" -struct { - SVGAMobId mobid; - uint32 gartOffset; -} -#include "vmware_pack_end.h" -SVGA3dCmdMapMobIntoGart; /* SVGA_3D_CMD_MAP_MOB_INTO_GART */ - - -typedef -#include "vmware_pack_begin.h" -struct { - uint32 gartOffset; - uint32 numPages; -} -#include "vmware_pack_end.h" -SVGA3dCmdUnmapGartRange; /* SVGA_3D_CMD_UNMAP_GART_RANGE */ - - -/* - * Screen Targets - */ - -typedef -#include "vmware_pack_begin.h" -struct { - uint32 stid; - uint32 width; - uint32 height; - int32 xRoot; - int32 yRoot; - SVGAScreenTargetFlags flags; - - /* - * The physical DPI that the guest expects this screen displayed at. - * - * Guests which are not DPI-aware should set this to zero. 
- */ - uint32 dpi; -} -#include "vmware_pack_end.h" -SVGA3dCmdDefineGBScreenTarget; /* SVGA_3D_CMD_DEFINE_GB_SCREENTARGET */ - -typedef -#include "vmware_pack_begin.h" -struct { - uint32 stid; -} -#include "vmware_pack_end.h" -SVGA3dCmdDestroyGBScreenTarget; /* SVGA_3D_CMD_DESTROY_GB_SCREENTARGET */ - -typedef -#include "vmware_pack_begin.h" -struct { - uint32 stid; - SVGA3dSurfaceImageId image; -} -#include "vmware_pack_end.h" -SVGA3dCmdBindGBScreenTarget; /* SVGA_3D_CMD_BIND_GB_SCREENTARGET */ - -typedef -#include "vmware_pack_begin.h" -struct { - uint32 stid; - SVGA3dRect rect; -} -#include "vmware_pack_end.h" -SVGA3dCmdUpdateGBScreenTarget; /* SVGA_3D_CMD_UPDATE_GB_SCREENTARGET */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdGBScreenDMA { - uint32 screenId; - uint32 dead; - SVGAMobId destMobID; - uint32 destPitch; - SVGAMobId changeMapMobID; -} -#include "vmware_pack_end.h" -SVGA3dCmdGBScreenDMA; /* SVGA_3D_CMD_GB_SCREEN_DMA */ - -typedef -#include "vmware_pack_begin.h" -struct { - uint32 value; - uint32 mobId; - uint32 mobOffset; -} -#include "vmware_pack_end.h" -SVGA3dCmdGBMobFence; /* SVGA_3D_CMD_GB_MOB_FENCE */ - -typedef -#include "vmware_pack_begin.h" -struct { - uint32 stid; - SVGA3dSurfaceImageId dest; - - uint32 statusMobId; - uint32 statusMobOffset; - - /* Reserved fields */ - uint32 mustBeInvalidId; - uint32 mustBeZero; -} -#include "vmware_pack_end.h" -SVGA3dCmdScreenCopy; /* SVGA_3D_CMD_SCREEN_COPY */ +#define SVGA3D_COND_BIND_GB_SURFACE_FLAG_UPDATE (1 << 1) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdCondBindGBSurface { + uint32 sid; + SVGAMobId testMobid; + SVGAMobId mobid; + uint32 flags; +} SVGA3dCmdCondBindGBSurface; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdUpdateGBImage { + SVGA3dSurfaceImageId image; + SVGA3dBox box; +} SVGA3dCmdUpdateGBImage; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdUpdateGBSurface { + uint32 sid; +} SVGA3dCmdUpdateGBSurface; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdReadbackGBImage { + SVGA3dSurfaceImageId image; +} SVGA3dCmdReadbackGBImage; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdReadbackGBSurface { + uint32 sid; +} SVGA3dCmdReadbackGBSurface; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdReadbackGBImagePartial { + SVGA3dSurfaceImageId image; + SVGA3dBox box; + uint32 invertBox; +} SVGA3dCmdReadbackGBImagePartial; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdInvalidateGBImage { + SVGA3dSurfaceImageId image; +} SVGA3dCmdInvalidateGBImage; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdInvalidateGBSurface { + uint32 sid; +} SVGA3dCmdInvalidateGBSurface; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdInvalidateGBImagePartial { + SVGA3dSurfaceImageId image; + SVGA3dBox box; + uint32 invertBox; +} SVGA3dCmdInvalidateGBImagePartial; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDefineGBContext { + uint32 cid; +} SVGA3dCmdDefineGBContext; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDestroyGBContext { + uint32 cid; +} SVGA3dCmdDestroyGBContext; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdBindGBContext { + uint32 cid; + SVGAMobId mobid; + uint32 validContents; +} SVGA3dCmdBindGBContext; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdReadbackGBContext { + uint32 cid; +} SVGA3dCmdReadbackGBContext; +#pragma pack(pop) + +#pragma pack(push, 1) 
+typedef struct SVGA3dCmdInvalidateGBContext { + uint32 cid; +} SVGA3dCmdInvalidateGBContext; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDefineGBShader { + uint32 shid; + SVGA3dShaderType type; + uint32 sizeInBytes; +} SVGA3dCmdDefineGBShader; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdBindGBShader { + uint32 shid; + SVGAMobId mobid; + uint32 offsetInBytes; +} SVGA3dCmdBindGBShader; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDestroyGBShader { + uint32 shid; +} SVGA3dCmdDestroyGBShader; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint32 cid; + uint32 regStart; + SVGA3dShaderType shaderType; + SVGA3dShaderConstType constType; + +} SVGA3dCmdSetGBShaderConstInline; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint32 cid; + SVGA3dQueryType type; +} SVGA3dCmdBeginGBQuery; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint32 cid; + SVGA3dQueryType type; + SVGAMobId mobid; + uint32 offset; +} SVGA3dCmdEndGBQuery; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint32 cid; + SVGA3dQueryType type; + SVGAMobId mobid; + uint32 offset; +} SVGA3dCmdWaitForGBQuery; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + SVGAMobId mobid; + uint32 mustBeZero; + uint32 initialized; +} SVGA3dCmdEnableGart; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + SVGAMobId mobid; + uint32 gartOffset; +} SVGA3dCmdMapMobIntoGart; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint32 gartOffset; + uint32 numPages; +} SVGA3dCmdUnmapGartRange; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint32 stid; + uint32 width; + uint32 height; + int32 xRoot; + int32 yRoot; + SVGAScreenTargetFlags flags; + + uint32 dpi; +} SVGA3dCmdDefineGBScreenTarget; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint32 stid; +} SVGA3dCmdDestroyGBScreenTarget; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint32 stid; + SVGA3dSurfaceImageId image; +} SVGA3dCmdBindGBScreenTarget; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint32 stid; + SVGA3dRect rect; +} SVGA3dCmdUpdateGBScreenTarget; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint32 stid; + SVGA3dRect rect; + SVGA3dFrameUpdateType type; +} SVGA3dCmdUpdateGBScreenTarget_v2; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint32 stid; + SVGA3dRect rect; + SVGA3dFrameUpdateType type; + SVGAUnsignedPoint srcPoint; +} SVGA3dCmdUpdateGBScreenTargetMove; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdGBScreenDMA { + uint32 screenId; + uint32 dead; + SVGAMobId destMobID; + uint32 destPitch; + SVGAMobId changeMapMobID; +} SVGA3dCmdGBScreenDMA; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint32 value; + uint32 mobId; + uint32 mobOffset; +} SVGA3dCmdGBMobFence; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint32 stid; + SVGA3dSurfaceImageId dest; + + uint32 statusMobId; + uint32 statusMobOffset; + + uint32 mustBeInvalidId; + uint32 mustBeZero; +} SVGA3dCmdScreenCopy; +#pragma pack(pop) #define SVGA_SCREEN_COPY_STATUS_FAILURE 0x00 #define SVGA_SCREEN_COPY_STATUS_SUCCESS 0x01 #define SVGA_SCREEN_COPY_STATUS_INVALID 0xFFFFFFFF -typedef -#include "vmware_pack_begin.h" -struct { - uint32 sid; -} -#include "vmware_pack_end.h" -SVGA3dCmdWriteZeroSurface; /* SVGA_3D_CMD_WRITE_ZERO_SURFACE */ - -typedef -#include "vmware_pack_begin.h" -struct { - uint32 sid; 
-} -#include "vmware_pack_end.h" -SVGA3dCmdHintZeroSurface; /* SVGA_3D_CMD_HINT_ZERO_SURFACE */ - -#endif /* _SVGA3D_CMD_H_ */ +#pragma pack(push, 1) +typedef struct { + uint32 sid; +} SVGA3dCmdWriteZeroSurface; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint32 sid; +} SVGA3dCmdUpdateZeroSurface; +#pragma pack(pop) + +#endif diff --git a/drivers/gpu/drm/vmwgfx/device_include/svga3d_devcaps.h b/drivers/gpu/drm/vmwgfx/device_include/svga3d_devcaps.h index 617b468c626c..379ec15c7758 100644 --- a/drivers/gpu/drm/vmwgfx/device_include/svga3d_devcaps.h +++ b/drivers/gpu/drm/vmwgfx/device_include/svga3d_devcaps.h @@ -1,6 +1,6 @@ -/* SPDX-License-Identifier: GPL-2.0 OR MIT */ /********************************************************** - * Copyright 1998-2019 VMware, Inc. + * Copyright 1998-2021 VMware, Inc. + * SPDX-License-Identifier: GPL-2.0 OR MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation @@ -27,484 +27,345 @@ /* * svga3d_devcaps.h -- * - * SVGA 3d caps definitions + * SVGA 3d caps definitions */ -#ifndef _SVGA3D_DEVCAPS_H_ -#define _SVGA3D_DEVCAPS_H_ -#define INCLUDE_ALLOW_MODULE -#define INCLUDE_ALLOW_USERLEVEL -#define INCLUDE_ALLOW_VMCORE -#include "includeCheck.h" +#ifndef _SVGA3D_DEVCAPS_H_ +#define _SVGA3D_DEVCAPS_H_ #include "svga3d_types.h" -/* - * 3D Hardware Version - * - * The hardware version is stored in the SVGA_FIFO_3D_HWVERSION fifo - * register. Is set by the host and read by the guest. This lets - * us make new guest drivers which are backwards-compatible with old - * SVGA hardware revisions. It does not let us support old guest - * drivers. Good enough for now. - * - */ - -#define SVGA3D_MAKE_HWVERSION(major, minor) (((major) << 16) | ((minor) & 0xFF)) -#define SVGA3D_MAJOR_HWVERSION(version) ((version) >> 16) -#define SVGA3D_MINOR_HWVERSION(version) ((version) & 0xFF) +#define SVGA3D_MAKE_HWVERSION(major, minor) (((major) << 16) | ((minor)&0xFF)) +#define SVGA3D_MAJOR_HWVERSION(version) ((version) >> 16) +#define SVGA3D_MINOR_HWVERSION(version) ((version)&0xFF) typedef enum { - SVGA3D_HWVERSION_WS5_RC1 = SVGA3D_MAKE_HWVERSION(0, 1), - SVGA3D_HWVERSION_WS5_RC2 = SVGA3D_MAKE_HWVERSION(0, 2), - SVGA3D_HWVERSION_WS51_RC1 = SVGA3D_MAKE_HWVERSION(0, 3), - SVGA3D_HWVERSION_WS6_B1 = SVGA3D_MAKE_HWVERSION(1, 1), - SVGA3D_HWVERSION_FUSION_11 = SVGA3D_MAKE_HWVERSION(1, 4), - SVGA3D_HWVERSION_WS65_B1 = SVGA3D_MAKE_HWVERSION(2, 0), - SVGA3D_HWVERSION_WS8_B1 = SVGA3D_MAKE_HWVERSION(2, 1), - SVGA3D_HWVERSION_CURRENT = SVGA3D_HWVERSION_WS8_B1, + SVGA3D_HWVERSION_WS5_RC1 = SVGA3D_MAKE_HWVERSION(0, 1), + SVGA3D_HWVERSION_WS5_RC2 = SVGA3D_MAKE_HWVERSION(0, 2), + SVGA3D_HWVERSION_WS51_RC1 = SVGA3D_MAKE_HWVERSION(0, 3), + SVGA3D_HWVERSION_WS6_B1 = SVGA3D_MAKE_HWVERSION(1, 1), + SVGA3D_HWVERSION_FUSION_11 = SVGA3D_MAKE_HWVERSION(1, 4), + SVGA3D_HWVERSION_WS65_B1 = SVGA3D_MAKE_HWVERSION(2, 0), + SVGA3D_HWVERSION_WS8_B1 = SVGA3D_MAKE_HWVERSION(2, 1), + SVGA3D_HWVERSION_CURRENT = SVGA3D_HWVERSION_WS8_B1, } SVGA3dHardwareVersion; -/* - * DevCap indexes. - */ - typedef uint32 SVGA3dDevCapIndex; -#define SVGA3D_DEVCAP_INVALID ((uint32)-1) -#define SVGA3D_DEVCAP_3D 0 -#define SVGA3D_DEVCAP_MAX_LIGHTS 1 - -/* - * SVGA3D_DEVCAP_MAX_TEXTURES reflects the maximum number of - * fixed-function texture units available. Each of these units - * work in both FFP and Shader modes, and they support texture - * transforms and texture coordinates. 
The host may have additional - * texture image units that are only usable with shaders. - */ -#define SVGA3D_DEVCAP_MAX_TEXTURES 2 -#define SVGA3D_DEVCAP_MAX_CLIP_PLANES 3 -#define SVGA3D_DEVCAP_VERTEX_SHADER_VERSION 4 -#define SVGA3D_DEVCAP_VERTEX_SHADER 5 -#define SVGA3D_DEVCAP_FRAGMENT_SHADER_VERSION 6 -#define SVGA3D_DEVCAP_FRAGMENT_SHADER 7 -#define SVGA3D_DEVCAP_MAX_RENDER_TARGETS 8 -#define SVGA3D_DEVCAP_S23E8_TEXTURES 9 -#define SVGA3D_DEVCAP_S10E5_TEXTURES 10 -#define SVGA3D_DEVCAP_MAX_FIXED_VERTEXBLEND 11 -#define SVGA3D_DEVCAP_D16_BUFFER_FORMAT 12 -#define SVGA3D_DEVCAP_D24S8_BUFFER_FORMAT 13 -#define SVGA3D_DEVCAP_D24X8_BUFFER_FORMAT 14 -#define SVGA3D_DEVCAP_QUERY_TYPES 15 -#define SVGA3D_DEVCAP_TEXTURE_GRADIENT_SAMPLING 16 -#define SVGA3D_DEVCAP_MAX_POINT_SIZE 17 -#define SVGA3D_DEVCAP_MAX_SHADER_TEXTURES 18 -#define SVGA3D_DEVCAP_MAX_TEXTURE_WIDTH 19 -#define SVGA3D_DEVCAP_MAX_TEXTURE_HEIGHT 20 -#define SVGA3D_DEVCAP_MAX_VOLUME_EXTENT 21 -#define SVGA3D_DEVCAP_MAX_TEXTURE_REPEAT 22 -#define SVGA3D_DEVCAP_MAX_TEXTURE_ASPECT_RATIO 23 -#define SVGA3D_DEVCAP_MAX_TEXTURE_ANISOTROPY 24 -#define SVGA3D_DEVCAP_MAX_PRIMITIVE_COUNT 25 -#define SVGA3D_DEVCAP_MAX_VERTEX_INDEX 26 -#define SVGA3D_DEVCAP_MAX_VERTEX_SHADER_INSTRUCTIONS 27 -#define SVGA3D_DEVCAP_MAX_FRAGMENT_SHADER_INSTRUCTIONS 28 -#define SVGA3D_DEVCAP_MAX_VERTEX_SHADER_TEMPS 29 -#define SVGA3D_DEVCAP_MAX_FRAGMENT_SHADER_TEMPS 30 -#define SVGA3D_DEVCAP_TEXTURE_OPS 31 -#define SVGA3D_DEVCAP_SURFACEFMT_X8R8G8B8 32 -#define SVGA3D_DEVCAP_SURFACEFMT_A8R8G8B8 33 -#define SVGA3D_DEVCAP_SURFACEFMT_A2R10G10B10 34 -#define SVGA3D_DEVCAP_SURFACEFMT_X1R5G5B5 35 -#define SVGA3D_DEVCAP_SURFACEFMT_A1R5G5B5 36 -#define SVGA3D_DEVCAP_SURFACEFMT_A4R4G4B4 37 -#define SVGA3D_DEVCAP_SURFACEFMT_R5G6B5 38 -#define SVGA3D_DEVCAP_SURFACEFMT_LUMINANCE16 39 -#define SVGA3D_DEVCAP_SURFACEFMT_LUMINANCE8_ALPHA8 40 -#define SVGA3D_DEVCAP_SURFACEFMT_ALPHA8 41 -#define SVGA3D_DEVCAP_SURFACEFMT_LUMINANCE8 42 -#define SVGA3D_DEVCAP_SURFACEFMT_Z_D16 43 -#define SVGA3D_DEVCAP_SURFACEFMT_Z_D24S8 44 -#define SVGA3D_DEVCAP_SURFACEFMT_Z_D24X8 45 -#define SVGA3D_DEVCAP_SURFACEFMT_DXT1 46 -#define SVGA3D_DEVCAP_SURFACEFMT_DXT2 47 -#define SVGA3D_DEVCAP_SURFACEFMT_DXT3 48 -#define SVGA3D_DEVCAP_SURFACEFMT_DXT4 49 -#define SVGA3D_DEVCAP_SURFACEFMT_DXT5 50 -#define SVGA3D_DEVCAP_SURFACEFMT_BUMPX8L8V8U8 51 -#define SVGA3D_DEVCAP_SURFACEFMT_A2W10V10U10 52 -#define SVGA3D_DEVCAP_SURFACEFMT_BUMPU8V8 53 -#define SVGA3D_DEVCAP_SURFACEFMT_Q8W8V8U8 54 -#define SVGA3D_DEVCAP_SURFACEFMT_CxV8U8 55 -#define SVGA3D_DEVCAP_SURFACEFMT_R_S10E5 56 -#define SVGA3D_DEVCAP_SURFACEFMT_R_S23E8 57 -#define SVGA3D_DEVCAP_SURFACEFMT_RG_S10E5 58 -#define SVGA3D_DEVCAP_SURFACEFMT_RG_S23E8 59 -#define SVGA3D_DEVCAP_SURFACEFMT_ARGB_S10E5 60 -#define SVGA3D_DEVCAP_SURFACEFMT_ARGB_S23E8 61 - -/* - * There is a hole in our devcap definitions for - * historical reasons. - * - * Define a constant just for completeness. - */ -#define SVGA3D_DEVCAP_MISSING62 62 - -#define SVGA3D_DEVCAP_MAX_VERTEX_SHADER_TEXTURES 63 - -/* - * Note that MAX_SIMULTANEOUS_RENDER_TARGETS is a maximum count of color - * render targets. This does not include the depth or stencil targets. - */ -#define SVGA3D_DEVCAP_MAX_SIMULTANEOUS_RENDER_TARGETS 64 - -#define SVGA3D_DEVCAP_SURFACEFMT_V16U16 65 -#define SVGA3D_DEVCAP_SURFACEFMT_G16R16 66 -#define SVGA3D_DEVCAP_SURFACEFMT_A16B16G16R16 67 -#define SVGA3D_DEVCAP_SURFACEFMT_UYVY 68 -#define SVGA3D_DEVCAP_SURFACEFMT_YUY2 69 - -/* - * Deprecated. 
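Editor's note, not part of the patch: a quick worked check of the hardware-version encoding kept near the top of this header (major in the high 16 bits, minor in the low 8). The macros are restated from the hunk above so the snippet stands alone:

#define SVGA3D_MAKE_HWVERSION(major, minor) (((major) << 16) | ((minor)&0xFF))
#define SVGA3D_MAJOR_HWVERSION(version) ((version) >> 16)
#define SVGA3D_MINOR_HWVERSION(version) ((version)&0xFF)

/* SVGA3D_HWVERSION_WS8_B1 is MAKE_HWVERSION(2, 1): (2 << 16) | 1 == 0x00020001. */
_Static_assert(SVGA3D_MAKE_HWVERSION(2, 1) == 0x00020001, "compose");
_Static_assert(SVGA3D_MAJOR_HWVERSION(0x00020001) == 2, "major is bits 31..16");
_Static_assert(SVGA3D_MINOR_HWVERSION(0x00020001) == 1, "minor is bits 7..0");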
- */ -#define SVGA3D_DEVCAP_DEAD4 70 -#define SVGA3D_DEVCAP_DEAD5 71 -#define SVGA3D_DEVCAP_DEAD7 72 -#define SVGA3D_DEVCAP_DEAD6 73 - -#define SVGA3D_DEVCAP_AUTOGENMIPMAPS 74 -#define SVGA3D_DEVCAP_SURFACEFMT_NV12 75 -#define SVGA3D_DEVCAP_DEAD10 76 - -/* - * This is the maximum number of SVGA context IDs that the guest - * can define using SVGA_3D_CMD_CONTEXT_DEFINE. - */ -#define SVGA3D_DEVCAP_MAX_CONTEXT_IDS 77 - -/* - * This is the maximum number of SVGA surface IDs that the guest - * can define using SVGA_3D_CMD_SURFACE_DEFINE*. - */ -#define SVGA3D_DEVCAP_MAX_SURFACE_IDS 78 - -#define SVGA3D_DEVCAP_SURFACEFMT_Z_DF16 79 -#define SVGA3D_DEVCAP_SURFACEFMT_Z_DF24 80 -#define SVGA3D_DEVCAP_SURFACEFMT_Z_D24S8_INT 81 - -#define SVGA3D_DEVCAP_SURFACEFMT_ATI1 82 -#define SVGA3D_DEVCAP_SURFACEFMT_ATI2 83 - -/* - * Deprecated. - */ -#define SVGA3D_DEVCAP_DEAD1 84 -#define SVGA3D_DEVCAP_DEAD8 85 -#define SVGA3D_DEVCAP_DEAD9 86 - -#define SVGA3D_DEVCAP_LINE_AA 87 /* boolean */ -#define SVGA3D_DEVCAP_LINE_STIPPLE 88 /* boolean */ -#define SVGA3D_DEVCAP_MAX_LINE_WIDTH 89 /* float */ -#define SVGA3D_DEVCAP_MAX_AA_LINE_WIDTH 90 /* float */ - -#define SVGA3D_DEVCAP_SURFACEFMT_YV12 91 - -/* - * Deprecated. - */ -#define SVGA3D_DEVCAP_DEAD3 92 - -/* - * Are TS_CONSTANT, TS_COLOR_KEY, and TS_COLOR_KEY_ENABLE supported? - */ -#define SVGA3D_DEVCAP_TS_COLOR_KEY 93 /* boolean */ - -/* - * Deprecated. - */ -#define SVGA3D_DEVCAP_DEAD2 94 - -/* - * Does the device support DXContexts? - */ -#define SVGA3D_DEVCAP_DXCONTEXT 95 - -/* - * Deprecated. - */ -#define SVGA3D_DEVCAP_DEAD11 96 - -/* - * What is the maximum number of vertex buffers or vertex input registers - * that can be expected to work correctly with a DXContext? - * - * The guest is allowed to set up to SVGA3D_DX_MAX_VERTEXBUFFERS, but - * anything in excess of this cap is not guaranteed to render correctly. - * - * Similarly, the guest can set up to SVGA3D_DX_MAX_VERTEXINPUTREGISTERS - * input registers without the SVGA3D_DEVCAP_SM4_1 cap, or - * SVGA3D_DX_SM41_MAX_VERTEXINPUTREGISTERS with the SVGA3D_DEVCAP_SM4_1, - * but only the registers up to this cap value are guaranteed to render - * correctly. - * - * If guest-drivers are able to expose a lower-limit, it's recommended - * that they clamp to this value. Otherwise, the host will make a - * best-effort on case-by-case basis if guests exceed this. - */ -#define SVGA3D_DEVCAP_DX_MAX_VERTEXBUFFERS 97 - -/* - * What is the maximum number of constant buffers that can be expected to - * work correctly with a DX context? - * - * The guest is allowed to set up to SVGA3D_DX_MAX_CONSTBUFFERS, but - * anything in excess of this cap is not guaranteed to render correctly. - * - * If guest-drivers are able to expose a lower-limit, it's recommended - * that they clamp to this value. Otherwise, the host will make a - * best-effort on case-by-case basis if guests exceed this. - */ -#define SVGA3D_DEVCAP_DX_MAX_CONSTANT_BUFFERS 98 - -/* - * Does the device support provoking vertex control? - * - * If this cap is present, the provokingVertexLast field in the - * rasterizer state is enabled. (Guests can then set it to FALSE, - * meaning that the first vertex is the provoking vertex, or TRUE, - * meaning that the last verteix is the provoking vertex.) - * - * If this cap is FALSE, then guests should set the provokingVertexLast - * to FALSE, otherwise rendering behavior is undefined. 
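Editor's note, not part of the patch: the SVGA3D_DEVCAP_DX_PROVOKING_VERTEX comment just above (dropped by this patch) says provokingVertexLast may only be used when the cap is advertised. A hedged sketch of that rule follows; query_devcap() is a hypothetical placeholder, not a real vmwgfx interface:

#include <stdbool.h>
#include <stdint.h>

#define SVGA3D_DEVCAP_DX_PROVOKING_VERTEX 99

/* Hypothetical stand-in for reading a devcap result from the device. */
static uint32_t query_devcap(uint32_t index)
{
	(void)index;
	return 0; /* pretend the host does not advertise the cap */
}

/* Only honor a last-vertex request when the cap is present; otherwise the
 * header requires provokingVertexLast to stay FALSE. */
static bool choose_provoking_vertex_last(bool want_last)
{
	if (!query_devcap(SVGA3D_DEVCAP_DX_PROVOKING_VERTEX))
		return false;
	return want_last;
}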
- */ -#define SVGA3D_DEVCAP_DX_PROVOKING_VERTEX 99 - -#define SVGA3D_DEVCAP_DXFMT_X8R8G8B8 100 -#define SVGA3D_DEVCAP_DXFMT_A8R8G8B8 101 -#define SVGA3D_DEVCAP_DXFMT_R5G6B5 102 -#define SVGA3D_DEVCAP_DXFMT_X1R5G5B5 103 -#define SVGA3D_DEVCAP_DXFMT_A1R5G5B5 104 -#define SVGA3D_DEVCAP_DXFMT_A4R4G4B4 105 -#define SVGA3D_DEVCAP_DXFMT_Z_D32 106 -#define SVGA3D_DEVCAP_DXFMT_Z_D16 107 -#define SVGA3D_DEVCAP_DXFMT_Z_D24S8 108 -#define SVGA3D_DEVCAP_DXFMT_Z_D15S1 109 -#define SVGA3D_DEVCAP_DXFMT_LUMINANCE8 110 -#define SVGA3D_DEVCAP_DXFMT_LUMINANCE4_ALPHA4 111 -#define SVGA3D_DEVCAP_DXFMT_LUMINANCE16 112 -#define SVGA3D_DEVCAP_DXFMT_LUMINANCE8_ALPHA8 113 -#define SVGA3D_DEVCAP_DXFMT_DXT1 114 -#define SVGA3D_DEVCAP_DXFMT_DXT2 115 -#define SVGA3D_DEVCAP_DXFMT_DXT3 116 -#define SVGA3D_DEVCAP_DXFMT_DXT4 117 -#define SVGA3D_DEVCAP_DXFMT_DXT5 118 -#define SVGA3D_DEVCAP_DXFMT_BUMPU8V8 119 -#define SVGA3D_DEVCAP_DXFMT_BUMPL6V5U5 120 -#define SVGA3D_DEVCAP_DXFMT_BUMPX8L8V8U8 121 -#define SVGA3D_DEVCAP_DXFMT_FORMAT_DEAD1 122 -#define SVGA3D_DEVCAP_DXFMT_ARGB_S10E5 123 -#define SVGA3D_DEVCAP_DXFMT_ARGB_S23E8 124 -#define SVGA3D_DEVCAP_DXFMT_A2R10G10B10 125 -#define SVGA3D_DEVCAP_DXFMT_V8U8 126 -#define SVGA3D_DEVCAP_DXFMT_Q8W8V8U8 127 -#define SVGA3D_DEVCAP_DXFMT_CxV8U8 128 -#define SVGA3D_DEVCAP_DXFMT_X8L8V8U8 129 -#define SVGA3D_DEVCAP_DXFMT_A2W10V10U10 130 -#define SVGA3D_DEVCAP_DXFMT_ALPHA8 131 -#define SVGA3D_DEVCAP_DXFMT_R_S10E5 132 -#define SVGA3D_DEVCAP_DXFMT_R_S23E8 133 -#define SVGA3D_DEVCAP_DXFMT_RG_S10E5 134 -#define SVGA3D_DEVCAP_DXFMT_RG_S23E8 135 -#define SVGA3D_DEVCAP_DXFMT_BUFFER 136 -#define SVGA3D_DEVCAP_DXFMT_Z_D24X8 137 -#define SVGA3D_DEVCAP_DXFMT_V16U16 138 -#define SVGA3D_DEVCAP_DXFMT_G16R16 139 -#define SVGA3D_DEVCAP_DXFMT_A16B16G16R16 140 -#define SVGA3D_DEVCAP_DXFMT_UYVY 141 -#define SVGA3D_DEVCAP_DXFMT_YUY2 142 -#define SVGA3D_DEVCAP_DXFMT_NV12 143 -#define SVGA3D_DEVCAP_DXFMT_FORMAT_DEAD2 144 -#define SVGA3D_DEVCAP_DXFMT_R32G32B32A32_TYPELESS 145 -#define SVGA3D_DEVCAP_DXFMT_R32G32B32A32_UINT 146 -#define SVGA3D_DEVCAP_DXFMT_R32G32B32A32_SINT 147 -#define SVGA3D_DEVCAP_DXFMT_R32G32B32_TYPELESS 148 -#define SVGA3D_DEVCAP_DXFMT_R32G32B32_FLOAT 149 -#define SVGA3D_DEVCAP_DXFMT_R32G32B32_UINT 150 -#define SVGA3D_DEVCAP_DXFMT_R32G32B32_SINT 151 -#define SVGA3D_DEVCAP_DXFMT_R16G16B16A16_TYPELESS 152 -#define SVGA3D_DEVCAP_DXFMT_R16G16B16A16_UINT 153 -#define SVGA3D_DEVCAP_DXFMT_R16G16B16A16_SNORM 154 -#define SVGA3D_DEVCAP_DXFMT_R16G16B16A16_SINT 155 -#define SVGA3D_DEVCAP_DXFMT_R32G32_TYPELESS 156 -#define SVGA3D_DEVCAP_DXFMT_R32G32_UINT 157 -#define SVGA3D_DEVCAP_DXFMT_R32G32_SINT 158 -#define SVGA3D_DEVCAP_DXFMT_R32G8X24_TYPELESS 159 -#define SVGA3D_DEVCAP_DXFMT_D32_FLOAT_S8X24_UINT 160 -#define SVGA3D_DEVCAP_DXFMT_R32_FLOAT_X8X24 161 -#define SVGA3D_DEVCAP_DXFMT_X32_G8X24_UINT 162 -#define SVGA3D_DEVCAP_DXFMT_R10G10B10A2_TYPELESS 163 -#define SVGA3D_DEVCAP_DXFMT_R10G10B10A2_UINT 164 -#define SVGA3D_DEVCAP_DXFMT_R11G11B10_FLOAT 165 -#define SVGA3D_DEVCAP_DXFMT_R8G8B8A8_TYPELESS 166 -#define SVGA3D_DEVCAP_DXFMT_R8G8B8A8_UNORM 167 -#define SVGA3D_DEVCAP_DXFMT_R8G8B8A8_UNORM_SRGB 168 -#define SVGA3D_DEVCAP_DXFMT_R8G8B8A8_UINT 169 -#define SVGA3D_DEVCAP_DXFMT_R8G8B8A8_SINT 170 -#define SVGA3D_DEVCAP_DXFMT_R16G16_TYPELESS 171 -#define SVGA3D_DEVCAP_DXFMT_R16G16_UINT 172 -#define SVGA3D_DEVCAP_DXFMT_R16G16_SINT 173 -#define SVGA3D_DEVCAP_DXFMT_R32_TYPELESS 174 -#define SVGA3D_DEVCAP_DXFMT_D32_FLOAT 175 -#define SVGA3D_DEVCAP_DXFMT_R32_UINT 176 -#define SVGA3D_DEVCAP_DXFMT_R32_SINT 177 
-#define SVGA3D_DEVCAP_DXFMT_R24G8_TYPELESS 178 -#define SVGA3D_DEVCAP_DXFMT_D24_UNORM_S8_UINT 179 -#define SVGA3D_DEVCAP_DXFMT_R24_UNORM_X8 180 -#define SVGA3D_DEVCAP_DXFMT_X24_G8_UINT 181 -#define SVGA3D_DEVCAP_DXFMT_R8G8_TYPELESS 182 -#define SVGA3D_DEVCAP_DXFMT_R8G8_UNORM 183 -#define SVGA3D_DEVCAP_DXFMT_R8G8_UINT 184 -#define SVGA3D_DEVCAP_DXFMT_R8G8_SINT 185 -#define SVGA3D_DEVCAP_DXFMT_R16_TYPELESS 186 -#define SVGA3D_DEVCAP_DXFMT_R16_UNORM 187 -#define SVGA3D_DEVCAP_DXFMT_R16_UINT 188 -#define SVGA3D_DEVCAP_DXFMT_R16_SNORM 189 -#define SVGA3D_DEVCAP_DXFMT_R16_SINT 190 -#define SVGA3D_DEVCAP_DXFMT_R8_TYPELESS 191 -#define SVGA3D_DEVCAP_DXFMT_R8_UNORM 192 -#define SVGA3D_DEVCAP_DXFMT_R8_UINT 193 -#define SVGA3D_DEVCAP_DXFMT_R8_SNORM 194 -#define SVGA3D_DEVCAP_DXFMT_R8_SINT 195 -#define SVGA3D_DEVCAP_DXFMT_P8 196 -#define SVGA3D_DEVCAP_DXFMT_R9G9B9E5_SHAREDEXP 197 -#define SVGA3D_DEVCAP_DXFMT_R8G8_B8G8_UNORM 198 -#define SVGA3D_DEVCAP_DXFMT_G8R8_G8B8_UNORM 199 -#define SVGA3D_DEVCAP_DXFMT_BC1_TYPELESS 200 -#define SVGA3D_DEVCAP_DXFMT_BC1_UNORM_SRGB 201 -#define SVGA3D_DEVCAP_DXFMT_BC2_TYPELESS 202 -#define SVGA3D_DEVCAP_DXFMT_BC2_UNORM_SRGB 203 -#define SVGA3D_DEVCAP_DXFMT_BC3_TYPELESS 204 -#define SVGA3D_DEVCAP_DXFMT_BC3_UNORM_SRGB 205 -#define SVGA3D_DEVCAP_DXFMT_BC4_TYPELESS 206 -#define SVGA3D_DEVCAP_DXFMT_ATI1 207 -#define SVGA3D_DEVCAP_DXFMT_BC4_SNORM 208 -#define SVGA3D_DEVCAP_DXFMT_BC5_TYPELESS 209 -#define SVGA3D_DEVCAP_DXFMT_ATI2 210 -#define SVGA3D_DEVCAP_DXFMT_BC5_SNORM 211 -#define SVGA3D_DEVCAP_DXFMT_R10G10B10_XR_BIAS_A2_UNORM 212 -#define SVGA3D_DEVCAP_DXFMT_B8G8R8A8_TYPELESS 213 -#define SVGA3D_DEVCAP_DXFMT_B8G8R8A8_UNORM_SRGB 214 -#define SVGA3D_DEVCAP_DXFMT_B8G8R8X8_TYPELESS 215 -#define SVGA3D_DEVCAP_DXFMT_B8G8R8X8_UNORM_SRGB 216 -#define SVGA3D_DEVCAP_DXFMT_Z_DF16 217 -#define SVGA3D_DEVCAP_DXFMT_Z_DF24 218 -#define SVGA3D_DEVCAP_DXFMT_Z_D24S8_INT 219 -#define SVGA3D_DEVCAP_DXFMT_YV12 220 -#define SVGA3D_DEVCAP_DXFMT_R32G32B32A32_FLOAT 221 -#define SVGA3D_DEVCAP_DXFMT_R16G16B16A16_FLOAT 222 -#define SVGA3D_DEVCAP_DXFMT_R16G16B16A16_UNORM 223 -#define SVGA3D_DEVCAP_DXFMT_R32G32_FLOAT 224 -#define SVGA3D_DEVCAP_DXFMT_R10G10B10A2_UNORM 225 -#define SVGA3D_DEVCAP_DXFMT_R8G8B8A8_SNORM 226 -#define SVGA3D_DEVCAP_DXFMT_R16G16_FLOAT 227 -#define SVGA3D_DEVCAP_DXFMT_R16G16_UNORM 228 -#define SVGA3D_DEVCAP_DXFMT_R16G16_SNORM 229 -#define SVGA3D_DEVCAP_DXFMT_R32_FLOAT 230 -#define SVGA3D_DEVCAP_DXFMT_R8G8_SNORM 231 -#define SVGA3D_DEVCAP_DXFMT_R16_FLOAT 232 -#define SVGA3D_DEVCAP_DXFMT_D16_UNORM 233 -#define SVGA3D_DEVCAP_DXFMT_A8_UNORM 234 -#define SVGA3D_DEVCAP_DXFMT_BC1_UNORM 235 -#define SVGA3D_DEVCAP_DXFMT_BC2_UNORM 236 -#define SVGA3D_DEVCAP_DXFMT_BC3_UNORM 237 -#define SVGA3D_DEVCAP_DXFMT_B5G6R5_UNORM 238 -#define SVGA3D_DEVCAP_DXFMT_B5G5R5A1_UNORM 239 -#define SVGA3D_DEVCAP_DXFMT_B8G8R8A8_UNORM 240 -#define SVGA3D_DEVCAP_DXFMT_B8G8R8X8_UNORM 241 -#define SVGA3D_DEVCAP_DXFMT_BC4_UNORM 242 -#define SVGA3D_DEVCAP_DXFMT_BC5_UNORM 243 - -/* - * Advertises shaderModel 4.1 support, independent blend-states, - * cube-map arrays, and a higher vertex input registers limit. - * - * (See documentation on SVGA3D_DEVCAP_DX_MAX_VERTEXBUFFERS.) - */ -#define SVGA3D_DEVCAP_SM41 244 -#define SVGA3D_DEVCAP_MULTISAMPLE_2X 245 -#define SVGA3D_DEVCAP_MULTISAMPLE_4X 246 - -/* - * Indicates that the device has rendering support for - * the full multisample quality. If this cap is not present, - * the host may or may not support full quality rendering. 
- * - * See also SVGA_REG_MS_HINT_RESOLVED. - */ -#define SVGA3D_DEVCAP_MS_FULL_QUALITY 247 - -/* - * Advertises support for the SVGA3D LogicOps commands. - */ -#define SVGA3D_DEVCAP_LOGICOPS 248 - -/* - * Advertises support for using logicOps in the DXBlendStates. - */ -#define SVGA3D_DEVCAP_LOGIC_BLENDOPS 249 - -/* -* Note DXFMT range is now non-contiguous. -*/ -#define SVGA3D_DEVCAP_RESERVED_1 250 -#define SVGA3D_DEVCAP_DXFMT_BC6H_TYPELESS 251 -#define SVGA3D_DEVCAP_DXFMT_BC6H_UF16 252 -#define SVGA3D_DEVCAP_DXFMT_BC6H_SF16 253 -#define SVGA3D_DEVCAP_DXFMT_BC7_TYPELESS 254 -#define SVGA3D_DEVCAP_DXFMT_BC7_UNORM 255 -#define SVGA3D_DEVCAP_DXFMT_BC7_UNORM_SRGB 256 -#define SVGA3D_DEVCAP_RESERVED_2 257 - -#define SVGA3D_DEVCAP_SM5 258 -#define SVGA3D_DEVCAP_MULTISAMPLE_8X 259 - -/* This must be the last index. */ -#define SVGA3D_DEVCAP_MAX 260 - -/* - * Bit definitions for DXFMT devcaps - * - * - * SUPPORTED: Can the format be defined? - * SHADER_SAMPLE: Can the format be sampled from a shader? - * COLOR_RENDERTARGET: Can the format be a color render target? - * DEPTH_RENDERTARGET: Can the format be a depth render target? - * BLENDABLE: Is the format blendable? - * MIPS: Does the format support mip levels? - * ARRAY: Does the format support texture arrays? - * VOLUME: Does the format support having volume? - * MULTISAMPLE: Does the format support multisample? - */ -#define SVGA3D_DXFMT_SUPPORTED (1 << 0) -#define SVGA3D_DXFMT_SHADER_SAMPLE (1 << 1) -#define SVGA3D_DXFMT_COLOR_RENDERTARGET (1 << 2) -#define SVGA3D_DXFMT_DEPTH_RENDERTARGET (1 << 3) -#define SVGA3D_DXFMT_BLENDABLE (1 << 4) -#define SVGA3D_DXFMT_MIPS (1 << 5) -#define SVGA3D_DXFMT_ARRAY (1 << 6) -#define SVGA3D_DXFMT_VOLUME (1 << 7) -#define SVGA3D_DXFMT_DX_VERTEX_BUFFER (1 << 8) -#define SVGA3D_DXFMT_MULTISAMPLE (1 << 9) -#define SVGA3D_DXFMT_MAX (1 << 10) +#define SVGA3D_DEVCAP_INVALID ((uint32)-1) +#define SVGA3D_DEVCAP_3D 0 +#define SVGA3D_DEVCAP_MAX_LIGHTS 1 + +#define SVGA3D_DEVCAP_MAX_TEXTURES 2 +#define SVGA3D_DEVCAP_MAX_CLIP_PLANES 3 +#define SVGA3D_DEVCAP_VERTEX_SHADER_VERSION 4 +#define SVGA3D_DEVCAP_VERTEX_SHADER 5 +#define SVGA3D_DEVCAP_FRAGMENT_SHADER_VERSION 6 +#define SVGA3D_DEVCAP_FRAGMENT_SHADER 7 +#define SVGA3D_DEVCAP_MAX_RENDER_TARGETS 8 +#define SVGA3D_DEVCAP_S23E8_TEXTURES 9 +#define SVGA3D_DEVCAP_S10E5_TEXTURES 10 +#define SVGA3D_DEVCAP_MAX_FIXED_VERTEXBLEND 11 +#define SVGA3D_DEVCAP_D16_BUFFER_FORMAT 12 +#define SVGA3D_DEVCAP_D24S8_BUFFER_FORMAT 13 +#define SVGA3D_DEVCAP_D24X8_BUFFER_FORMAT 14 +#define SVGA3D_DEVCAP_QUERY_TYPES 15 +#define SVGA3D_DEVCAP_TEXTURE_GRADIENT_SAMPLING 16 +#define SVGA3D_DEVCAP_MAX_POINT_SIZE 17 +#define SVGA3D_DEVCAP_MAX_SHADER_TEXTURES 18 +#define SVGA3D_DEVCAP_MAX_TEXTURE_WIDTH 19 +#define SVGA3D_DEVCAP_MAX_TEXTURE_HEIGHT 20 +#define SVGA3D_DEVCAP_MAX_VOLUME_EXTENT 21 +#define SVGA3D_DEVCAP_MAX_TEXTURE_REPEAT 22 +#define SVGA3D_DEVCAP_MAX_TEXTURE_ASPECT_RATIO 23 +#define SVGA3D_DEVCAP_MAX_TEXTURE_ANISOTROPY 24 +#define SVGA3D_DEVCAP_MAX_PRIMITIVE_COUNT 25 +#define SVGA3D_DEVCAP_MAX_VERTEX_INDEX 26 +#define SVGA3D_DEVCAP_MAX_VERTEX_SHADER_INSTRUCTIONS 27 +#define SVGA3D_DEVCAP_MAX_FRAGMENT_SHADER_INSTRUCTIONS 28 +#define SVGA3D_DEVCAP_MAX_VERTEX_SHADER_TEMPS 29 +#define SVGA3D_DEVCAP_MAX_FRAGMENT_SHADER_TEMPS 30 +#define SVGA3D_DEVCAP_TEXTURE_OPS 31 +#define SVGA3D_DEVCAP_SURFACEFMT_X8R8G8B8 32 +#define SVGA3D_DEVCAP_SURFACEFMT_A8R8G8B8 33 +#define SVGA3D_DEVCAP_SURFACEFMT_A2R10G10B10 34 +#define SVGA3D_DEVCAP_SURFACEFMT_X1R5G5B5 35 +#define 
SVGA3D_DEVCAP_SURFACEFMT_A1R5G5B5 36 +#define SVGA3D_DEVCAP_SURFACEFMT_A4R4G4B4 37 +#define SVGA3D_DEVCAP_SURFACEFMT_R5G6B5 38 +#define SVGA3D_DEVCAP_SURFACEFMT_LUMINANCE16 39 +#define SVGA3D_DEVCAP_SURFACEFMT_LUMINANCE8_ALPHA8 40 +#define SVGA3D_DEVCAP_SURFACEFMT_ALPHA8 41 +#define SVGA3D_DEVCAP_SURFACEFMT_LUMINANCE8 42 +#define SVGA3D_DEVCAP_SURFACEFMT_Z_D16 43 +#define SVGA3D_DEVCAP_SURFACEFMT_Z_D24S8 44 +#define SVGA3D_DEVCAP_SURFACEFMT_Z_D24X8 45 +#define SVGA3D_DEVCAP_SURFACEFMT_DXT1 46 +#define SVGA3D_DEVCAP_SURFACEFMT_DXT2 47 +#define SVGA3D_DEVCAP_SURFACEFMT_DXT3 48 +#define SVGA3D_DEVCAP_SURFACEFMT_DXT4 49 +#define SVGA3D_DEVCAP_SURFACEFMT_DXT5 50 +#define SVGA3D_DEVCAP_SURFACEFMT_BUMPX8L8V8U8 51 +#define SVGA3D_DEVCAP_SURFACEFMT_A2W10V10U10 52 +#define SVGA3D_DEVCAP_SURFACEFMT_BUMPU8V8 53 +#define SVGA3D_DEVCAP_SURFACEFMT_Q8W8V8U8 54 +#define SVGA3D_DEVCAP_SURFACEFMT_CxV8U8 55 +#define SVGA3D_DEVCAP_SURFACEFMT_R_S10E5 56 +#define SVGA3D_DEVCAP_SURFACEFMT_R_S23E8 57 +#define SVGA3D_DEVCAP_SURFACEFMT_RG_S10E5 58 +#define SVGA3D_DEVCAP_SURFACEFMT_RG_S23E8 59 +#define SVGA3D_DEVCAP_SURFACEFMT_ARGB_S10E5 60 +#define SVGA3D_DEVCAP_SURFACEFMT_ARGB_S23E8 61 + +#define SVGA3D_DEVCAP_MISSING62 62 + +#define SVGA3D_DEVCAP_MAX_VERTEX_SHADER_TEXTURES 63 + +#define SVGA3D_DEVCAP_MAX_SIMULTANEOUS_RENDER_TARGETS 64 + +#define SVGA3D_DEVCAP_SURFACEFMT_V16U16 65 +#define SVGA3D_DEVCAP_SURFACEFMT_G16R16 66 +#define SVGA3D_DEVCAP_SURFACEFMT_A16B16G16R16 67 +#define SVGA3D_DEVCAP_SURFACEFMT_UYVY 68 +#define SVGA3D_DEVCAP_SURFACEFMT_YUY2 69 + +#define SVGA3D_DEVCAP_DEAD4 70 +#define SVGA3D_DEVCAP_DEAD5 71 +#define SVGA3D_DEVCAP_DEAD7 72 +#define SVGA3D_DEVCAP_DEAD6 73 + +#define SVGA3D_DEVCAP_AUTOGENMIPMAPS 74 +#define SVGA3D_DEVCAP_SURFACEFMT_NV12 75 +#define SVGA3D_DEVCAP_DEAD10 76 + +#define SVGA3D_DEVCAP_MAX_CONTEXT_IDS 77 + +#define SVGA3D_DEVCAP_MAX_SURFACE_IDS 78 + +#define SVGA3D_DEVCAP_SURFACEFMT_Z_DF16 79 +#define SVGA3D_DEVCAP_SURFACEFMT_Z_DF24 80 +#define SVGA3D_DEVCAP_SURFACEFMT_Z_D24S8_INT 81 + +#define SVGA3D_DEVCAP_SURFACEFMT_ATI1 82 +#define SVGA3D_DEVCAP_SURFACEFMT_ATI2 83 + +#define SVGA3D_DEVCAP_DEAD1 84 +#define SVGA3D_DEVCAP_DEAD8 85 +#define SVGA3D_DEVCAP_DEAD9 86 + +#define SVGA3D_DEVCAP_LINE_AA 87 +#define SVGA3D_DEVCAP_LINE_STIPPLE 88 +#define SVGA3D_DEVCAP_MAX_LINE_WIDTH 89 +#define SVGA3D_DEVCAP_MAX_AA_LINE_WIDTH 90 + +#define SVGA3D_DEVCAP_SURFACEFMT_YV12 91 + +#define SVGA3D_DEVCAP_DEAD3 92 + +#define SVGA3D_DEVCAP_TS_COLOR_KEY 93 + +#define SVGA3D_DEVCAP_DEAD2 94 + +#define SVGA3D_DEVCAP_DXCONTEXT 95 + +#define SVGA3D_DEVCAP_DEAD11 96 + +#define SVGA3D_DEVCAP_DX_MAX_VERTEXBUFFERS 97 + +#define SVGA3D_DEVCAP_DX_MAX_CONSTANT_BUFFERS 98 + +#define SVGA3D_DEVCAP_DX_PROVOKING_VERTEX 99 + +#define SVGA3D_DEVCAP_DXFMT_X8R8G8B8 100 +#define SVGA3D_DEVCAP_DXFMT_A8R8G8B8 101 +#define SVGA3D_DEVCAP_DXFMT_R5G6B5 102 +#define SVGA3D_DEVCAP_DXFMT_X1R5G5B5 103 +#define SVGA3D_DEVCAP_DXFMT_A1R5G5B5 104 +#define SVGA3D_DEVCAP_DXFMT_A4R4G4B4 105 +#define SVGA3D_DEVCAP_DXFMT_Z_D32 106 +#define SVGA3D_DEVCAP_DXFMT_Z_D16 107 +#define SVGA3D_DEVCAP_DXFMT_Z_D24S8 108 +#define SVGA3D_DEVCAP_DXFMT_Z_D15S1 109 +#define SVGA3D_DEVCAP_DXFMT_LUMINANCE8 110 +#define SVGA3D_DEVCAP_DXFMT_LUMINANCE4_ALPHA4 111 +#define SVGA3D_DEVCAP_DXFMT_LUMINANCE16 112 +#define SVGA3D_DEVCAP_DXFMT_LUMINANCE8_ALPHA8 113 +#define SVGA3D_DEVCAP_DXFMT_DXT1 114 +#define SVGA3D_DEVCAP_DXFMT_DXT2 115 +#define SVGA3D_DEVCAP_DXFMT_DXT3 116 +#define SVGA3D_DEVCAP_DXFMT_DXT4 117 +#define SVGA3D_DEVCAP_DXFMT_DXT5 118 +#define 
SVGA3D_DEVCAP_DXFMT_BUMPU8V8 119 +#define SVGA3D_DEVCAP_DXFMT_BUMPL6V5U5 120 +#define SVGA3D_DEVCAP_DXFMT_BUMPX8L8V8U8 121 +#define SVGA3D_DEVCAP_DXFMT_FORMAT_DEAD1 122 +#define SVGA3D_DEVCAP_DXFMT_ARGB_S10E5 123 +#define SVGA3D_DEVCAP_DXFMT_ARGB_S23E8 124 +#define SVGA3D_DEVCAP_DXFMT_A2R10G10B10 125 +#define SVGA3D_DEVCAP_DXFMT_V8U8 126 +#define SVGA3D_DEVCAP_DXFMT_Q8W8V8U8 127 +#define SVGA3D_DEVCAP_DXFMT_CxV8U8 128 +#define SVGA3D_DEVCAP_DXFMT_X8L8V8U8 129 +#define SVGA3D_DEVCAP_DXFMT_A2W10V10U10 130 +#define SVGA3D_DEVCAP_DXFMT_ALPHA8 131 +#define SVGA3D_DEVCAP_DXFMT_R_S10E5 132 +#define SVGA3D_DEVCAP_DXFMT_R_S23E8 133 +#define SVGA3D_DEVCAP_DXFMT_RG_S10E5 134 +#define SVGA3D_DEVCAP_DXFMT_RG_S23E8 135 +#define SVGA3D_DEVCAP_DXFMT_BUFFER 136 +#define SVGA3D_DEVCAP_DXFMT_Z_D24X8 137 +#define SVGA3D_DEVCAP_DXFMT_V16U16 138 +#define SVGA3D_DEVCAP_DXFMT_G16R16 139 +#define SVGA3D_DEVCAP_DXFMT_A16B16G16R16 140 +#define SVGA3D_DEVCAP_DXFMT_UYVY 141 +#define SVGA3D_DEVCAP_DXFMT_YUY2 142 +#define SVGA3D_DEVCAP_DXFMT_NV12 143 +#define SVGA3D_DEVCAP_DXFMT_FORMAT_DEAD2 144 +#define SVGA3D_DEVCAP_DXFMT_R32G32B32A32_TYPELESS 145 +#define SVGA3D_DEVCAP_DXFMT_R32G32B32A32_UINT 146 +#define SVGA3D_DEVCAP_DXFMT_R32G32B32A32_SINT 147 +#define SVGA3D_DEVCAP_DXFMT_R32G32B32_TYPELESS 148 +#define SVGA3D_DEVCAP_DXFMT_R32G32B32_FLOAT 149 +#define SVGA3D_DEVCAP_DXFMT_R32G32B32_UINT 150 +#define SVGA3D_DEVCAP_DXFMT_R32G32B32_SINT 151 +#define SVGA3D_DEVCAP_DXFMT_R16G16B16A16_TYPELESS 152 +#define SVGA3D_DEVCAP_DXFMT_R16G16B16A16_UINT 153 +#define SVGA3D_DEVCAP_DXFMT_R16G16B16A16_SNORM 154 +#define SVGA3D_DEVCAP_DXFMT_R16G16B16A16_SINT 155 +#define SVGA3D_DEVCAP_DXFMT_R32G32_TYPELESS 156 +#define SVGA3D_DEVCAP_DXFMT_R32G32_UINT 157 +#define SVGA3D_DEVCAP_DXFMT_R32G32_SINT 158 +#define SVGA3D_DEVCAP_DXFMT_R32G8X24_TYPELESS 159 +#define SVGA3D_DEVCAP_DXFMT_D32_FLOAT_S8X24_UINT 160 +#define SVGA3D_DEVCAP_DXFMT_R32_FLOAT_X8X24 161 +#define SVGA3D_DEVCAP_DXFMT_X32_G8X24_UINT 162 +#define SVGA3D_DEVCAP_DXFMT_R10G10B10A2_TYPELESS 163 +#define SVGA3D_DEVCAP_DXFMT_R10G10B10A2_UINT 164 +#define SVGA3D_DEVCAP_DXFMT_R11G11B10_FLOAT 165 +#define SVGA3D_DEVCAP_DXFMT_R8G8B8A8_TYPELESS 166 +#define SVGA3D_DEVCAP_DXFMT_R8G8B8A8_UNORM 167 +#define SVGA3D_DEVCAP_DXFMT_R8G8B8A8_UNORM_SRGB 168 +#define SVGA3D_DEVCAP_DXFMT_R8G8B8A8_UINT 169 +#define SVGA3D_DEVCAP_DXFMT_R8G8B8A8_SINT 170 +#define SVGA3D_DEVCAP_DXFMT_R16G16_TYPELESS 171 +#define SVGA3D_DEVCAP_DXFMT_R16G16_UINT 172 +#define SVGA3D_DEVCAP_DXFMT_R16G16_SINT 173 +#define SVGA3D_DEVCAP_DXFMT_R32_TYPELESS 174 +#define SVGA3D_DEVCAP_DXFMT_D32_FLOAT 175 +#define SVGA3D_DEVCAP_DXFMT_R32_UINT 176 +#define SVGA3D_DEVCAP_DXFMT_R32_SINT 177 +#define SVGA3D_DEVCAP_DXFMT_R24G8_TYPELESS 178 +#define SVGA3D_DEVCAP_DXFMT_D24_UNORM_S8_UINT 179 +#define SVGA3D_DEVCAP_DXFMT_R24_UNORM_X8 180 +#define SVGA3D_DEVCAP_DXFMT_X24_G8_UINT 181 +#define SVGA3D_DEVCAP_DXFMT_R8G8_TYPELESS 182 +#define SVGA3D_DEVCAP_DXFMT_R8G8_UNORM 183 +#define SVGA3D_DEVCAP_DXFMT_R8G8_UINT 184 +#define SVGA3D_DEVCAP_DXFMT_R8G8_SINT 185 +#define SVGA3D_DEVCAP_DXFMT_R16_TYPELESS 186 +#define SVGA3D_DEVCAP_DXFMT_R16_UNORM 187 +#define SVGA3D_DEVCAP_DXFMT_R16_UINT 188 +#define SVGA3D_DEVCAP_DXFMT_R16_SNORM 189 +#define SVGA3D_DEVCAP_DXFMT_R16_SINT 190 +#define SVGA3D_DEVCAP_DXFMT_R8_TYPELESS 191 +#define SVGA3D_DEVCAP_DXFMT_R8_UNORM 192 +#define SVGA3D_DEVCAP_DXFMT_R8_UINT 193 +#define SVGA3D_DEVCAP_DXFMT_R8_SNORM 194 +#define SVGA3D_DEVCAP_DXFMT_R8_SINT 195 +#define SVGA3D_DEVCAP_DXFMT_P8 196 +#define 
SVGA3D_DEVCAP_DXFMT_R9G9B9E5_SHAREDEXP 197 +#define SVGA3D_DEVCAP_DXFMT_R8G8_B8G8_UNORM 198 +#define SVGA3D_DEVCAP_DXFMT_G8R8_G8B8_UNORM 199 +#define SVGA3D_DEVCAP_DXFMT_BC1_TYPELESS 200 +#define SVGA3D_DEVCAP_DXFMT_BC1_UNORM_SRGB 201 +#define SVGA3D_DEVCAP_DXFMT_BC2_TYPELESS 202 +#define SVGA3D_DEVCAP_DXFMT_BC2_UNORM_SRGB 203 +#define SVGA3D_DEVCAP_DXFMT_BC3_TYPELESS 204 +#define SVGA3D_DEVCAP_DXFMT_BC3_UNORM_SRGB 205 +#define SVGA3D_DEVCAP_DXFMT_BC4_TYPELESS 206 +#define SVGA3D_DEVCAP_DXFMT_ATI1 207 +#define SVGA3D_DEVCAP_DXFMT_BC4_SNORM 208 +#define SVGA3D_DEVCAP_DXFMT_BC5_TYPELESS 209 +#define SVGA3D_DEVCAP_DXFMT_ATI2 210 +#define SVGA3D_DEVCAP_DXFMT_BC5_SNORM 211 +#define SVGA3D_DEVCAP_DXFMT_R10G10B10_XR_BIAS_A2_UNORM 212 +#define SVGA3D_DEVCAP_DXFMT_B8G8R8A8_TYPELESS 213 +#define SVGA3D_DEVCAP_DXFMT_B8G8R8A8_UNORM_SRGB 214 +#define SVGA3D_DEVCAP_DXFMT_B8G8R8X8_TYPELESS 215 +#define SVGA3D_DEVCAP_DXFMT_B8G8R8X8_UNORM_SRGB 216 +#define SVGA3D_DEVCAP_DXFMT_Z_DF16 217 +#define SVGA3D_DEVCAP_DXFMT_Z_DF24 218 +#define SVGA3D_DEVCAP_DXFMT_Z_D24S8_INT 219 +#define SVGA3D_DEVCAP_DXFMT_YV12 220 +#define SVGA3D_DEVCAP_DXFMT_R32G32B32A32_FLOAT 221 +#define SVGA3D_DEVCAP_DXFMT_R16G16B16A16_FLOAT 222 +#define SVGA3D_DEVCAP_DXFMT_R16G16B16A16_UNORM 223 +#define SVGA3D_DEVCAP_DXFMT_R32G32_FLOAT 224 +#define SVGA3D_DEVCAP_DXFMT_R10G10B10A2_UNORM 225 +#define SVGA3D_DEVCAP_DXFMT_R8G8B8A8_SNORM 226 +#define SVGA3D_DEVCAP_DXFMT_R16G16_FLOAT 227 +#define SVGA3D_DEVCAP_DXFMT_R16G16_UNORM 228 +#define SVGA3D_DEVCAP_DXFMT_R16G16_SNORM 229 +#define SVGA3D_DEVCAP_DXFMT_R32_FLOAT 230 +#define SVGA3D_DEVCAP_DXFMT_R8G8_SNORM 231 +#define SVGA3D_DEVCAP_DXFMT_R16_FLOAT 232 +#define SVGA3D_DEVCAP_DXFMT_D16_UNORM 233 +#define SVGA3D_DEVCAP_DXFMT_A8_UNORM 234 +#define SVGA3D_DEVCAP_DXFMT_BC1_UNORM 235 +#define SVGA3D_DEVCAP_DXFMT_BC2_UNORM 236 +#define SVGA3D_DEVCAP_DXFMT_BC3_UNORM 237 +#define SVGA3D_DEVCAP_DXFMT_B5G6R5_UNORM 238 +#define SVGA3D_DEVCAP_DXFMT_B5G5R5A1_UNORM 239 +#define SVGA3D_DEVCAP_DXFMT_B8G8R8A8_UNORM 240 +#define SVGA3D_DEVCAP_DXFMT_B8G8R8X8_UNORM 241 +#define SVGA3D_DEVCAP_DXFMT_BC4_UNORM 242 +#define SVGA3D_DEVCAP_DXFMT_BC5_UNORM 243 + +#define SVGA3D_DEVCAP_SM41 244 +#define SVGA3D_DEVCAP_MULTISAMPLE_2X 245 +#define SVGA3D_DEVCAP_MULTISAMPLE_4X 246 + +#define SVGA3D_DEVCAP_MS_FULL_QUALITY 247 + +#define SVGA3D_DEVCAP_LOGICOPS 248 + +#define SVGA3D_DEVCAP_LOGIC_BLENDOPS 249 + +#define SVGA3D_DEVCAP_DEAD12 250 + +#define SVGA3D_DEVCAP_DXFMT_BC6H_TYPELESS 251 +#define SVGA3D_DEVCAP_DXFMT_BC6H_UF16 252 +#define SVGA3D_DEVCAP_DXFMT_BC6H_SF16 253 +#define SVGA3D_DEVCAP_DXFMT_BC7_TYPELESS 254 +#define SVGA3D_DEVCAP_DXFMT_BC7_UNORM 255 +#define SVGA3D_DEVCAP_DXFMT_BC7_UNORM_SRGB 256 + +#define SVGA3D_DEVCAP_DEAD13 257 + +#define SVGA3D_DEVCAP_SM5 258 +#define SVGA3D_DEVCAP_MULTISAMPLE_8X 259 + +#define SVGA3D_DEVCAP_MAX 262 + +#define SVGA3D_DXFMT_SUPPORTED (1 << 0) +#define SVGA3D_DXFMT_SHADER_SAMPLE (1 << 1) +#define SVGA3D_DXFMT_COLOR_RENDERTARGET (1 << 2) +#define SVGA3D_DXFMT_DEPTH_RENDERTARGET (1 << 3) +#define SVGA3D_DXFMT_BLENDABLE (1 << 4) +#define SVGA3D_DXFMT_MIPS (1 << 5) +#define SVGA3D_DXFMT_ARRAY (1 << 6) +#define SVGA3D_DXFMT_VOLUME (1 << 7) +#define SVGA3D_DXFMT_DX_VERTEX_BUFFER (1 << 8) +#define SVGA3D_DXFMT_MULTISAMPLE (1 << 9) +#define SVGA3D_DXFMT_MAX (1 << 10) typedef union { - SVGA3dBool b; - uint32 u; - int32 i; - float f; + SVGA3dBool b; + uint32 u; + int32 i; + float f; } SVGA3dDevCapResult; -#endif /* _SVGA3D_DEVCAPS_H_ */ +#endif diff --git 
a/drivers/gpu/drm/vmwgfx/device_include/svga3d_dx.h b/drivers/gpu/drm/vmwgfx/device_include/svga3d_dx.h index f703ac2b1768..5af442dad542 100644 --- a/drivers/gpu/drm/vmwgfx/device_include/svga3d_dx.h +++ b/drivers/gpu/drm/vmwgfx/device_include/svga3d_dx.h @@ -1,6 +1,6 @@ -/* SPDX-License-Identifier: GPL-2.0 OR MIT */ /********************************************************** - * Copyright 2012-2019 VMware, Inc. + * Copyright 2012-2021 VMware, Inc. + * SPDX-License-Identifier: GPL-2.0 OR MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation @@ -27,88 +27,70 @@ /* * svga3d_dx.h -- * - * SVGA 3d hardware definitions for DX10 support. + * SVGA 3d hardware definitions for DX10 support. */ + + #ifndef _SVGA3D_DX_H_ #define _SVGA3D_DX_H_ -#define INCLUDE_ALLOW_MODULE -#define INCLUDE_ALLOW_USERLEVEL -#define INCLUDE_ALLOW_VMCORE -#include "includeCheck.h" - +#include "svga_reg.h" #include "svga3d_limits.h" +#include "svga3d_types.h" -#define SVGA3D_INPUT_MIN 0 -#define SVGA3D_INPUT_PER_VERTEX_DATA 0 +#define SVGA3D_INPUT_MIN 0 +#define SVGA3D_INPUT_PER_VERTEX_DATA 0 #define SVGA3D_INPUT_PER_INSTANCE_DATA 1 -#define SVGA3D_INPUT_MAX 2 +#define SVGA3D_INPUT_MAX 2 typedef uint32 SVGA3dInputClassification; -#define SVGA3D_RESOURCE_TYPE_MIN 1 -#define SVGA3D_RESOURCE_BUFFER 1 -#define SVGA3D_RESOURCE_TEXTURE1D 2 -#define SVGA3D_RESOURCE_TEXTURE2D 3 -#define SVGA3D_RESOURCE_TEXTURE3D 4 -#define SVGA3D_RESOURCE_TEXTURECUBE 5 -#define SVGA3D_RESOURCE_TYPE_DX10_MAX 6 -#define SVGA3D_RESOURCE_BUFFEREX 6 -#define SVGA3D_RESOURCE_TYPE_MAX 7 -typedef uint32 SVGA3dResourceType; - -#define SVGA3D_COLOR_WRITE_ENABLE_RED (1 << 0) -#define SVGA3D_COLOR_WRITE_ENABLE_GREEN (1 << 1) -#define SVGA3D_COLOR_WRITE_ENABLE_BLUE (1 << 2) -#define SVGA3D_COLOR_WRITE_ENABLE_ALPHA (1 << 3) -#define SVGA3D_COLOR_WRITE_ENABLE_ALL (SVGA3D_COLOR_WRITE_ENABLE_RED | \ - SVGA3D_COLOR_WRITE_ENABLE_GREEN | \ - SVGA3D_COLOR_WRITE_ENABLE_BLUE | \ - SVGA3D_COLOR_WRITE_ENABLE_ALPHA) +#define SVGA3D_COLOR_WRITE_ENABLE_RED (1 << 0) +#define SVGA3D_COLOR_WRITE_ENABLE_GREEN (1 << 1) +#define SVGA3D_COLOR_WRITE_ENABLE_BLUE (1 << 2) +#define SVGA3D_COLOR_WRITE_ENABLE_ALPHA (1 << 3) +#define SVGA3D_COLOR_WRITE_ENABLE_ALL \ + (SVGA3D_COLOR_WRITE_ENABLE_RED | SVGA3D_COLOR_WRITE_ENABLE_GREEN | \ + SVGA3D_COLOR_WRITE_ENABLE_BLUE | SVGA3D_COLOR_WRITE_ENABLE_ALPHA) typedef uint8 SVGA3dColorWriteEnable; -#define SVGA3D_DEPTH_WRITE_MASK_ZERO 0 -#define SVGA3D_DEPTH_WRITE_MASK_ALL 1 +#define SVGA3D_DEPTH_WRITE_MASK_ZERO 0 +#define SVGA3D_DEPTH_WRITE_MASK_ALL 1 typedef uint8 SVGA3dDepthWriteMask; -#define SVGA3D_FILTER_MIP_LINEAR (1 << 0) -#define SVGA3D_FILTER_MAG_LINEAR (1 << 2) -#define SVGA3D_FILTER_MIN_LINEAR (1 << 4) +#define SVGA3D_FILTER_MIP_LINEAR (1 << 0) +#define SVGA3D_FILTER_MAG_LINEAR (1 << 2) +#define SVGA3D_FILTER_MIN_LINEAR (1 << 4) #define SVGA3D_FILTER_ANISOTROPIC (1 << 6) -#define SVGA3D_FILTER_COMPARE (1 << 7) +#define SVGA3D_FILTER_COMPARE (1 << 7) typedef uint32 SVGA3dFilter; #define SVGA3D_CULL_INVALID 0 -#define SVGA3D_CULL_MIN 1 -#define SVGA3D_CULL_NONE 1 -#define SVGA3D_CULL_FRONT 2 -#define SVGA3D_CULL_BACK 3 -#define SVGA3D_CULL_MAX 4 +#define SVGA3D_CULL_MIN 1 +#define SVGA3D_CULL_NONE 1 +#define SVGA3D_CULL_FRONT 2 +#define SVGA3D_CULL_BACK 3 +#define SVGA3D_CULL_MAX 4 typedef uint8 SVGA3dCullMode; -#define SVGA3D_COMPARISON_INVALID 0 -#define SVGA3D_COMPARISON_MIN 1 -#define SVGA3D_COMPARISON_NEVER 1 -#define SVGA3D_COMPARISON_LESS 2 -#define 
SVGA3D_COMPARISON_EQUAL 3 -#define SVGA3D_COMPARISON_LESS_EQUAL 4 -#define SVGA3D_COMPARISON_GREATER 5 -#define SVGA3D_COMPARISON_NOT_EQUAL 6 -#define SVGA3D_COMPARISON_GREATER_EQUAL 7 -#define SVGA3D_COMPARISON_ALWAYS 8 -#define SVGA3D_COMPARISON_MAX 9 +#define SVGA3D_COMPARISON_INVALID 0 +#define SVGA3D_COMPARISON_MIN 1 +#define SVGA3D_COMPARISON_NEVER 1 +#define SVGA3D_COMPARISON_LESS 2 +#define SVGA3D_COMPARISON_EQUAL 3 +#define SVGA3D_COMPARISON_LESS_EQUAL 4 +#define SVGA3D_COMPARISON_GREATER 5 +#define SVGA3D_COMPARISON_NOT_EQUAL 6 +#define SVGA3D_COMPARISON_GREATER_EQUAL 7 +#define SVGA3D_COMPARISON_ALWAYS 8 +#define SVGA3D_COMPARISON_MAX 9 typedef uint8 SVGA3dComparisonFunc; -/* - * SVGA3D_MULTISAMPLE_RAST_DISABLE disables MSAA for all primitives. - * SVGA3D_MULTISAMPLE_RAST_DISABLE_LINE, which is supported in SM41, - * disables MSAA for lines only. - */ -#define SVGA3D_MULTISAMPLE_RAST_DISABLE 0 -#define SVGA3D_MULTISAMPLE_RAST_ENABLE 1 -#define SVGA3D_MULTISAMPLE_RAST_DX_MAX 1 -#define SVGA3D_MULTISAMPLE_RAST_DISABLE_LINE 2 -#define SVGA3D_MULTISAMPLE_RAST_MAX 2 +#define SVGA3D_MULTISAMPLE_RAST_DISABLE 0 +#define SVGA3D_MULTISAMPLE_RAST_ENABLE 1 +#define SVGA3D_MULTISAMPLE_RAST_DX_MAX 1 +#define SVGA3D_MULTISAMPLE_RAST_DISABLE_LINE 2 +#define SVGA3D_MULTISAMPLE_RAST_MAX 2 typedef uint8 SVGA3dMultisampleRastEnable; #define SVGA3D_DX_MAX_VERTEXBUFFERS 32 @@ -137,1531 +119,1273 @@ typedef uint32 SVGA3dQueryId; typedef uint32 SVGA3dStreamOutputId; typedef union { - struct { - float r; - float g; - float b; - float a; - }; - - float value[4]; -} SVGA3dRGBAFloat; - -typedef union { - struct { - uint32 r; - uint32 g; - uint32 b; - uint32 a; - }; - - uint32 value[4]; + struct { + uint32 r; + uint32 g; + uint32 b; + uint32 a; + }; + + uint32 value[4]; } SVGA3dRGBAUint32; -typedef -#include "vmware_pack_begin.h" -struct { - uint32 cid; - SVGAMobId mobid; -} -#include "vmware_pack_end.h" -SVGAOTableDXContextEntry; - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXDefineContext { - uint32 cid; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXDefineContext; /* SVGA_3D_CMD_DX_DEFINE_CONTEXT */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXDestroyContext { - uint32 cid; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXDestroyContext; /* SVGA_3D_CMD_DX_DESTROY_CONTEXT */ - -/* - * Bind a DX context. - * - * validContents should be set to 0 for new contexts, - * and 1 if this is an old context which is getting paged - * back on to the device. - * - * For new contexts, it is recommended that the driver - * issue commands to initialize all interesting state - * prior to rendering. - */ -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXBindContext { - uint32 cid; - SVGAMobId mobid; - uint32 validContents; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXBindContext; /* SVGA_3D_CMD_DX_BIND_CONTEXT */ - -/* - * Readback a DX context. - * (Request that the device flush the contents back into guest memory.) - */ -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXReadbackContext { - uint32 cid; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXReadbackContext; /* SVGA_3D_CMD_DX_READBACK_CONTEXT */ - -/* - * Invalidate a guest-backed context. 
- */ -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXInvalidateContext { - uint32 cid; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXInvalidateContext; /* SVGA_3D_CMD_DX_INVALIDATE_CONTEXT */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXSetSingleConstantBuffer { - uint32 slot; - SVGA3dShaderType type; - SVGA3dSurfaceId sid; - uint32 offsetInBytes; - uint32 sizeInBytes; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXSetSingleConstantBuffer; -/* SVGA_3D_CMD_DX_SET_SINGLE_CONSTANT_BUFFER */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXSetShaderResources { - uint32 startView; - SVGA3dShaderType type; - - /* - * Followed by a variable number of SVGA3dShaderResourceViewId's. - */ -} -#include "vmware_pack_end.h" -SVGA3dCmdDXSetShaderResources; /* SVGA_3D_CMD_DX_SET_SHADER_RESOURCES */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXSetShader { - SVGA3dShaderId shaderId; - SVGA3dShaderType type; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXSetShader; /* SVGA_3D_CMD_DX_SET_SHADER */ +#pragma pack(push, 1) +typedef struct { + uint32 cid; + SVGAMobId mobid; +} SVGAOTableDXContextEntry; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXDefineContext { + uint32 cid; +} SVGA3dCmdDXDefineContext; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXDestroyContext { + uint32 cid; +} SVGA3dCmdDXDestroyContext; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXBindContext { + uint32 cid; + SVGAMobId mobid; + uint32 validContents; +} SVGA3dCmdDXBindContext; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXReadbackContext { + uint32 cid; +} SVGA3dCmdDXReadbackContext; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXInvalidateContext { + uint32 cid; +} SVGA3dCmdDXInvalidateContext; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXSetSingleConstantBuffer { + uint32 slot; + SVGA3dShaderType type; + SVGA3dSurfaceId sid; + uint32 offsetInBytes; + uint32 sizeInBytes; +} SVGA3dCmdDXSetSingleConstantBuffer; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXSetShaderResources { + uint32 startView; + SVGA3dShaderType type; + +} SVGA3dCmdDXSetShaderResources; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXSetShader { + SVGA3dShaderId shaderId; + SVGA3dShaderType type; +} SVGA3dCmdDXSetShader; +#pragma pack(pop) typedef union { - struct { - uint32 cbOffset : 12; - uint32 cbId : 4; - uint32 baseSamp : 4; - uint32 baseTex : 7; - uint32 reserved : 5; - }; - uint32 value; + struct { + uint32 cbOffset : 12; + uint32 cbId : 4; + uint32 baseSamp : 4; + uint32 baseTex : 7; + uint32 reserved : 5; + }; + uint32 value; } SVGA3dIfaceData; -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXSetShaderIface { - SVGA3dShaderType type; - uint32 numClassInstances; - uint32 index; - uint32 iface; - SVGA3dIfaceData data; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXSetShaderIface; /* SVGA_3D_CMD_DX_SET_SHADER_IFACE */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXBindShaderIface { - uint32 cid; - SVGAMobId mobid; - uint32 offsetInBytes; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXBindShaderIface; /* SVGA_3D_CMD_DX_BIND_SHADER_IFACE */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXSetSamplers { - uint32 startSampler; - SVGA3dShaderType type; - - /* - * Followed by a variable number of SVGA3dSamplerId's. 
- */ -} -#include "vmware_pack_end.h" -SVGA3dCmdDXSetSamplers; /* SVGA_3D_CMD_DX_SET_SAMPLERS */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXDraw { - uint32 vertexCount; - uint32 startVertexLocation; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXDraw; /* SVGA_3D_CMD_DX_DRAW */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXDrawIndexed { - uint32 indexCount; - uint32 startIndexLocation; - int32 baseVertexLocation; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXDrawIndexed; /* SVGA_3D_CMD_DX_DRAW_INDEXED */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXDrawInstanced { - uint32 vertexCountPerInstance; - uint32 instanceCount; - uint32 startVertexLocation; - uint32 startInstanceLocation; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXDrawInstanced; /* SVGA_3D_CMD_DX_DRAW_INSTANCED */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXDrawIndexedInstanced { - uint32 indexCountPerInstance; - uint32 instanceCount; - uint32 startIndexLocation; - int32 baseVertexLocation; - uint32 startInstanceLocation; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXDrawIndexedInstanced; /* SVGA_3D_CMD_DX_DRAW_INDEXED_INSTANCED */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXDrawIndexedInstancedIndirect { - SVGA3dSurfaceId argsBufferSid; - uint32 byteOffsetForArgs; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXDrawIndexedInstancedIndirect; -/* SVGA_3D_CMD_DX_DRAW_INDEXED_INSTANCED_INDIRECT */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXDrawInstancedIndirect { - SVGA3dSurfaceId argsBufferSid; - uint32 byteOffsetForArgs; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXDrawInstancedIndirect; -/* SVGA_3D_CMD_DX_DRAW_INSTANCED_INDIRECT */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXDrawAuto { - uint32 pad0; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXDrawAuto; /* SVGA_3D_CMD_DX_DRAW_AUTO */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXDispatch { - uint32 threadGroupCountX; - uint32 threadGroupCountY; - uint32 threadGroupCountZ; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXDispatch; -/* SVGA_3D_CMD_DX_DISPATCH */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXDispatchIndirect { - SVGA3dSurfaceId argsBufferSid; - uint32 byteOffsetForArgs; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXDispatchIndirect; -/* SVGA_3D_CMD_DX_DISPATCH_INDIRECT */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXSetInputLayout { - SVGA3dElementLayoutId elementLayoutId; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXSetInputLayout; /* SVGA_3D_CMD_DX_SET_INPUT_LAYOUT */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dVertexBuffer { - SVGA3dSurfaceId sid; - uint32 stride; - uint32 offset; -} -#include "vmware_pack_end.h" -SVGA3dVertexBuffer; - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXSetVertexBuffers { - uint32 startBuffer; - /* Followed by a variable number of SVGA3dVertexBuffer's. 
*/ -} -#include "vmware_pack_end.h" -SVGA3dCmdDXSetVertexBuffers; /* SVGA_3D_CMD_DX_SET_VERTEX_BUFFERS */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXSetIndexBuffer { - SVGA3dSurfaceId sid; - SVGA3dSurfaceFormat format; - uint32 offset; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXSetIndexBuffer; /* SVGA_3D_CMD_DX_SET_INDEX_BUFFER */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXSetTopology { - SVGA3dPrimitiveType topology; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXSetTopology; /* SVGA_3D_CMD_DX_SET_TOPOLOGY */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXSetRenderTargets { - SVGA3dDepthStencilViewId depthStencilViewId; - /* Followed by a variable number of SVGA3dRenderTargetViewId's. */ -} -#include "vmware_pack_end.h" -SVGA3dCmdDXSetRenderTargets; /* SVGA_3D_CMD_DX_SET_RENDERTARGETS */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXSetBlendState { - SVGA3dBlendStateId blendId; - float blendFactor[4]; - uint32 sampleMask; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXSetBlendState; /* SVGA_3D_CMD_DX_SET_BLEND_STATE */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXSetDepthStencilState { - SVGA3dDepthStencilStateId depthStencilId; - uint32 stencilRef; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXSetDepthStencilState; /* SVGA_3D_CMD_DX_SET_DEPTHSTENCIL_STATE */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXSetRasterizerState { - SVGA3dRasterizerStateId rasterizerId; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXSetRasterizerState; /* SVGA_3D_CMD_DX_SET_RASTERIZER_STATE */ +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXSetShaderIface { + SVGA3dShaderType type; + uint32 numClassInstances; + uint32 index; + uint32 iface; + SVGA3dIfaceData data; +} SVGA3dCmdDXSetShaderIface; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXBindShaderIface { + uint32 cid; + SVGAMobId mobid; + uint32 offsetInBytes; +} SVGA3dCmdDXBindShaderIface; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXSetSamplers { + uint32 startSampler; + SVGA3dShaderType type; + +} SVGA3dCmdDXSetSamplers; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXDraw { + uint32 vertexCount; + uint32 startVertexLocation; +} SVGA3dCmdDXDraw; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXDrawIndexed { + uint32 indexCount; + uint32 startIndexLocation; + int32 baseVertexLocation; +} SVGA3dCmdDXDrawIndexed; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXDrawInstanced { + uint32 vertexCountPerInstance; + uint32 instanceCount; + uint32 startVertexLocation; + uint32 startInstanceLocation; +} SVGA3dCmdDXDrawInstanced; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXDrawIndexedInstanced { + uint32 indexCountPerInstance; + uint32 instanceCount; + uint32 startIndexLocation; + int32 baseVertexLocation; + uint32 startInstanceLocation; +} SVGA3dCmdDXDrawIndexedInstanced; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXDrawIndexedInstancedIndirect { + SVGA3dSurfaceId argsBufferSid; + uint32 byteOffsetForArgs; +} SVGA3dCmdDXDrawIndexedInstancedIndirect; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXDrawInstancedIndirect { + SVGA3dSurfaceId argsBufferSid; + uint32 byteOffsetForArgs; +} SVGA3dCmdDXDrawInstancedIndirect; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXDrawAuto { + uint32 pad0; +} SVGA3dCmdDXDrawAuto; +#pragma pack(pop) + +#pragma pack(push, 1) 
+typedef struct SVGA3dCmdDXDispatch { + uint32 threadGroupCountX; + uint32 threadGroupCountY; + uint32 threadGroupCountZ; +} SVGA3dCmdDXDispatch; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXDispatchIndirect { + SVGA3dSurfaceId argsBufferSid; + uint32 byteOffsetForArgs; +} SVGA3dCmdDXDispatchIndirect; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXSetInputLayout { + SVGA3dElementLayoutId elementLayoutId; +} SVGA3dCmdDXSetInputLayout; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dVertexBuffer { + SVGA3dSurfaceId sid; + uint32 stride; + uint32 offset; +} SVGA3dVertexBuffer; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXSetVertexBuffers { + uint32 startBuffer; + +} SVGA3dCmdDXSetVertexBuffers; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dVertexBuffer_v2 { + SVGA3dSurfaceId sid; + uint32 stride; + uint32 offset; + uint32 sizeInBytes; +} SVGA3dVertexBuffer_v2; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXSetVertexBuffers_v2 { + uint32 startBuffer; + +} SVGA3dCmdDXSetVertexBuffers_v2; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dVertexBufferOffsetAndSize { + uint32 stride; + uint32 offset; + uint32 sizeInBytes; +} SVGA3dVertexBufferOffsetAndSize; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXSetVertexBuffersOffsetAndSize { + uint32 startBuffer; + +} SVGA3dCmdDXSetVertexBuffersOffsetAndSize; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXSetIndexBuffer { + SVGA3dSurfaceId sid; + SVGA3dSurfaceFormat format; + uint32 offset; +} SVGA3dCmdDXSetIndexBuffer; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXSetIndexBuffer_v2 { + SVGA3dSurfaceId sid; + SVGA3dSurfaceFormat format; + uint32 offset; + uint32 sizeInBytes; +} SVGA3dCmdDXSetIndexBuffer_v2; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXSetIndexBufferOffsetAndSize { + SVGA3dSurfaceFormat format; + uint32 offset; + uint32 sizeInBytes; +} SVGA3dCmdDXSetIndexBufferOffsetAndSize; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXSetTopology { + SVGA3dPrimitiveType topology; +} SVGA3dCmdDXSetTopology; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXSetRenderTargets { + SVGA3dDepthStencilViewId depthStencilViewId; + +} SVGA3dCmdDXSetRenderTargets; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXSetBlendState { + SVGA3dBlendStateId blendId; + float blendFactor[4]; + uint32 sampleMask; +} SVGA3dCmdDXSetBlendState; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXSetDepthStencilState { + SVGA3dDepthStencilStateId depthStencilId; + uint32 stencilRef; +} SVGA3dCmdDXSetDepthStencilState; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXSetRasterizerState { + SVGA3dRasterizerStateId rasterizerId; +} SVGA3dCmdDXSetRasterizerState; +#pragma pack(pop) #define SVGA3D_DXQUERY_FLAG_PREDICATEHINT (1 << 0) typedef uint32 SVGA3dDXQueryFlags; -/* - * The SVGADXQueryDeviceState and SVGADXQueryDeviceBits are used by the device - * to track query state transitions, but are not intended to be used by the - * driver. 
- */ -#define SVGADX_QDSTATE_INVALID ((uint8)-1) /* Query has no state */ -#define SVGADX_QDSTATE_MIN 0 -#define SVGADX_QDSTATE_IDLE 0 /* Query hasn't started yet */ -#define SVGADX_QDSTATE_ACTIVE 1 /* Query is actively gathering data */ -#define SVGADX_QDSTATE_PENDING 2 /* Query is waiting for results */ -#define SVGADX_QDSTATE_FINISHED 3 /* Query has completed */ -#define SVGADX_QDSTATE_MAX 4 +#define SVGADX_QDSTATE_INVALID ((uint8)-1) +#define SVGADX_QDSTATE_MIN 0 +#define SVGADX_QDSTATE_IDLE 0 +#define SVGADX_QDSTATE_ACTIVE 1 +#define SVGADX_QDSTATE_PENDING 2 +#define SVGADX_QDSTATE_FINISHED 3 +#define SVGADX_QDSTATE_MAX 4 typedef uint8 SVGADXQueryDeviceState; -typedef -#include "vmware_pack_begin.h" -struct { - SVGA3dQueryTypeUint8 type; - uint16 pad0; - SVGADXQueryDeviceState state; - SVGA3dDXQueryFlags flags; - SVGAMobId mobid; - uint32 offset; -} -#include "vmware_pack_end.h" -SVGACOTableDXQueryEntry; - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXDefineQuery { - SVGA3dQueryId queryId; - SVGA3dQueryType type; - SVGA3dDXQueryFlags flags; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXDefineQuery; /* SVGA_3D_CMD_DX_DEFINE_QUERY */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXDestroyQuery { - SVGA3dQueryId queryId; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXDestroyQuery; /* SVGA_3D_CMD_DX_DESTROY_QUERY */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXBindQuery { - SVGA3dQueryId queryId; - SVGAMobId mobid; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXBindQuery; /* SVGA_3D_CMD_DX_BIND_QUERY */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXSetQueryOffset { - SVGA3dQueryId queryId; - uint32 mobOffset; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXSetQueryOffset; /* SVGA_3D_CMD_DX_SET_QUERY_OFFSET */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXBeginQuery { - SVGA3dQueryId queryId; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXBeginQuery; /* SVGA_3D_CMD_DX_QUERY_BEGIN */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXEndQuery { - SVGA3dQueryId queryId; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXEndQuery; /* SVGA_3D_CMD_DX_QUERY_END */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXReadbackQuery { - SVGA3dQueryId queryId; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXReadbackQuery; /* SVGA_3D_CMD_DX_READBACK_QUERY */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXMoveQuery { - SVGA3dQueryId queryId; - SVGAMobId mobid; - uint32 mobOffset; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXMoveQuery; /* SVGA_3D_CMD_DX_MOVE_QUERY */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXBindAllQuery { - uint32 cid; - SVGAMobId mobid; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXBindAllQuery; /* SVGA_3D_CMD_DX_BIND_ALL_QUERY */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXReadbackAllQuery { - uint32 cid; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXReadbackAllQuery; /* SVGA_3D_CMD_DX_READBACK_ALL_QUERY */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXSetPredication { - SVGA3dQueryId queryId; - uint32 predicateValue; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXSetPredication; /* SVGA_3D_CMD_DX_SET_PREDICATION */ - -typedef -#include "vmware_pack_begin.h" -struct MKS3dDXSOState { - uint32 offset; /* Starting offset */ - uint32 intOffset; /* Internal offset */ - uint32 vertexCount; /* vertices written */ - uint32 dead; -} -#include "vmware_pack_end.h" -SVGA3dDXSOState; - -/* Set the offset field to this value to append SO values to the 
buffer */ -#define SVGA3D_DX_SO_OFFSET_APPEND ((uint32) ~0u) - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dSoTarget { - SVGA3dSurfaceId sid; - uint32 offset; - uint32 sizeInBytes; -} -#include "vmware_pack_end.h" -SVGA3dSoTarget; - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXSetSOTargets { - uint32 pad0; - /* Followed by a variable number of SVGA3dSOTarget's. */ -} -#include "vmware_pack_end.h" -SVGA3dCmdDXSetSOTargets; /* SVGA_3D_CMD_DX_SET_SOTARGETS */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dViewport -{ - float x; - float y; - float width; - float height; - float minDepth; - float maxDepth; -} -#include "vmware_pack_end.h" -SVGA3dViewport; - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXSetViewports { - uint32 pad0; - /* Followed by a variable number of SVGA3dViewport's. */ -} -#include "vmware_pack_end.h" -SVGA3dCmdDXSetViewports; /* SVGA_3D_CMD_DX_SET_VIEWPORTS */ - -#define SVGA3D_DX_MAX_VIEWPORTS 16 - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXSetScissorRects { - uint32 pad0; - /* Followed by a variable number of SVGASignedRect's. */ -} -#include "vmware_pack_end.h" -SVGA3dCmdDXSetScissorRects; /* SVGA_3D_CMD_DX_SET_SCISSORRECTS */ - -#define SVGA3D_DX_MAX_SCISSORRECTS 16 - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXClearRenderTargetView { - SVGA3dRenderTargetViewId renderTargetViewId; - SVGA3dRGBAFloat rgba; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXClearRenderTargetView; /* SVGA_3D_CMD_DX_CLEAR_RENDERTARGET_VIEW */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXClearDepthStencilView { - uint16 flags; - uint16 stencil; - SVGA3dDepthStencilViewId depthStencilViewId; - float depth; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXClearDepthStencilView; /* SVGA_3D_CMD_DX_CLEAR_DEPTHSTENCIL_VIEW */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXPredCopyRegion { - SVGA3dSurfaceId dstSid; - uint32 dstSubResource; - SVGA3dSurfaceId srcSid; - uint32 srcSubResource; - SVGA3dCopyBox box; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXPredCopyRegion; -/* SVGA_3D_CMD_DX_PRED_COPY_REGION */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXPredCopy { - SVGA3dSurfaceId dstSid; - SVGA3dSurfaceId srcSid; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXPredCopy; /* SVGA_3D_CMD_DX_PRED_COPY */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXPredConvertRegion { - SVGA3dSurfaceId dstSid; - uint32 dstSubResource; - SVGA3dBox destBox; - SVGA3dSurfaceId srcSid; - uint32 srcSubResource; - SVGA3dBox srcBox; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXPredConvertRegion; /* SVGA_3D_CMD_DX_PRED_CONVERT_REGION */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXPredConvert { - SVGA3dSurfaceId dstSid; - SVGA3dSurfaceId srcSid; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXPredConvert; /* SVGA_3D_CMD_DX_PRED_CONVERT */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXBufferCopy { - SVGA3dSurfaceId dest; - SVGA3dSurfaceId src; - uint32 destX; - uint32 srcX; - uint32 width; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXBufferCopy; -/* SVGA_3D_CMD_DX_BUFFER_COPY */ - -/* - * Perform a surface copy between a multisample, and a non-multisampled - * surface. 
- */ -typedef -#include "vmware_pack_begin.h" -struct { - SVGA3dSurfaceId dstSid; - uint32 dstSubResource; - SVGA3dSurfaceId srcSid; - uint32 srcSubResource; - SVGA3dSurfaceFormat copyFormat; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXResolveCopy; /* SVGA_3D_CMD_DX_RESOLVE_COPY */ - -/* - * Perform a predicated surface copy between a multisample, and a - * non-multisampled surface. - */ -typedef -#include "vmware_pack_begin.h" -struct { - SVGA3dSurfaceId dstSid; - uint32 dstSubResource; - SVGA3dSurfaceId srcSid; - uint32 srcSubResource; - SVGA3dSurfaceFormat copyFormat; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXPredResolveCopy; /* SVGA_3D_CMD_DX_PRED_RESOLVE_COPY */ +#pragma pack(push, 1) +typedef struct { + SVGA3dQueryTypeUint8 type; + uint16 pad0; + SVGADXQueryDeviceState state; + SVGA3dDXQueryFlags flags; + SVGAMobId mobid; + uint32 offset; +} SVGACOTableDXQueryEntry; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXDefineQuery { + SVGA3dQueryId queryId; + SVGA3dQueryType type; + SVGA3dDXQueryFlags flags; +} SVGA3dCmdDXDefineQuery; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXDestroyQuery { + SVGA3dQueryId queryId; +} SVGA3dCmdDXDestroyQuery; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXBindQuery { + SVGA3dQueryId queryId; + SVGAMobId mobid; +} SVGA3dCmdDXBindQuery; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXSetQueryOffset { + SVGA3dQueryId queryId; + uint32 mobOffset; +} SVGA3dCmdDXSetQueryOffset; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXBeginQuery { + SVGA3dQueryId queryId; +} SVGA3dCmdDXBeginQuery; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXEndQuery { + SVGA3dQueryId queryId; +} SVGA3dCmdDXEndQuery; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXReadbackQuery { + SVGA3dQueryId queryId; +} SVGA3dCmdDXReadbackQuery; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXMoveQuery { + SVGA3dQueryId queryId; + SVGAMobId mobid; + uint32 mobOffset; +} SVGA3dCmdDXMoveQuery; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXBindAllQuery { + uint32 cid; + SVGAMobId mobid; +} SVGA3dCmdDXBindAllQuery; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXReadbackAllQuery { + uint32 cid; +} SVGA3dCmdDXReadbackAllQuery; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXSetPredication { + SVGA3dQueryId queryId; + uint32 predicateValue; +} SVGA3dCmdDXSetPredication; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct MKS3dDXSOState { + uint32 offset; + uint32 intOffset; + uint32 vertexCount; + uint32 dead; +} SVGA3dDXSOState; +#pragma pack(pop) + +#define SVGA3D_DX_SO_OFFSET_APPEND ((uint32)~0u) + +#pragma pack(push, 1) +typedef struct SVGA3dSoTarget { + SVGA3dSurfaceId sid; + uint32 offset; + uint32 sizeInBytes; +} SVGA3dSoTarget; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXSetSOTargets { + uint32 pad0; + +} SVGA3dCmdDXSetSOTargets; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dViewport { + float x; + float y; + float width; + float height; + float minDepth; + float maxDepth; +} SVGA3dViewport; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXSetViewports { + uint32 pad0; + +} SVGA3dCmdDXSetViewports; +#pragma pack(pop) + +#define SVGA3D_DX_MAX_VIEWPORTS 16 + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXSetScissorRects { + uint32 pad0; + +} 
SVGA3dCmdDXSetScissorRects; +#pragma pack(pop) + +#define SVGA3D_DX_MAX_SCISSORRECTS 16 + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXClearRenderTargetView { + SVGA3dRenderTargetViewId renderTargetViewId; + SVGA3dRGBAFloat rgba; +} SVGA3dCmdDXClearRenderTargetView; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXClearDepthStencilView { + uint16 flags; + uint16 stencil; + SVGA3dDepthStencilViewId depthStencilViewId; + float depth; +} SVGA3dCmdDXClearDepthStencilView; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXPredCopyRegion { + SVGA3dSurfaceId dstSid; + uint32 dstSubResource; + SVGA3dSurfaceId srcSid; + uint32 srcSubResource; + SVGA3dCopyBox box; +} SVGA3dCmdDXPredCopyRegion; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXPredStagingCopyRegion { + SVGA3dSurfaceId dstSid; + uint32 dstSubResource; + SVGA3dSurfaceId srcSid; + uint32 srcSubResource; + SVGA3dCopyBox box; + uint8 readback; + uint8 unsynchronized; + uint8 mustBeZero[2]; +} SVGA3dCmdDXPredStagingCopyRegion; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXPredCopy { + SVGA3dSurfaceId dstSid; + SVGA3dSurfaceId srcSid; +} SVGA3dCmdDXPredCopy; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXPredConvertRegion { + SVGA3dSurfaceId dstSid; + uint32 dstSubResource; + SVGA3dBox destBox; + SVGA3dSurfaceId srcSid; + uint32 srcSubResource; + SVGA3dBox srcBox; +} SVGA3dCmdDXPredConvertRegion; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXPredStagingConvertRegion { + SVGA3dSurfaceId dstSid; + uint32 dstSubResource; + SVGA3dBox destBox; + SVGA3dSurfaceId srcSid; + uint32 srcSubResource; + SVGA3dBox srcBox; + uint8 readback; + uint8 unsynchronized; + uint8 mustBeZero[2]; +} SVGA3dCmdDXPredStagingConvertRegion; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXPredConvert { + SVGA3dSurfaceId dstSid; + SVGA3dSurfaceId srcSid; +} SVGA3dCmdDXPredConvert; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXPredStagingConvert { + SVGA3dSurfaceId dstSid; + SVGA3dSurfaceId srcSid; + uint8 readback; + uint8 unsynchronized; + uint8 mustBeZero[2]; +} SVGA3dCmdDXPredStagingConvert; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXBufferCopy { + SVGA3dSurfaceId dest; + SVGA3dSurfaceId src; + uint32 destX; + uint32 srcX; + uint32 width; +} SVGA3dCmdDXBufferCopy; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXStagingBufferCopy { + SVGA3dSurfaceId dest; + SVGA3dSurfaceId src; + uint32 destX; + uint32 srcX; + uint32 width; + uint8 readback; + uint8 unsynchronized; + uint8 mustBeZero[2]; +} SVGA3dCmdDXStagingBufferCopy; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + SVGA3dSurfaceId dstSid; + uint32 dstSubResource; + SVGA3dSurfaceId srcSid; + uint32 srcSubResource; + SVGA3dSurfaceFormat copyFormat; +} SVGA3dCmdDXResolveCopy; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + SVGA3dSurfaceId dstSid; + uint32 dstSubResource; + SVGA3dSurfaceId srcSid; + uint32 srcSubResource; + SVGA3dSurfaceFormat copyFormat; +} SVGA3dCmdDXPredResolveCopy; +#pragma pack(pop) typedef uint32 SVGA3dDXPresentBltMode; -#define SVGADX_PRESENTBLT_LINEAR (1 << 0) -#define SVGADX_PRESENTBLT_FORCE_SRC_SRGB (1 << 1) +#define SVGADX_PRESENTBLT_LINEAR (1 << 0) +#define SVGADX_PRESENTBLT_FORCE_SRC_SRGB (1 << 1) #define SVGADX_PRESENTBLT_FORCE_SRC_XRBIAS (1 << 2) -#define SVGADX_PRESENTBLT_MODE_MAX (1 << 3) - -typedef -#include 
"vmware_pack_begin.h" -struct SVGA3dCmdDXPresentBlt { - SVGA3dSurfaceId srcSid; - uint32 srcSubResource; - SVGA3dSurfaceId dstSid; - uint32 destSubResource; - SVGA3dBox boxSrc; - SVGA3dBox boxDest; - SVGA3dDXPresentBltMode mode; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXPresentBlt; /* SVGA_3D_CMD_DX_PRESENTBLT*/ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXGenMips { - SVGA3dShaderResourceViewId shaderResourceViewId; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXGenMips; /* SVGA_3D_CMD_DX_GENMIPS */ - -/* - * Update a sub-resource in a guest-backed resource. - * (Inform the device that the guest-contents have been updated.) - */ -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXUpdateSubResource { - SVGA3dSurfaceId sid; - uint32 subResource; - SVGA3dBox box; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXUpdateSubResource; /* SVGA_3D_CMD_DX_UPDATE_SUBRESOURCE */ - -/* - * Readback a subresource in a guest-backed resource. - * (Request the device to flush the dirty contents into the guest.) - */ -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXReadbackSubResource { - SVGA3dSurfaceId sid; - uint32 subResource; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXReadbackSubResource; /* SVGA_3D_CMD_DX_READBACK_SUBRESOURCE */ - -/* - * Invalidate an image in a guest-backed surface. - * (Notify the device that the contents can be lost.) - */ -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXInvalidateSubResource { - SVGA3dSurfaceId sid; - uint32 subResource; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXInvalidateSubResource; /* SVGA_3D_CMD_DX_INVALIDATE_SUBRESOURCE */ - - -/* - * Raw byte wise transfer from a buffer surface into another surface - * of the requested box. Supported if 3d is enabled and SVGA_CAP_DX - * is set. This command does not take a context. 
- */ -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXTransferFromBuffer { - SVGA3dSurfaceId srcSid; - uint32 srcOffset; - uint32 srcPitch; - uint32 srcSlicePitch; - SVGA3dSurfaceId destSid; - uint32 destSubResource; - SVGA3dBox destBox; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXTransferFromBuffer; /* SVGA_3D_CMD_DX_TRANSFER_FROM_BUFFER */ - - -#define SVGA3D_TRANSFER_TO_BUFFER_READBACK (1 << 0) +#define SVGADX_PRESENTBLT_MODE_MAX (1 << 3) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXPresentBlt { + SVGA3dSurfaceId srcSid; + uint32 srcSubResource; + SVGA3dSurfaceId dstSid; + uint32 destSubResource; + SVGA3dBox boxSrc; + SVGA3dBox boxDest; + SVGA3dDXPresentBltMode mode; +} SVGA3dCmdDXPresentBlt; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXGenMips { + SVGA3dShaderResourceViewId shaderResourceViewId; +} SVGA3dCmdDXGenMips; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXUpdateSubResource { + SVGA3dSurfaceId sid; + uint32 subResource; + SVGA3dBox box; +} SVGA3dCmdDXUpdateSubResource; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXReadbackSubResource { + SVGA3dSurfaceId sid; + uint32 subResource; +} SVGA3dCmdDXReadbackSubResource; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXInvalidateSubResource { + SVGA3dSurfaceId sid; + uint32 subResource; +} SVGA3dCmdDXInvalidateSubResource; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXTransferFromBuffer { + SVGA3dSurfaceId srcSid; + uint32 srcOffset; + uint32 srcPitch; + uint32 srcSlicePitch; + SVGA3dSurfaceId destSid; + uint32 destSubResource; + SVGA3dBox destBox; +} SVGA3dCmdDXTransferFromBuffer; +#pragma pack(pop) + +#define SVGA3D_TRANSFER_TO_BUFFER_READBACK (1 << 0) #define SVGA3D_TRANSFER_TO_BUFFER_FLAGS_MASK (1 << 0) typedef uint32 SVGA3dTransferToBufferFlags; -/* - * Raw byte wise transfer to a buffer surface from another surface - * of the requested box. Supported if SVGA_CAP_DX2 is set. This - * command does not take a context. - */ -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXTransferToBuffer { - SVGA3dSurfaceId srcSid; - uint32 srcSubResource; - SVGA3dBox srcBox; - SVGA3dSurfaceId destSid; - uint32 destOffset; - uint32 destPitch; - uint32 destSlicePitch; - SVGA3dTransferToBufferFlags flags; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXTransferToBuffer; /* SVGA_3D_CMD_DX_TRANSFER_TO_BUFFER */ - +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXTransferToBuffer { + SVGA3dSurfaceId srcSid; + uint32 srcSubResource; + SVGA3dBox srcBox; + SVGA3dSurfaceId destSid; + uint32 destOffset; + uint32 destPitch; + uint32 destSlicePitch; + SVGA3dTransferToBufferFlags flags; +} SVGA3dCmdDXTransferToBuffer; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXPredTransferFromBuffer { + SVGA3dSurfaceId srcSid; + uint32 srcOffset; + uint32 srcPitch; + uint32 srcSlicePitch; + SVGA3dSurfaceId destSid; + uint32 destSubResource; + SVGA3dBox destBox; +} SVGA3dCmdDXPredTransferFromBuffer; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXSurfaceCopyAndReadback { + SVGA3dSurfaceId srcSid; + SVGA3dSurfaceId destSid; + SVGA3dCopyBox box; +} SVGA3dCmdDXSurfaceCopyAndReadback; +#pragma pack(pop) -/* - * Raw byte wise transfer from a buffer surface into another surface - * of the requested box. Supported if SVGA3D_DEVCAP_DXCONTEXT is set. - * The context is implied from the command buffer header. 
- */ -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXPredTransferFromBuffer { - SVGA3dSurfaceId srcSid; - uint32 srcOffset; - uint32 srcPitch; - uint32 srcSlicePitch; - SVGA3dSurfaceId destSid; - uint32 destSubResource; - SVGA3dBox destBox; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXPredTransferFromBuffer; -/* SVGA_3D_CMD_DX_PRED_TRANSFER_FROM_BUFFER */ - - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXSurfaceCopyAndReadback { - SVGA3dSurfaceId srcSid; - SVGA3dSurfaceId destSid; - SVGA3dCopyBox box; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXSurfaceCopyAndReadback; -/* SVGA_3D_CMD_DX_SURFACE_COPY_AND_READBACK */ - -/* - * SVGA_DX_HINT_NONE: Does nothing. - * - * SVGA_DX_HINT_PREFETCH_OBJECT: - * SVGA_DX_HINT_PREEVICT_OBJECT: - * Consumes a SVGAObjectRef, and hints that the host should consider - * fetching/evicting the specified object. - * - * An id of SVGA3D_INVALID_ID can be used if the guest isn't sure - * what object was affected. (For instance, if the guest knows that - * it is about to evict a DXShader, but doesn't know precisely which one, - * the device can still use this to help limit it's search, or track - * how many page-outs have happened.) - * - * SVGA_DX_HINT_PREFETCH_COBJECT: - * SVGA_DX_HINT_PREEVICT_COBJECT: - * Same as the above, except they consume an SVGACObjectRef. - */ typedef uint32 SVGADXHintId; -#define SVGA_DX_HINT_NONE 0 -#define SVGA_DX_HINT_PREFETCH_OBJECT 1 -#define SVGA_DX_HINT_PREEVICT_OBJECT 2 -#define SVGA_DX_HINT_PREFETCH_COBJECT 3 -#define SVGA_DX_HINT_PREEVICT_COBJECT 4 -#define SVGA_DX_HINT_MAX 5 - -typedef -#include "vmware_pack_begin.h" -struct SVGAObjectRef { - SVGAOTableType type; - uint32 id; -} -#include "vmware_pack_end.h" -SVGAObjectRef; - -typedef -#include "vmware_pack_begin.h" -struct SVGACObjectRef { - SVGACOTableType type; - uint32 cid; - uint32 id; -} -#include "vmware_pack_end.h" -SVGACObjectRef; - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXHint { - SVGADXHintId hintId; - - /* - * Followed by variable sized data depending on the hintId. 
- */ -} -#include "vmware_pack_end.h" -SVGA3dCmdDXHint; -/* SVGA_3D_CMD_DX_HINT */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXBufferUpdate { - SVGA3dSurfaceId sid; - uint32 x; - uint32 width; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXBufferUpdate; -/* SVGA_3D_CMD_DX_BUFFER_UPDATE */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXSetConstantBufferOffset { - uint32 slot; - uint32 offsetInBytes; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXSetConstantBufferOffset; +#define SVGA_DX_HINT_NONE 0 +#define SVGA_DX_HINT_PREFETCH_OBJECT 1 +#define SVGA_DX_HINT_PREEVICT_OBJECT 2 +#define SVGA_DX_HINT_PREFETCH_COBJECT 3 +#define SVGA_DX_HINT_PREEVICT_COBJECT 4 +#define SVGA_DX_HINT_MAX 5 + +#pragma pack(push, 1) +typedef struct SVGAObjectRef { + SVGAOTableType type; + uint32 id; +} SVGAObjectRef; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGACObjectRef { + SVGACOTableType type; + uint32 cid; + uint32 id; +} SVGACObjectRef; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXHint { + SVGADXHintId hintId; + +} SVGA3dCmdDXHint; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXBufferUpdate { + SVGA3dSurfaceId sid; + uint32 x; + uint32 width; +} SVGA3dCmdDXBufferUpdate; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXSetConstantBufferOffset { + uint32 slot; + uint32 offsetInBytes; +} SVGA3dCmdDXSetConstantBufferOffset; +#pragma pack(pop) typedef SVGA3dCmdDXSetConstantBufferOffset SVGA3dCmdDXSetVSConstantBufferOffset; -/* SVGA_3D_CMD_DX_SET_VS_CONSTANT_BUFFER_OFFSET */ typedef SVGA3dCmdDXSetConstantBufferOffset SVGA3dCmdDXSetPSConstantBufferOffset; -/* SVGA_3D_CMD_DX_SET_PS_CONSTANT_BUFFER_OFFSET */ typedef SVGA3dCmdDXSetConstantBufferOffset SVGA3dCmdDXSetGSConstantBufferOffset; -/* SVGA_3D_CMD_DX_SET_GS_CONSTANT_BUFFER_OFFSET */ typedef SVGA3dCmdDXSetConstantBufferOffset SVGA3dCmdDXSetHSConstantBufferOffset; -/* SVGA_3D_CMD_DX_SET_HS_CONSTANT_BUFFER_OFFSET */ typedef SVGA3dCmdDXSetConstantBufferOffset SVGA3dCmdDXSetDSConstantBufferOffset; -/* SVGA_3D_CMD_DX_SET_DS_CONSTANT_BUFFER_OFFSET */ typedef SVGA3dCmdDXSetConstantBufferOffset SVGA3dCmdDXSetCSConstantBufferOffset; -/* SVGA_3D_CMD_DX_SET_CS_CONSTANT_BUFFER_OFFSET */ - -#define SVGA3D_BUFFEREX_SRV_RAW (1 << 0) -#define SVGA3D_BUFFEREX_SRV_FLAGS_MAX (1 << 1) +#define SVGA3D_BUFFEREX_SRV_RAW (1 << 0) +#define SVGA3D_BUFFEREX_SRV_FLAGS_MAX (1 << 1) #define SVGA3D_BUFFEREX_SRV_FLAGS_MASK (SVGA3D_BUFFEREX_SRV_FLAGS_MAX - 1) typedef uint32 SVGA3dBufferExFlags; -typedef -#include "vmware_pack_begin.h" -struct { - union { - struct { - uint32 firstElement; - uint32 numElements; - uint32 pad0; - uint32 pad1; - } buffer; - struct { - uint32 mostDetailedMip; - uint32 firstArraySlice; - uint32 mipLevels; - uint32 arraySize; - } tex; /* 1d, 2d, 3d, cube */ - struct { - uint32 firstElement; - uint32 numElements; - SVGA3dBufferExFlags flags; - uint32 pad0; - } bufferex; - }; -} -#include "vmware_pack_end.h" -SVGA3dShaderResourceViewDesc; - -typedef -#include "vmware_pack_begin.h" -struct { - SVGA3dSurfaceId sid; - SVGA3dSurfaceFormat format; - SVGA3dResourceType resourceDimension; - SVGA3dShaderResourceViewDesc desc; - uint32 pad; -} -#include "vmware_pack_end.h" -SVGACOTableDXSRViewEntry; - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXDefineShaderResourceView { - SVGA3dShaderResourceViewId shaderResourceViewId; - - SVGA3dSurfaceId sid; - SVGA3dSurfaceFormat format; - SVGA3dResourceType resourceDimension; - - SVGA3dShaderResourceViewDesc 
desc; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXDefineShaderResourceView; -/* SVGA_3D_CMD_DX_DEFINE_SHADERRESOURCE_VIEW */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXDestroyShaderResourceView { - SVGA3dShaderResourceViewId shaderResourceViewId; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXDestroyShaderResourceView; -/* SVGA_3D_CMD_DX_DESTROY_SHADERRESOURCE_VIEW */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dRenderTargetViewDesc { - union { - struct { - uint32 firstElement; - uint32 numElements; - uint32 padding0; - } buffer; - struct { - uint32 mipSlice; - uint32 firstArraySlice; - uint32 arraySize; - } tex; /* 1d, 2d, cube */ - struct { - uint32 mipSlice; - uint32 firstW; - uint32 wSize; - } tex3D; - }; -} -#include "vmware_pack_end.h" -SVGA3dRenderTargetViewDesc; - -typedef -#include "vmware_pack_begin.h" -struct { - SVGA3dSurfaceId sid; - SVGA3dSurfaceFormat format; - SVGA3dResourceType resourceDimension; - SVGA3dRenderTargetViewDesc desc; - uint32 pad[2]; -} -#include "vmware_pack_end.h" -SVGACOTableDXRTViewEntry; - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXDefineRenderTargetView { - SVGA3dRenderTargetViewId renderTargetViewId; - - SVGA3dSurfaceId sid; - SVGA3dSurfaceFormat format; - SVGA3dResourceType resourceDimension; - - SVGA3dRenderTargetViewDesc desc; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXDefineRenderTargetView; -/* SVGA_3D_CMD_DX_DEFINE_RENDERTARGET_VIEW */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXDestroyRenderTargetView { - SVGA3dRenderTargetViewId renderTargetViewId; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXDestroyRenderTargetView; -/* SVGA_3D_CMD_DX_DESTROY_RENDERTARGET_VIEW */ - -/* - */ -#define SVGA3D_DXDSVIEW_CREATE_READ_ONLY_DEPTH 0x01 +#pragma pack(push, 1) +typedef struct { + union { + struct { + uint32 firstElement; + uint32 numElements; + uint32 pad0; + uint32 pad1; + } buffer; + struct { + uint32 mostDetailedMip; + uint32 firstArraySlice; + uint32 mipLevels; + uint32 arraySize; + } tex; + struct { + uint32 firstElement; + uint32 numElements; + SVGA3dBufferExFlags flags; + uint32 pad0; + } bufferex; + }; +} SVGA3dShaderResourceViewDesc; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + SVGA3dSurfaceId sid; + SVGA3dSurfaceFormat format; + SVGA3dResourceType resourceDimension; + SVGA3dShaderResourceViewDesc desc; + uint32 pad; +} SVGACOTableDXSRViewEntry; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXDefineShaderResourceView { + SVGA3dShaderResourceViewId shaderResourceViewId; + + SVGA3dSurfaceId sid; + SVGA3dSurfaceFormat format; + SVGA3dResourceType resourceDimension; + + SVGA3dShaderResourceViewDesc desc; +} SVGA3dCmdDXDefineShaderResourceView; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXDestroyShaderResourceView { + SVGA3dShaderResourceViewId shaderResourceViewId; +} SVGA3dCmdDXDestroyShaderResourceView; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dRenderTargetViewDesc { + union { + struct { + uint32 firstElement; + uint32 numElements; + uint32 padding0; + } buffer; + struct { + uint32 mipSlice; + uint32 firstArraySlice; + uint32 arraySize; + } tex; + struct { + uint32 mipSlice; + uint32 firstW; + uint32 wSize; + } tex3D; + }; +} SVGA3dRenderTargetViewDesc; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + SVGA3dSurfaceId sid; + SVGA3dSurfaceFormat format; + SVGA3dResourceType resourceDimension; + SVGA3dRenderTargetViewDesc desc; + uint32 pad[2]; +} SVGACOTableDXRTViewEntry; 
+#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXDefineRenderTargetView { + SVGA3dRenderTargetViewId renderTargetViewId; + + SVGA3dSurfaceId sid; + SVGA3dSurfaceFormat format; + SVGA3dResourceType resourceDimension; + + SVGA3dRenderTargetViewDesc desc; +} SVGA3dCmdDXDefineRenderTargetView; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXDestroyRenderTargetView { + SVGA3dRenderTargetViewId renderTargetViewId; +} SVGA3dCmdDXDestroyRenderTargetView; +#pragma pack(pop) + +#define SVGA3D_DXDSVIEW_CREATE_READ_ONLY_DEPTH 0x01 #define SVGA3D_DXDSVIEW_CREATE_READ_ONLY_STENCIL 0x02 -#define SVGA3D_DXDSVIEW_CREATE_FLAG_MASK 0x03 +#define SVGA3D_DXDSVIEW_CREATE_FLAG_MASK 0x03 typedef uint8 SVGA3DCreateDSViewFlags; -typedef -#include "vmware_pack_begin.h" -struct { - SVGA3dSurfaceId sid; - SVGA3dSurfaceFormat format; - SVGA3dResourceType resourceDimension; - uint32 mipSlice; - uint32 firstArraySlice; - uint32 arraySize; - SVGA3DCreateDSViewFlags flags; - uint8 pad0; - uint16 pad1; - uint32 pad2; -} -#include "vmware_pack_end.h" -SVGACOTableDXDSViewEntry; - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXDefineDepthStencilView { - SVGA3dDepthStencilViewId depthStencilViewId; - - SVGA3dSurfaceId sid; - SVGA3dSurfaceFormat format; - SVGA3dResourceType resourceDimension; - uint32 mipSlice; - uint32 firstArraySlice; - uint32 arraySize; - SVGA3DCreateDSViewFlags flags; - uint8 pad0; - uint16 pad1; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXDefineDepthStencilView; -/* SVGA_3D_CMD_DX_DEFINE_DEPTHSTENCIL_VIEW */ - -/* - * Version 2 needed in order to start validating and using the flags - * field. Unfortunately the device wasn't validating or using the - * flags field and the driver wasn't initializing it in shipped code, - * so a new version of the command is needed to allow that code to - * continue to work. 
- */ -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXDefineDepthStencilView_v2 { - SVGA3dDepthStencilViewId depthStencilViewId; - - SVGA3dSurfaceId sid; - SVGA3dSurfaceFormat format; - SVGA3dResourceType resourceDimension; - uint32 mipSlice; - uint32 firstArraySlice; - uint32 arraySize; - SVGA3DCreateDSViewFlags flags; - uint8 pad0; - uint16 pad1; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXDefineDepthStencilView_v2; -/* SVGA_3D_CMD_DX_DEFINE_DEPTHSTENCIL_VIEW_V2 */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXDestroyDepthStencilView { - SVGA3dDepthStencilViewId depthStencilViewId; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXDestroyDepthStencilView; -/* SVGA_3D_CMD_DX_DESTROY_DEPTHSTENCIL_VIEW */ - - -#define SVGA3D_UABUFFER_RAW (1 << 0) -#define SVGA3D_UABUFFER_APPEND (1 << 1) +#pragma pack(push, 1) +typedef struct { + SVGA3dSurfaceId sid; + SVGA3dSurfaceFormat format; + SVGA3dResourceType resourceDimension; + uint32 mipSlice; + uint32 firstArraySlice; + uint32 arraySize; + SVGA3DCreateDSViewFlags flags; + uint8 pad0; + uint16 pad1; + uint32 pad2; +} SVGACOTableDXDSViewEntry; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXDefineDepthStencilView { + SVGA3dDepthStencilViewId depthStencilViewId; + + SVGA3dSurfaceId sid; + SVGA3dSurfaceFormat format; + SVGA3dResourceType resourceDimension; + uint32 mipSlice; + uint32 firstArraySlice; + uint32 arraySize; + SVGA3DCreateDSViewFlags flags; + uint8 pad0; + uint16 pad1; +} SVGA3dCmdDXDefineDepthStencilView; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXDefineDepthStencilView_v2 { + SVGA3dDepthStencilViewId depthStencilViewId; + + SVGA3dSurfaceId sid; + SVGA3dSurfaceFormat format; + SVGA3dResourceType resourceDimension; + uint32 mipSlice; + uint32 firstArraySlice; + uint32 arraySize; + SVGA3DCreateDSViewFlags flags; + uint8 pad0; + uint16 pad1; +} SVGA3dCmdDXDefineDepthStencilView_v2; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXDestroyDepthStencilView { + SVGA3dDepthStencilViewId depthStencilViewId; +} SVGA3dCmdDXDestroyDepthStencilView; +#pragma pack(pop) + +#define SVGA3D_UABUFFER_RAW (1 << 0) +#define SVGA3D_UABUFFER_APPEND (1 << 1) #define SVGA3D_UABUFFER_COUNTER (1 << 2) typedef uint32 SVGA3dUABufferFlags; -typedef -#include "vmware_pack_begin.h" -struct { - union { - struct { - uint32 firstElement; - uint32 numElements; - SVGA3dUABufferFlags flags; - uint32 padding0; - uint32 padding1; - } buffer; - struct { - uint32 mipSlice; - uint32 firstArraySlice; - uint32 arraySize; - uint32 padding0; - uint32 padding1; - } tex; /* 1d, 2d */ - struct { - uint32 mipSlice; - uint32 firstW; - uint32 wSize; - uint32 padding0; - uint32 padding1; - } tex3D; - }; -} -#include "vmware_pack_end.h" -SVGA3dUAViewDesc; - -typedef -#include "vmware_pack_begin.h" -struct { - SVGA3dSurfaceId sid; - SVGA3dSurfaceFormat format; - SVGA3dResourceType resourceDimension; - SVGA3dUAViewDesc desc; - uint32 structureCount; - uint32 pad[7]; -} -#include "vmware_pack_end.h" -SVGACOTableDXUAViewEntry; - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXDefineUAView { - SVGA3dUAViewId uaViewId; - - SVGA3dSurfaceId sid; - SVGA3dSurfaceFormat format; - SVGA3dResourceType resourceDimension; - - SVGA3dUAViewDesc desc; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXDefineUAView; -/* SVGA_3D_CMD_DX_DEFINE_UA_VIEW */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXDestroyUAView { - SVGA3dUAViewId uaViewId; -} -#include "vmware_pack_end.h" 
-SVGA3dCmdDXDestroyUAView; -/* SVGA_3D_CMD_DX_DESTROY_UA_VIEW */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXClearUAViewUint { - SVGA3dUAViewId uaViewId; - SVGA3dRGBAUint32 value; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXClearUAViewUint; -/* SVGA_3D_CMD_DX_CLEAR_UA_VIEW_UINT */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXClearUAViewFloat { - SVGA3dUAViewId uaViewId; - SVGA3dRGBAFloat value; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXClearUAViewFloat; -/* SVGA_3D_CMD_DX_CLEAR_UA_VIEW_FLOAT */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXCopyStructureCount { - SVGA3dUAViewId srcUAViewId; - SVGA3dSurfaceId destSid; - uint32 destByteOffset; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXCopyStructureCount; -/* SVGA_3D_CMD_DX_COPY_STRUCTURE_COUNT */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXSetStructureCount { - SVGA3dUAViewId uaViewId; - uint32 structureCount; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXSetStructureCount; -/* SVGA_3D_CMD_DX_SET_STRUCTURE_COUNT */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXSetUAViews { - uint32 uavSpliceIndex; - /* Followed by a variable number of SVGA3dUAViewId's. */ -} -#include "vmware_pack_end.h" -SVGA3dCmdDXSetUAViews; /* SVGA_3D_CMD_DX_SET_UA_VIEWS */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXSetCSUAViews { - uint32 startIndex; - /* Followed by a variable number of SVGA3dUAViewId's. */ -} -#include "vmware_pack_end.h" -SVGA3dCmdDXSetCSUAViews; /* SVGA_3D_CMD_DX_SET_CS_UA_VIEWS */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dInputElementDesc { - uint32 inputSlot; - uint32 alignedByteOffset; - SVGA3dSurfaceFormat format; - SVGA3dInputClassification inputSlotClass; - uint32 instanceDataStepRate; - uint32 inputRegister; -} -#include "vmware_pack_end.h" -SVGA3dInputElementDesc; - -typedef -#include "vmware_pack_begin.h" -struct { - uint32 elid; - uint32 numDescs; - SVGA3dInputElementDesc descs[32]; - uint32 pad[62]; -} -#include "vmware_pack_end.h" -SVGACOTableDXElementLayoutEntry; - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXDefineElementLayout { - SVGA3dElementLayoutId elementLayoutId; - /* Followed by a variable number of SVGA3dInputElementDesc's. 
*/ -} -#include "vmware_pack_end.h" -SVGA3dCmdDXDefineElementLayout; -/* SVGA_3D_CMD_DX_DEFINE_ELEMENTLAYOUT */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXDestroyElementLayout { - SVGA3dElementLayoutId elementLayoutId; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXDestroyElementLayout; -/* SVGA_3D_CMD_DX_DESTROY_ELEMENTLAYOUT */ - +#pragma pack(push, 1) +typedef struct { + union { + struct { + uint32 firstElement; + uint32 numElements; + SVGA3dUABufferFlags flags; + uint32 padding0; + uint32 padding1; + } buffer; + struct { + uint32 mipSlice; + uint32 firstArraySlice; + uint32 arraySize; + uint32 padding0; + uint32 padding1; + } tex; + struct { + uint32 mipSlice; + uint32 firstW; + uint32 wSize; + uint32 padding0; + uint32 padding1; + } tex3D; + }; +} SVGA3dUAViewDesc; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + SVGA3dSurfaceId sid; + SVGA3dSurfaceFormat format; + SVGA3dResourceType resourceDimension; + SVGA3dUAViewDesc desc; + uint32 structureCount; + uint32 pad[7]; +} SVGACOTableDXUAViewEntry; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXDefineUAView { + SVGA3dUAViewId uaViewId; + + SVGA3dSurfaceId sid; + SVGA3dSurfaceFormat format; + SVGA3dResourceType resourceDimension; + + SVGA3dUAViewDesc desc; +} SVGA3dCmdDXDefineUAView; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXDestroyUAView { + SVGA3dUAViewId uaViewId; +} SVGA3dCmdDXDestroyUAView; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXClearUAViewUint { + SVGA3dUAViewId uaViewId; + SVGA3dRGBAUint32 value; +} SVGA3dCmdDXClearUAViewUint; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXClearUAViewFloat { + SVGA3dUAViewId uaViewId; + SVGA3dRGBAFloat value; +} SVGA3dCmdDXClearUAViewFloat; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXCopyStructureCount { + SVGA3dUAViewId srcUAViewId; + SVGA3dSurfaceId destSid; + uint32 destByteOffset; +} SVGA3dCmdDXCopyStructureCount; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXSetStructureCount { + SVGA3dUAViewId uaViewId; + uint32 structureCount; +} SVGA3dCmdDXSetStructureCount; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXSetUAViews { + uint32 uavSpliceIndex; + +} SVGA3dCmdDXSetUAViews; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXSetCSUAViews { + uint32 startIndex; + +} SVGA3dCmdDXSetCSUAViews; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dInputElementDesc { + uint32 inputSlot; + uint32 alignedByteOffset; + SVGA3dSurfaceFormat format; + SVGA3dInputClassification inputSlotClass; + uint32 instanceDataStepRate; + uint32 inputRegister; +} SVGA3dInputElementDesc; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint32 elid; + uint32 numDescs; + SVGA3dInputElementDesc descs[32]; + uint32 pad[62]; +} SVGACOTableDXElementLayoutEntry; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXDefineElementLayout { + SVGA3dElementLayoutId elementLayoutId; + +} SVGA3dCmdDXDefineElementLayout; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXDestroyElementLayout { + SVGA3dElementLayoutId elementLayoutId; +} SVGA3dCmdDXDestroyElementLayout; +#pragma pack(pop) #define SVGA3D_DX_MAX_RENDER_TARGETS 8 -typedef -#include "vmware_pack_begin.h" -struct SVGA3dDXBlendStatePerRT { - uint8 blendEnable; - uint8 srcBlend; - uint8 destBlend; - uint8 blendOp; - uint8 srcBlendAlpha; - uint8 destBlendAlpha; - uint8 blendOpAlpha; 
- SVGA3dColorWriteEnable renderTargetWriteMask; - uint8 logicOpEnable; - uint8 logicOp; - uint16 pad0; -} -#include "vmware_pack_end.h" -SVGA3dDXBlendStatePerRT; - -typedef -#include "vmware_pack_begin.h" -struct { - uint8 alphaToCoverageEnable; - uint8 independentBlendEnable; - uint16 pad0; - SVGA3dDXBlendStatePerRT perRT[SVGA3D_MAX_RENDER_TARGETS]; - uint32 pad1[7]; -} -#include "vmware_pack_end.h" -SVGACOTableDXBlendStateEntry; - -/* - */ -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXDefineBlendState { - SVGA3dBlendStateId blendId; - uint8 alphaToCoverageEnable; - uint8 independentBlendEnable; - uint16 pad0; - SVGA3dDXBlendStatePerRT perRT[SVGA3D_MAX_RENDER_TARGETS]; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXDefineBlendState; /* SVGA_3D_CMD_DX_DEFINE_BLEND_STATE */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXDestroyBlendState { - SVGA3dBlendStateId blendId; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXDestroyBlendState; /* SVGA_3D_CMD_DX_DESTROY_BLEND_STATE */ - -typedef -#include "vmware_pack_begin.h" -struct { - uint8 depthEnable; - SVGA3dDepthWriteMask depthWriteMask; - SVGA3dComparisonFunc depthFunc; - uint8 stencilEnable; - uint8 frontEnable; - uint8 backEnable; - uint8 stencilReadMask; - uint8 stencilWriteMask; - - uint8 frontStencilFailOp; - uint8 frontStencilDepthFailOp; - uint8 frontStencilPassOp; - SVGA3dComparisonFunc frontStencilFunc; - - uint8 backStencilFailOp; - uint8 backStencilDepthFailOp; - uint8 backStencilPassOp; - SVGA3dComparisonFunc backStencilFunc; -} -#include "vmware_pack_end.h" -SVGACOTableDXDepthStencilEntry; - -/* - */ -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXDefineDepthStencilState { - SVGA3dDepthStencilStateId depthStencilId; - - uint8 depthEnable; - SVGA3dDepthWriteMask depthWriteMask; - SVGA3dComparisonFunc depthFunc; - uint8 stencilEnable; - uint8 frontEnable; - uint8 backEnable; - uint8 stencilReadMask; - uint8 stencilWriteMask; - - uint8 frontStencilFailOp; - uint8 frontStencilDepthFailOp; - uint8 frontStencilPassOp; - SVGA3dComparisonFunc frontStencilFunc; - - uint8 backStencilFailOp; - uint8 backStencilDepthFailOp; - uint8 backStencilPassOp; - SVGA3dComparisonFunc backStencilFunc; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXDefineDepthStencilState; -/* SVGA_3D_CMD_DX_DEFINE_DEPTHSTENCIL_STATE */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXDestroyDepthStencilState { - SVGA3dDepthStencilStateId depthStencilId; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXDestroyDepthStencilState; -/* SVGA_3D_CMD_DX_DESTROY_DEPTHSTENCIL_STATE */ - -typedef -#include "vmware_pack_begin.h" -struct { - uint8 fillMode; - SVGA3dCullMode cullMode; - uint8 frontCounterClockwise; - uint8 provokingVertexLast; - int32 depthBias; - float depthBiasClamp; - float slopeScaledDepthBias; - uint8 depthClipEnable; - uint8 scissorEnable; - SVGA3dMultisampleRastEnable multisampleEnable; - uint8 antialiasedLineEnable; - float lineWidth; - uint8 lineStippleEnable; - uint8 lineStippleFactor; - uint16 lineStipplePattern; - uint8 forcedSampleCount; - uint8 mustBeZero[3]; -} -#include "vmware_pack_end.h" -SVGACOTableDXRasterizerStateEntry; - -/* - */ -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXDefineRasterizerState { - SVGA3dRasterizerStateId rasterizerId; - - uint8 fillMode; - SVGA3dCullMode cullMode; - uint8 frontCounterClockwise; - uint8 provokingVertexLast; - int32 depthBias; - float depthBiasClamp; - float slopeScaledDepthBias; - uint8 depthClipEnable; - uint8 scissorEnable; - 
SVGA3dMultisampleRastEnable multisampleEnable; - uint8 antialiasedLineEnable; - float lineWidth; - uint8 lineStippleEnable; - uint8 lineStippleFactor; - uint16 lineStipplePattern; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXDefineRasterizerState; -/* SVGA_3D_CMD_DX_DEFINE_RASTERIZER_STATE */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXDestroyRasterizerState { - SVGA3dRasterizerStateId rasterizerId; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXDestroyRasterizerState; -/* SVGA_3D_CMD_DX_DESTROY_RASTERIZER_STATE */ - -typedef -#include "vmware_pack_begin.h" -struct { - SVGA3dFilter filter; - uint8 addressU; - uint8 addressV; - uint8 addressW; - uint8 pad0; - float mipLODBias; - uint8 maxAnisotropy; - SVGA3dComparisonFunc comparisonFunc; - uint16 pad1; - SVGA3dRGBAFloat borderColor; - float minLOD; - float maxLOD; - uint32 pad2[6]; -} -#include "vmware_pack_end.h" -SVGACOTableDXSamplerEntry; - -/* - */ -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXDefineSamplerState { - SVGA3dSamplerId samplerId; - SVGA3dFilter filter; - uint8 addressU; - uint8 addressV; - uint8 addressW; - uint8 pad0; - float mipLODBias; - uint8 maxAnisotropy; - SVGA3dComparisonFunc comparisonFunc; - uint16 pad1; - SVGA3dRGBAFloat borderColor; - float minLOD; - float maxLOD; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXDefineSamplerState; /* SVGA_3D_CMD_DX_DEFINE_SAMPLER_STATE */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXDestroySamplerState { - SVGA3dSamplerId samplerId; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXDestroySamplerState; /* SVGA_3D_CMD_DX_DESTROY_SAMPLER_STATE */ - - -#define SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED 0 -#define SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION 1 -#define SVGADX_SIGNATURE_SEMANTIC_NAME_CLIP_DISTANCE 2 -#define SVGADX_SIGNATURE_SEMANTIC_NAME_CULL_DISTANCE 3 -#define SVGADX_SIGNATURE_SEMANTIC_NAME_RENDER_TARGET_ARRAY_INDEX 4 -#define SVGADX_SIGNATURE_SEMANTIC_NAME_VIEWPORT_ARRAY_INDEX 5 -#define SVGADX_SIGNATURE_SEMANTIC_NAME_VERTEX_ID 6 -#define SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID 7 -#define SVGADX_SIGNATURE_SEMANTIC_NAME_INSTANCE_ID 8 -#define SVGADX_SIGNATURE_SEMANTIC_NAME_IS_FRONT_FACE 9 -#define SVGADX_SIGNATURE_SEMANTIC_NAME_SAMPLE_INDEX 10 -#define SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR 11 -#define SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR 12 -#define SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR 13 -#define SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR 14 -#define SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR 15 -#define SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR 16 -#define SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR 17 -#define SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR 18 -#define SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR 19 -#define SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_INSIDE_TESSFACTOR 20 -#define SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_LINE_DETAIL_TESSFACTOR 21 -#define SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_LINE_DENSITY_TESSFACTOR 22 -#define SVGADX_SIGNATURE_SEMANTIC_NAME_MAX 23 +#pragma pack(push, 1) +typedef struct SVGA3dDXBlendStatePerRT { + uint8 blendEnable; + uint8 srcBlend; + uint8 destBlend; + uint8 blendOp; + uint8 srcBlendAlpha; + uint8 destBlendAlpha; + uint8 blendOpAlpha; + SVGA3dColorWriteEnable renderTargetWriteMask; + uint8 logicOpEnable; + uint8 logicOp; + uint16 pad0; +} SVGA3dDXBlendStatePerRT; +#pragma pack(pop) + 
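
For context on the packing change this file-wide hunk makes: every command/COTable structure that used to be bracketed by the "vmware_pack_begin.h" / "vmware_pack_end.h" include pair is now wrapped in an inline #pragma pack(push, 1) ... #pragma pack(pop) pair instead. The minimal, self-contained C sketch below is not part of the patch; ExampleCmdUnpacked/ExampleCmdPacked are made-up illustrative structures that merely mimic the mixed uint8/uint32 layouts in this header, to show why the byte-tight packing matters for structures the virtual device parses byte-for-byte.

/*
 * Illustrative sketch only -- not part of the patch.
 * Build with e.g. gcc -std=c11.
 */
#include <stdint.h>
#include <stdio.h>

/* Natural alignment: the compiler may insert padding after 'flags'. */
struct ExampleCmdUnpacked {
	uint8_t  flags;
	uint32_t surfaceId;
	uint16_t subResource;
};

/*
 * Same fields, wrapped in pack(push, 1) / pack(pop) -- the idiom this
 * patch now uses inline instead of including vmware_pack_begin.h and
 * vmware_pack_end.h around every typedef.
 */
#pragma pack(push, 1)
struct ExampleCmdPacked {
	uint8_t  flags;
	uint32_t surfaceId;
	uint16_t subResource;
};
#pragma pack(pop)

/* The packed variant is exactly the sum of its members: 1 + 4 + 2 = 7. */
_Static_assert(sizeof(struct ExampleCmdPacked) == 7,
	       "device-visible layout must be byte-tight");

int main(void)
{
	/* Typically 12 vs 7 on common ABIs; only the packed size is fixed. */
	printf("unpacked: %zu bytes, packed: %zu bytes\n",
	       sizeof(struct ExampleCmdUnpacked),
	       sizeof(struct ExampleCmdPacked));
	return 0;
}

The observable layout is unchanged by the patch; only the mechanism for pinning it moves from wrapper headers to inline pragmas, which is why the hunks below continue with the same field lists under the new pack(push, 1) blocks.
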
+#pragma pack(push, 1) +typedef struct { + uint8 alphaToCoverageEnable; + uint8 independentBlendEnable; + uint16 pad0; + SVGA3dDXBlendStatePerRT perRT[SVGA3D_DX_MAX_RENDER_TARGETS]; + uint32 pad1[7]; +} SVGACOTableDXBlendStateEntry; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXDefineBlendState { + SVGA3dBlendStateId blendId; + uint8 alphaToCoverageEnable; + uint8 independentBlendEnable; + uint16 pad0; + SVGA3dDXBlendStatePerRT perRT[SVGA3D_DX_MAX_RENDER_TARGETS]; +} SVGA3dCmdDXDefineBlendState; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXDestroyBlendState { + SVGA3dBlendStateId blendId; +} SVGA3dCmdDXDestroyBlendState; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint8 depthEnable; + SVGA3dDepthWriteMask depthWriteMask; + SVGA3dComparisonFunc depthFunc; + uint8 stencilEnable; + uint8 frontEnable; + uint8 backEnable; + uint8 stencilReadMask; + uint8 stencilWriteMask; + + uint8 frontStencilFailOp; + uint8 frontStencilDepthFailOp; + uint8 frontStencilPassOp; + SVGA3dComparisonFunc frontStencilFunc; + + uint8 backStencilFailOp; + uint8 backStencilDepthFailOp; + uint8 backStencilPassOp; + SVGA3dComparisonFunc backStencilFunc; +} SVGACOTableDXDepthStencilEntry; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXDefineDepthStencilState { + SVGA3dDepthStencilStateId depthStencilId; + + uint8 depthEnable; + SVGA3dDepthWriteMask depthWriteMask; + SVGA3dComparisonFunc depthFunc; + uint8 stencilEnable; + uint8 frontEnable; + uint8 backEnable; + uint8 stencilReadMask; + uint8 stencilWriteMask; + + uint8 frontStencilFailOp; + uint8 frontStencilDepthFailOp; + uint8 frontStencilPassOp; + SVGA3dComparisonFunc frontStencilFunc; + + uint8 backStencilFailOp; + uint8 backStencilDepthFailOp; + uint8 backStencilPassOp; + SVGA3dComparisonFunc backStencilFunc; +} SVGA3dCmdDXDefineDepthStencilState; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXDestroyDepthStencilState { + SVGA3dDepthStencilStateId depthStencilId; +} SVGA3dCmdDXDestroyDepthStencilState; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint8 fillMode; + SVGA3dCullMode cullMode; + uint8 frontCounterClockwise; + uint8 provokingVertexLast; + int32 depthBias; + float depthBiasClamp; + float slopeScaledDepthBias; + uint8 depthClipEnable; + uint8 scissorEnable; + SVGA3dMultisampleRastEnable multisampleEnable; + uint8 antialiasedLineEnable; + float lineWidth; + uint8 lineStippleEnable; + uint8 lineStippleFactor; + uint16 lineStipplePattern; + uint8 forcedSampleCount; + uint8 mustBeZero[3]; +} SVGACOTableDXRasterizerStateEntry; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXDefineRasterizerState { + SVGA3dRasterizerStateId rasterizerId; + + uint8 fillMode; + SVGA3dCullMode cullMode; + uint8 frontCounterClockwise; + uint8 provokingVertexLast; + int32 depthBias; + float depthBiasClamp; + float slopeScaledDepthBias; + uint8 depthClipEnable; + uint8 scissorEnable; + SVGA3dMultisampleRastEnable multisampleEnable; + uint8 antialiasedLineEnable; + float lineWidth; + uint8 lineStippleEnable; + uint8 lineStippleFactor; + uint16 lineStipplePattern; +} SVGA3dCmdDXDefineRasterizerState; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXDefineRasterizerState_v2 { + SVGA3dRasterizerStateId rasterizerId; + + uint8 fillMode; + SVGA3dCullMode cullMode; + uint8 frontCounterClockwise; + uint8 provokingVertexLast; + int32 depthBias; + float depthBiasClamp; + float slopeScaledDepthBias; + uint8 
depthClipEnable; + uint8 scissorEnable; + SVGA3dMultisampleRastEnable multisampleEnable; + uint8 antialiasedLineEnable; + float lineWidth; + uint8 lineStippleEnable; + uint8 lineStippleFactor; + uint16 lineStipplePattern; + uint32 forcedSampleCount; +} SVGA3dCmdDXDefineRasterizerState_v2; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXDestroyRasterizerState { + SVGA3dRasterizerStateId rasterizerId; +} SVGA3dCmdDXDestroyRasterizerState; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + SVGA3dFilter filter; + uint8 addressU; + uint8 addressV; + uint8 addressW; + uint8 pad0; + float mipLODBias; + uint8 maxAnisotropy; + SVGA3dComparisonFunc comparisonFunc; + uint16 pad1; + SVGA3dRGBAFloat borderColor; + float minLOD; + float maxLOD; + uint32 pad2[6]; +} SVGACOTableDXSamplerEntry; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXDefineSamplerState { + SVGA3dSamplerId samplerId; + SVGA3dFilter filter; + uint8 addressU; + uint8 addressV; + uint8 addressW; + uint8 pad0; + float mipLODBias; + uint8 maxAnisotropy; + SVGA3dComparisonFunc comparisonFunc; + uint16 pad1; + SVGA3dRGBAFloat borderColor; + float minLOD; + float maxLOD; +} SVGA3dCmdDXDefineSamplerState; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXDestroySamplerState { + SVGA3dSamplerId samplerId; +} SVGA3dCmdDXDestroySamplerState; +#pragma pack(pop) + +#define SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED 0 +#define SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION 1 +#define SVGADX_SIGNATURE_SEMANTIC_NAME_CLIP_DISTANCE 2 +#define SVGADX_SIGNATURE_SEMANTIC_NAME_CULL_DISTANCE 3 +#define SVGADX_SIGNATURE_SEMANTIC_NAME_RENDER_TARGET_ARRAY_INDEX 4 +#define SVGADX_SIGNATURE_SEMANTIC_NAME_VIEWPORT_ARRAY_INDEX 5 +#define SVGADX_SIGNATURE_SEMANTIC_NAME_VERTEX_ID 6 +#define SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID 7 +#define SVGADX_SIGNATURE_SEMANTIC_NAME_INSTANCE_ID 8 +#define SVGADX_SIGNATURE_SEMANTIC_NAME_IS_FRONT_FACE 9 +#define SVGADX_SIGNATURE_SEMANTIC_NAME_SAMPLE_INDEX 10 +#define SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR 11 +#define SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR 12 +#define SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR 13 +#define SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR 14 +#define SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR 15 +#define SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR 16 +#define SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR 17 +#define SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR 18 +#define SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR 19 +#define SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_INSIDE_TESSFACTOR 20 +#define SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_LINE_DETAIL_TESSFACTOR 21 +#define SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_LINE_DENSITY_TESSFACTOR 22 +#define SVGADX_SIGNATURE_SEMANTIC_NAME_MAX 23 typedef uint32 SVGA3dDXSignatureSemanticName; #define SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN 0 @@ -1670,398 +1394,331 @@ typedef uint32 SVGA3dDXSignatureRegisterComponentType; #define SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT 0 typedef uint32 SVGA3dDXSignatureMinPrecision; -typedef -#include "vmware_pack_begin.h" -struct SVGA3dDXSignatureEntry { - uint32 registerIndex; - SVGA3dDXSignatureSemanticName semanticName; - uint32 mask; /* Lower 4 bits represent X, Y, Z, W channels */ - SVGA3dDXSignatureRegisterComponentType componentType; - SVGA3dDXSignatureMinPrecision 
minPrecision; -} -#include "vmware_pack_end.h" -SVGA3dDXShaderSignatureEntry; +#pragma pack(push, 1) +typedef struct SVGA3dDXSignatureEntry { + uint32 registerIndex; + SVGA3dDXSignatureSemanticName semanticName; + uint32 mask; + SVGA3dDXSignatureRegisterComponentType componentType; + SVGA3dDXSignatureMinPrecision minPrecision; +} SVGA3dDXShaderSignatureEntry; +#pragma pack(pop) #define SVGADX_SIGNATURE_HEADER_VERSION_0 0x08a92d12 -/* - * The SVGA3dDXSignatureHeader structure is added after the shader - * body in the mob that is bound to the shader. It is followed by the - * specified number of SVGA3dDXSignatureEntry structures for each of - * the three types of signatures in the order (input, output, patch - * constants). - */ -typedef -#include "vmware_pack_begin.h" -struct SVGA3dDXSignatureHeader { - uint32 headerVersion; - uint32 numInputSignatures; - uint32 numOutputSignatures; - uint32 numPatchConstantSignatures; -} -#include "vmware_pack_end.h" -SVGA3dDXShaderSignatureHeader; - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXDefineShader { - SVGA3dShaderId shaderId; - SVGA3dShaderType type; - uint32 sizeInBytes; /* Number of bytes of shader text. */ -} -#include "vmware_pack_end.h" -SVGA3dCmdDXDefineShader; /* SVGA_3D_CMD_DX_DEFINE_SHADER */ - -typedef -#include "vmware_pack_begin.h" -struct SVGACOTableDXShaderEntry { - SVGA3dShaderType type; - uint32 sizeInBytes; - uint32 offsetInBytes; - SVGAMobId mobid; - uint32 pad[4]; -} -#include "vmware_pack_end.h" -SVGACOTableDXShaderEntry; - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXDestroyShader { - SVGA3dShaderId shaderId; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXDestroyShader; /* SVGA_3D_CMD_DX_DESTROY_SHADER */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXBindShader { - uint32 cid; - uint32 shid; - SVGAMobId mobid; - uint32 offsetInBytes; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXBindShader; /* SVGA_3D_CMD_DX_BIND_SHADER */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXBindAllShader { - uint32 cid; - SVGAMobId mobid; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXBindAllShader; /* SVGA_3D_CMD_DX_BIND_ALL_SHADER */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXCondBindAllShader { - uint32 cid; - SVGAMobId testMobid; - SVGAMobId mobid; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXCondBindAllShader; /* SVGA_3D_CMD_DX_COND_BIND_ALL_SHADER */ +#pragma pack(push, 1) +typedef struct SVGA3dDXSignatureHeader { + uint32 headerVersion; + uint32 numInputSignatures; + uint32 numOutputSignatures; + uint32 numPatchConstantSignatures; +} SVGA3dDXShaderSignatureHeader; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXDefineShader { + SVGA3dShaderId shaderId; + SVGA3dShaderType type; + uint32 sizeInBytes; +} SVGA3dCmdDXDefineShader; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGACOTableDXShaderEntry { + SVGA3dShaderType type; + uint32 sizeInBytes; + uint32 offsetInBytes; + SVGAMobId mobid; + uint32 pad[4]; +} SVGACOTableDXShaderEntry; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXDestroyShader { + SVGA3dShaderId shaderId; +} SVGA3dCmdDXDestroyShader; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXBindShader { + uint32 cid; + uint32 shid; + SVGAMobId mobid; + uint32 offsetInBytes; +} SVGA3dCmdDXBindShader; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXBindAllShader { + uint32 cid; + SVGAMobId mobid; +} SVGA3dCmdDXBindAllShader; +#pragma pack(pop) + +#pragma 
pack(push, 1) +typedef struct SVGA3dCmdDXCondBindAllShader { + uint32 cid; + SVGAMobId testMobid; + SVGAMobId mobid; +} SVGA3dCmdDXCondBindAllShader; +#pragma pack(pop) -/* - * The maximum number of streamout decl's in each streamout entry. - */ #define SVGA3D_MAX_DX10_STREAMOUT_DECLS 64 #define SVGA3D_MAX_STREAMOUT_DECLS 512 -typedef -#include "vmware_pack_begin.h" -struct SVGA3dStreamOutputDeclarationEntry { - uint32 outputSlot; - uint32 registerIndex; - uint8 registerMask; - uint8 pad0; - uint16 pad1; - uint32 stream; -} -#include "vmware_pack_end.h" -SVGA3dStreamOutputDeclarationEntry; - -typedef -#include "vmware_pack_begin.h" -struct SVGAOTableStreamOutputEntry { - uint32 numOutputStreamEntries; - SVGA3dStreamOutputDeclarationEntry decl[SVGA3D_MAX_DX10_STREAMOUT_DECLS]; - uint32 streamOutputStrideInBytes[SVGA3D_DX_MAX_SOTARGETS]; - uint32 rasterizedStream; - uint32 numOutputStreamStrides; - uint32 mobid; - uint32 offsetInBytes; - uint8 usesMob; - uint8 pad0; - uint16 pad1; - uint32 pad2[246]; -} -#include "vmware_pack_end.h" -SVGACOTableDXStreamOutputEntry; - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXDefineStreamOutput { - SVGA3dStreamOutputId soid; - uint32 numOutputStreamEntries; - SVGA3dStreamOutputDeclarationEntry decl[SVGA3D_MAX_DX10_STREAMOUT_DECLS]; - uint32 streamOutputStrideInBytes[SVGA3D_DX_MAX_SOTARGETS]; - uint32 rasterizedStream; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXDefineStreamOutput; /* SVGA_3D_CMD_DX_DEFINE_STREAMOUTPUT */ - -/* - * Version 2 needed in order to start validating and using the - * rasterizedStream field. Unfortunately the device wasn't validating - * or using this field and the driver wasn't initializing it in shipped - * code, so a new version of the command is needed to allow that code - * to continue to work. Also added new numOutputStreamStrides field. 
- */ +#pragma pack(push, 1) +typedef struct SVGA3dStreamOutputDeclarationEntry { + uint32 outputSlot; + uint32 registerIndex; + uint8 registerMask; + uint8 pad0; + uint16 pad1; + uint32 stream; +} SVGA3dStreamOutputDeclarationEntry; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGAOTableStreamOutputEntry { + uint32 numOutputStreamEntries; + SVGA3dStreamOutputDeclarationEntry decl[SVGA3D_MAX_DX10_STREAMOUT_DECLS]; + uint32 streamOutputStrideInBytes[SVGA3D_DX_MAX_SOTARGETS]; + uint32 rasterizedStream; + uint32 numOutputStreamStrides; + uint32 mobid; + uint32 offsetInBytes; + uint8 usesMob; + uint8 pad0; + uint16 pad1; + uint32 pad2[246]; +} SVGACOTableDXStreamOutputEntry; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXDefineStreamOutput { + SVGA3dStreamOutputId soid; + uint32 numOutputStreamEntries; + SVGA3dStreamOutputDeclarationEntry decl[SVGA3D_MAX_DX10_STREAMOUT_DECLS]; + uint32 streamOutputStrideInBytes[SVGA3D_DX_MAX_SOTARGETS]; + uint32 rasterizedStream; +} SVGA3dCmdDXDefineStreamOutput; +#pragma pack(pop) #define SVGA3D_DX_SO_NO_RASTERIZED_STREAM 0xFFFFFFFF -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXDefineStreamOutputWithMob { - SVGA3dStreamOutputId soid; - uint32 numOutputStreamEntries; - uint32 numOutputStreamStrides; - uint32 streamOutputStrideInBytes[SVGA3D_DX_MAX_SOTARGETS]; - uint32 rasterizedStream; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXDefineStreamOutputWithMob; -/* SVGA_3D_CMD_DX_DEFINE_STREAMOUTPUT_WITH_MOB */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXBindStreamOutput { - SVGA3dStreamOutputId soid; - uint32 mobid; - uint32 offsetInBytes; - uint32 sizeInBytes; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXBindStreamOutput; /* SVGA_3D_CMD_DX_BIND_STREAMOUTPUT */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXDestroyStreamOutput { - SVGA3dStreamOutputId soid; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXDestroyStreamOutput; /* SVGA_3D_CMD_DX_DESTROY_STREAMOUTPUT */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXSetStreamOutput { - SVGA3dStreamOutputId soid; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXSetStreamOutput; /* SVGA_3D_CMD_DX_SET_STREAMOUTPUT */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXSetMinLOD { - SVGA3dSurfaceId sid; - float minLOD; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXSetMinLOD; /* SVGA_3D_CMD_DX_SET_MIN_LOD */ - -typedef -#include "vmware_pack_begin.h" -struct { - uint64 value; - uint32 mobId; - uint32 mobOffset; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXMobFence64; /* SVGA_3D_CMD_DX_MOB_FENCE_64 */ +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXDefineStreamOutputWithMob { + SVGA3dStreamOutputId soid; + uint32 numOutputStreamEntries; + uint32 numOutputStreamStrides; + uint32 streamOutputStrideInBytes[SVGA3D_DX_MAX_SOTARGETS]; + uint32 rasterizedStream; +} SVGA3dCmdDXDefineStreamOutputWithMob; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXBindStreamOutput { + SVGA3dStreamOutputId soid; + uint32 mobid; + uint32 offsetInBytes; + uint32 sizeInBytes; +} SVGA3dCmdDXBindStreamOutput; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXDestroyStreamOutput { + SVGA3dStreamOutputId soid; +} SVGA3dCmdDXDestroyStreamOutput; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXSetStreamOutput { + SVGA3dStreamOutputId soid; +} SVGA3dCmdDXSetStreamOutput; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXSetMinLOD { + SVGA3dSurfaceId sid; + float 
minLOD; +} SVGA3dCmdDXSetMinLOD; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint64 value; + uint32 mobId; + uint32 mobOffset; +} SVGA3dCmdDXMobFence64; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXSetCOTable { + uint32 cid; + uint32 mobid; + SVGACOTableType type; + uint32 validSizeInBytes; +} SVGA3dCmdDXSetCOTable; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXGrowCOTable { + uint32 cid; + uint32 mobid; + SVGACOTableType type; + uint32 validSizeInBytes; +} SVGA3dCmdDXGrowCOTable; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXReadbackCOTable { + uint32 cid; + SVGACOTableType type; +} SVGA3dCmdDXReadbackCOTable; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXCopyCOTableIntoMob { + uint32 cid; + SVGACOTableType type; + uint32 mobid; +} SVGA3dCmdDXCopyCOTableIntoMob; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXPredStagingCopy { + SVGA3dSurfaceId dstSid; + SVGA3dSurfaceId srcSid; + uint8 readback; + uint8 unsynchronized; + uint8 mustBeZero[2]; + +} SVGA3dCmdDXPredStagingCopy; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCmdDXStagingCopy { + SVGA3dSurfaceId dstSid; + SVGA3dSurfaceId srcSid; + uint8 readback; + uint8 unsynchronized; + uint8 mustBeZero[2]; + +} SVGA3dCmdDXStagingCopy; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCOTableData { + uint32 mobid; +} SVGA3dCOTableData; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dBufferBinding { + uint32 bufferId; + uint32 stride; + uint32 offset; +} SVGA3dBufferBinding; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dConstantBufferBinding { + uint32 sid; + uint32 offsetInBytes; + uint32 sizeInBytes; +} SVGA3dConstantBufferBinding; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGADXInputAssemblyMobFormat { + uint32 layoutId; + SVGA3dBufferBinding vertexBuffers[SVGA3D_DX_MAX_VERTEXBUFFERS]; + uint32 indexBufferSid; + uint32 pad; + uint32 indexBufferOffset; + uint32 indexBufferFormat; + uint32 topology; +} SVGADXInputAssemblyMobFormat; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGADXContextMobFormat { + SVGADXInputAssemblyMobFormat inputAssembly; + + struct { + uint32 blendStateId; + uint32 blendFactor[4]; + uint32 sampleMask; + uint32 depthStencilStateId; + uint32 stencilRef; + uint32 rasterizerStateId; + uint32 depthStencilViewId; + uint32 renderTargetViewIds[SVGA3D_DX_MAX_RENDER_TARGETS]; + } renderState; + + uint32 pad0[8]; + + struct { + uint32 targets[SVGA3D_DX_MAX_SOTARGETS]; + uint32 soid; + } streamOut; + + uint32 pad1[10]; + + uint32 uavSpliceIndex; + + uint8 numViewports; + uint8 numScissorRects; + uint16 pad2[1]; + + uint32 pad3[3]; + + SVGA3dViewport viewports[SVGA3D_DX_MAX_VIEWPORTS]; + uint32 pad4[32]; + + SVGASignedRect scissorRects[SVGA3D_DX_MAX_SCISSORRECTS]; + uint32 pad5[64]; + + struct { + uint32 queryID; + uint32 value; + } predication; + + SVGAMobId shaderIfaceMobid; + uint32 shaderIfaceOffset; + struct { + uint32 shaderId; + SVGA3dConstantBufferBinding + constantBuffers[SVGA3D_DX_MAX_CONSTBUFFERS]; + uint32 shaderResources[SVGA3D_DX_MAX_SRVIEWS]; + uint32 samplers[SVGA3D_DX_MAX_SAMPLERS]; + } shaderState[SVGA3D_NUM_SHADERTYPE]; + uint32 pad6[26]; + + SVGA3dQueryId queryID[SVGA3D_MAX_QUERY]; + + SVGA3dCOTableData cotables[SVGA_COTABLE_MAX]; + + uint32 pad7[64]; + + uint32 uaViewIds[SVGA3D_DX11_1_MAX_UAVIEWS]; + uint32 csuaViewIds[SVGA3D_DX11_1_MAX_UAVIEWS]; + + uint32 
pad8[188]; +} SVGADXContextMobFormat; +#pragma pack(pop) -/* - * SVGA3dCmdSetCOTable -- - * - * This command allows the guest to bind a mob to a context-object table. - */ -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXSetCOTable { - uint32 cid; - uint32 mobid; - SVGACOTableType type; - uint32 validSizeInBytes; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXSetCOTable; /* SVGA_3D_CMD_DX_SET_COTABLE */ +#define SVGA3D_DX_MAX_CLASS_INSTANCES_PADDED 256 -/* - * Guests using SVGA_3D_CMD_DX_GROW_COTABLE are promising that - * the new COTable contains the same contents as the old one, except possibly - * for some new invalid entries at the end. - * - * If there is an old cotable mob bound, it also has to still be valid. - * - * (Otherwise, guests should use the DXSetCOTableBase command.) - */ -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXGrowCOTable { - uint32 cid; - uint32 mobid; - SVGACOTableType type; - uint32 validSizeInBytes; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXGrowCOTable; /* SVGA_3D_CMD_DX_GROW_COTABLE */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXReadbackCOTable { - uint32 cid; - SVGACOTableType type; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXReadbackCOTable; /* SVGA_3D_CMD_DX_READBACK_COTABLE */ - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCOTableData { - uint32 mobid; -} -#include "vmware_pack_end.h" -SVGA3dCOTableData; - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dBufferBinding { - uint32 bufferId; - uint32 stride; - uint32 offset; -} -#include "vmware_pack_end.h" -SVGA3dBufferBinding; - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dConstantBufferBinding { - uint32 sid; - uint32 offsetInBytes; - uint32 sizeInBytes; -} -#include "vmware_pack_end.h" -SVGA3dConstantBufferBinding; - -typedef -#include "vmware_pack_begin.h" -struct SVGADXInputAssemblyMobFormat { - uint32 layoutId; - SVGA3dBufferBinding vertexBuffers[SVGA3D_DX_MAX_VERTEXBUFFERS]; - uint32 indexBufferSid; - uint32 pad; - uint32 indexBufferOffset; - uint32 indexBufferFormat; - uint32 topology; -} -#include "vmware_pack_end.h" -SVGADXInputAssemblyMobFormat; - -typedef -#include "vmware_pack_begin.h" -struct SVGADXContextMobFormat { - SVGADXInputAssemblyMobFormat inputAssembly; - - struct { - uint32 blendStateId; - uint32 blendFactor[4]; - uint32 sampleMask; - uint32 depthStencilStateId; - uint32 stencilRef; - uint32 rasterizerStateId; - uint32 depthStencilViewId; - uint32 renderTargetViewIds[SVGA3D_MAX_SIMULTANEOUS_RENDER_TARGETS]; - } renderState; - - uint32 pad0[8]; - - struct { - uint32 targets[SVGA3D_DX_MAX_SOTARGETS]; - uint32 soid; - } streamOut; - - uint32 pad1[10]; - - uint32 uavSpliceIndex; - - uint8 numViewports; - uint8 numScissorRects; - uint16 pad2[1]; - - uint32 pad3[3]; - - SVGA3dViewport viewports[SVGA3D_DX_MAX_VIEWPORTS]; - uint32 pad4[32]; - - SVGASignedRect scissorRects[SVGA3D_DX_MAX_SCISSORRECTS]; - uint32 pad5[64]; - - struct { - uint32 queryID; - uint32 value; - } predication; - - SVGAMobId shaderIfaceMobid; - uint32 shaderIfaceOffset; - struct { - uint32 shaderId; - SVGA3dConstantBufferBinding constantBuffers[SVGA3D_DX_MAX_CONSTBUFFERS]; - uint32 shaderResources[SVGA3D_DX_MAX_SRVIEWS]; - uint32 samplers[SVGA3D_DX_MAX_SAMPLERS]; - } shaderState[SVGA3D_NUM_SHADERTYPE]; - uint32 pad6[26]; - - SVGA3dQueryId queryID[SVGA3D_MAX_QUERY]; - - SVGA3dCOTableData cotables[SVGA_COTABLE_MAX]; - - uint32 pad7[64]; - - uint32 uaViewIds[SVGA3D_DX11_1_MAX_UAVIEWS]; - uint32 csuaViewIds[SVGA3D_DX11_1_MAX_UAVIEWS]; - - uint32 pad8[188]; 
-} -#include "vmware_pack_end.h" -SVGADXContextMobFormat; +#pragma pack(push, 1) +typedef struct SVGADXShaderIfaceMobFormat { + struct { + uint32 numClassInstances; + uint32 iface[SVGA3D_DX_MAX_CLASS_INSTANCES_PADDED]; + SVGA3dIfaceData data[SVGA3D_DX_MAX_CLASS_INSTANCES_PADDED]; + } shaderIfaceState[SVGA3D_NUM_SHADERTYPE]; -/* - * There is conflicting documentation on max class instances (253 vs 256). The - * lower value is the one used throughout the device, but since mob format is - * more involved to increase if needed, conservatively use the higher one here. - */ -#define SVGA3D_DX_MAX_CLASS_INSTANCES_PADDED 256 + uint32 pad0[1018]; +} SVGADXShaderIfaceMobFormat; +#pragma pack(pop) -typedef -#include "vmware_pack_begin.h" -struct SVGADXShaderIfaceMobFormat { - struct { - uint32 numClassInstances; - uint32 iface[SVGA3D_DX_MAX_CLASS_INSTANCES_PADDED]; - SVGA3dIfaceData data[SVGA3D_DX_MAX_CLASS_INSTANCES_PADDED]; - } shaderIfaceState[SVGA3D_NUM_SHADERTYPE]; - - uint32 pad0[1018]; -} -#include "vmware_pack_end.h" -SVGADXShaderIfaceMobFormat; - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCmdDXTempSetContext { - uint32 dxcid; -} -#include "vmware_pack_end.h" -SVGA3dCmdDXTempSetContext; /* SVGA_3D_CMD_DX_TEMP_SET_CONTEXT */ - -#endif /* _SVGA3D_DX_H_ */ +#endif diff --git a/drivers/gpu/drm/vmwgfx/device_include/svga3d_limits.h b/drivers/gpu/drm/vmwgfx/device_include/svga3d_limits.h index f4375a41b3aa..35494a728c7a 100644 --- a/drivers/gpu/drm/vmwgfx/device_include/svga3d_limits.h +++ b/drivers/gpu/drm/vmwgfx/device_include/svga3d_limits.h @@ -1,6 +1,6 @@ -/* SPDX-License-Identifier: GPL-2.0 OR MIT */ /********************************************************** - * Copyright 2007-2019 VMware, Inc. + * Copyright 2012-2021 VMware, Inc. + * SPDX-License-Identifier: GPL-2.0 OR MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation @@ -27,104 +27,59 @@ /* * svga3d_limits.h -- * - * SVGA 3d hardware limits + * SVGA 3d hardware limits */ -#ifndef _SVGA3D_LIMITS_H_ -#define _SVGA3D_LIMITS_H_ -#define INCLUDE_ALLOW_MODULE -#define INCLUDE_ALLOW_USERLEVEL -#define INCLUDE_ALLOW_VMCORE -#include "includeCheck.h" +#ifndef _SVGA3D_LIMITS_H_ +#define _SVGA3D_LIMITS_H_ -#define SVGA3D_NUM_CLIPPLANES 6 -#define SVGA3D_MAX_CONTEXT_IDS 256 -#define SVGA3D_MAX_SURFACE_IDS (32 * 1024) +#define SVGA3D_HB_MAX_CONTEXT_IDS 256 +#define SVGA3D_HB_MAX_SURFACE_IDS (32 * 1024) -/* - * While there are separate bind-points for RenderTargetViews and - * UnorderedAccessViews in a DXContext, there is in fact one shared - * semantic space that the guest-driver can use on any given draw call. - * So there are really only 8 slots that can be spilt up between them, with the - * spliceIndex controlling where the UAV's sit in the collapsed array. - */ -#define SVGA3D_MAX_RENDER_TARGETS 8 -#define SVGA3D_MAX_SIMULTANEOUS_RENDER_TARGETS (SVGA3D_MAX_RENDER_TARGETS) -#define SVGA3D_MAX_UAVIEWS 8 -#define SVGA3D_DX11_1_MAX_UAVIEWS 64 +#define SVGA3D_DX_MAX_RENDER_TARGETS 8 +#define SVGA3D_DX11_MAX_UAVIEWS 8 +#define SVGA3D_DX11_1_MAX_UAVIEWS 64 +#define SVGA3D_MAX_UAVIEWS (SVGA3D_DX11_1_MAX_UAVIEWS) +#define SVGA3D_DX11_MAX_SIMULTANEOUS_RTUAV (SVGA3D_DX11_MAX_UAVIEWS) +#define SVGA3D_DX11_1_MAX_SIMULTANEOUS_RTUAV (SVGA3D_DX11_1_MAX_UAVIEWS) +#define SVGA3D_MAX_SIMULTANEOUS_RTUAV (SVGA3D_MAX_UAVIEWS) -/* - * Maximum canonical size of a surface in host-backed mode (pre-GBObjects). 
- */ #define SVGA3D_HB_MAX_SURFACE_SIZE MBYTES_2_BYTES(128) -/* - * Maximum ID a shader can be assigned on a given context. - */ -#define SVGA3D_MAX_SHADERIDS 5000 -/* - * Maximum number of shaders of a given type that can be defined - * (including all contexts). - */ -#define SVGA3D_MAX_SIMULTANEOUS_SHADERS 20000 +#define SVGA3D_MAX_SHADERIDS 5000 -#define SVGA3D_NUM_TEXTURE_UNITS 32 -#define SVGA3D_NUM_LIGHTS 8 +#define SVGA3D_MAX_SIMULTANEOUS_SHADERS 20000 -#define SVGA3D_MAX_VIDEOPROCESSOR_SAMPLERS 32 +#define SVGA3D_NUM_TEXTURE_UNITS 32 +#define SVGA3D_NUM_LIGHTS 8 + +#define SVGA3D_MAX_VIDEOPROCESSOR_SAMPLERS 32 -/* - * Maximum size in dwords of shader text the SVGA device will allow. - * Currently 8 MB. - */ #define SVGA3D_MAX_SHADER_MEMORY_BYTES (8 * 1024 * 1024) -#define SVGA3D_MAX_SHADER_MEMORY (SVGA3D_MAX_SHADER_MEMORY_BYTES / \ - sizeof(uint32)) +#define SVGA3D_MAX_SHADER_MEMORY \ + (SVGA3D_MAX_SHADER_MEMORY_BYTES / sizeof(uint32)) -/* - * The maximum value of threadGroupCount in each dimension - */ #define SVGA3D_MAX_SHADER_THREAD_GROUPS 65535 -#define SVGA3D_MAX_CLIP_PLANES 6 +#define SVGA3D_MAX_CLIP_PLANES 6 -/* - * This is the limit to the number of fixed-function texture - * transforms and texture coordinates we can support. It does *not* - * correspond to the number of texture image units (samplers) we - * support! - */ #define SVGA3D_MAX_TEXTURE_COORDS 8 -/* - * Number of faces in a cubemap. - */ #define SVGA3D_MAX_SURFACE_FACES 6 -/* - * Maximum number of array indexes in a GB surface (with DX enabled). - */ #define SVGA3D_SM4_MAX_SURFACE_ARRAYSIZE 512 #define SVGA3D_SM5_MAX_SURFACE_ARRAYSIZE 2048 #define SVGA3D_MAX_SURFACE_ARRAYSIZE SVGA3D_SM5_MAX_SURFACE_ARRAYSIZE -/* - * The maximum number of vertex arrays we're guaranteed to support in - * SVGA_3D_CMD_DRAWPRIMITIVES. - */ -#define SVGA3D_MAX_VERTEX_ARRAYS 32 +#define SVGA3D_MAX_VERTEX_ARRAYS 32 -/* - * The maximum number of primitive ranges we're guaranteed to support - * in SVGA_3D_CMD_DRAWPRIMITIVES. - */ #define SVGA3D_MAX_DRAW_PRIMITIVE_RANGES 32 -/* - * The maximum number of samples that can be contained in a surface. - */ #define SVGA3D_MAX_SAMPLES 8 -#endif /* _SVGA3D_LIMITS_H_ */ +#define SVGA3D_MIN_SBX_DATA_SIZE (GBYTES_2_BYTES(1)) +#define SVGA3D_MAX_SBX_DATA_SIZE (GBYTES_2_BYTES(4)) + +#endif diff --git a/drivers/gpu/drm/vmwgfx/device_include/svga3d_reg.h b/drivers/gpu/drm/vmwgfx/device_include/svga3d_reg.h index bdfc404c91e3..988d8509c472 100644 --- a/drivers/gpu/drm/vmwgfx/device_include/svga3d_reg.h +++ b/drivers/gpu/drm/vmwgfx/device_include/svga3d_reg.h @@ -1,6 +1,6 @@ -/* SPDX-License-Identifier: GPL-2.0 OR MIT */ /********************************************************** * Copyright 1998-2015 VMware, Inc. 
+ * SPDX-License-Identifier: GPL-2.0 OR MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation @@ -27,17 +27,13 @@ /* * svga3d_reg.h -- * - * SVGA 3d hardware definitions + * SVGA 3d hardware definitions */ -#ifndef _SVGA3D_REG_H_ -#define _SVGA3D_REG_H_ -#define INCLUDE_ALLOW_MODULE -#define INCLUDE_ALLOW_USERLEVEL -#define INCLUDE_ALLOW_VMCORE -#include "includeCheck.h" +#ifndef _SVGA3D_REG_H_ +#define _SVGA3D_REG_H_ #include "svga_reg.h" @@ -47,5 +43,4 @@ #include "svga3d_dx.h" #include "svga3d_devcaps.h" - -#endif /* _SVGA3D_REG_H_ */ +#endif diff --git a/drivers/gpu/drm/vmwgfx/device_include/svga3d_surfacedefs.h b/drivers/gpu/drm/vmwgfx/device_include/svga3d_surfacedefs.h index 127eaf0a0a58..7d98fc48414e 100644 --- a/drivers/gpu/drm/vmwgfx/device_include/svga3d_surfacedefs.h +++ b/drivers/gpu/drm/vmwgfx/device_include/svga3d_surfacedefs.h @@ -1,1667 +1,1561 @@ -/* SPDX-License-Identifier: GPL-2.0 OR MIT */ -/************************************************************************** +/********************************************************** + * Copyright 2008-2021 VMware, Inc. + * SPDX-License-Identifier: GPL-2.0 OR MIT * - * Copyright 2008-2015 VMware, Inc., Palo Alto, CA., USA + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ + **********************************************************/ /* * svga3d_surfacedefs.h -- * - * Surface definitions and inlineable utilities for SVGA3d. + * Surface definitions for SVGA3d. */ + + #ifndef _SVGA3D_SURFACEDEFS_H_ #define _SVGA3D_SURFACEDEFS_H_ -#define INCLUDE_ALLOW_USERLEVEL -#define INCLUDE_ALLOW_MODULE -#include "includeCheck.h" +#include "svga3d_types.h" -#include <linux/kernel.h> -#include <drm/vmwgfx_drm.h> +#ifdef __cplusplus +extern "C" { +#endif -#include "svga3d_reg.h" +struct SVGAUseCaps; -#define surf_size_struct struct drm_vmw_size +#if defined(_WIN32) && !defined(__GNUC__) -/* - * enum svga3d_block_desc - describes generic properties about formats. - */ -enum svga3d_block_desc { - /* Nothing special can be said about this format. */ - SVGA3DBLOCKDESC_NONE = 0, +#define STATIC_CONST __declspec(selectany) extern const +#else +#define STATIC_CONST static const +#endif + +typedef enum SVGA3dBlockDesc { - /* Format contains Blue/U data */ - SVGA3DBLOCKDESC_BLUE = 1 << 0, - SVGA3DBLOCKDESC_W = 1 << 0, - SVGA3DBLOCKDESC_BUMP_L = 1 << 0, + SVGA3DBLOCKDESC_NONE = 0, - /* Format contains Green/V data */ - SVGA3DBLOCKDESC_GREEN = 1 << 1, - SVGA3DBLOCKDESC_V = 1 << 1, + SVGA3DBLOCKDESC_BLUE = 1 << 0, + SVGA3DBLOCKDESC_W = 1 << 0, + SVGA3DBLOCKDESC_BUMP_L = 1 << 0, - /* Format contains Red/W/Luminance data */ - SVGA3DBLOCKDESC_RED = 1 << 2, - SVGA3DBLOCKDESC_U = 1 << 2, - SVGA3DBLOCKDESC_LUMINANCE = 1 << 2, + SVGA3DBLOCKDESC_GREEN = 1 << 1, + SVGA3DBLOCKDESC_V = 1 << 1, - /* Format contains Alpha/Q data */ - SVGA3DBLOCKDESC_ALPHA = 1 << 3, - SVGA3DBLOCKDESC_Q = 1 << 3, + SVGA3DBLOCKDESC_RED = 1 << 2, + SVGA3DBLOCKDESC_U = 1 << 2, + SVGA3DBLOCKDESC_LUMINANCE = 1 << 2, - /* Format is a buffer */ - SVGA3DBLOCKDESC_BUFFER = 1 << 4, + SVGA3DBLOCKDESC_ALPHA = 1 << 3, + SVGA3DBLOCKDESC_Q = 1 << 3, - /* Format is compressed */ - SVGA3DBLOCKDESC_COMPRESSED = 1 << 5, + SVGA3DBLOCKDESC_BUFFER = 1 << 4, - /* Format uses IEEE floating point */ - SVGA3DBLOCKDESC_FP = 1 << 6, + SVGA3DBLOCKDESC_COMPRESSED = 1 << 5, - /* Three separate blocks store data. */ - SVGA3DBLOCKDESC_PLANAR_YUV = 1 << 7, + SVGA3DBLOCKDESC_FP = 1 << 6, + + SVGA3DBLOCKDESC_PLANAR_YUV = 1 << 7, - /* 2 planes of Y, UV, e.g., NV12. */ SVGA3DBLOCKDESC_2PLANAR_YUV = 1 << 8, - /* 3 planes of separate Y, U, V, e.g., YV12. */ SVGA3DBLOCKDESC_3PLANAR_YUV = 1 << 9, - /* Block with a stencil channel */ - SVGA3DBLOCKDESC_STENCIL = 1 << 11, - - /* Typeless format */ - SVGA3DBLOCKDESC_TYPELESS = 1 << 12, - - /* Channels are signed integers */ - SVGA3DBLOCKDESC_SINT = 1 << 13, - - /* Channels are unsigned integers */ - SVGA3DBLOCKDESC_UINT = 1 << 14, - - /* Channels are normalized (when sampling) */ - SVGA3DBLOCKDESC_NORM = 1 << 15, - - /* Channels are in SRGB */ - SVGA3DBLOCKDESC_SRGB = 1 << 16, - - /* Shared exponent */ - SVGA3DBLOCKDESC_EXP = 1 << 17, - - /* Format contains color data. */ - SVGA3DBLOCKDESC_COLOR = 1 << 18, - /* Format contains depth data. */ - SVGA3DBLOCKDESC_DEPTH = 1 << 19, - /* Format contains bump data. */ - SVGA3DBLOCKDESC_BUMP = 1 << 20, - - /* Format contains YUV video data. */ - SVGA3DBLOCKDESC_YUV_VIDEO = 1 << 21, - - /* For mixed unsigned/signed formats. */ - SVGA3DBLOCKDESC_MIXED = 1 << 22, - - /* For distingushing CxV8U8. */ - SVGA3DBLOCKDESC_CX = 1 << 23, - - /* Different compressed format groups. 
*/ - SVGA3DBLOCKDESC_BC1 = 1 << 24, - SVGA3DBLOCKDESC_BC2 = 1 << 25, - SVGA3DBLOCKDESC_BC3 = 1 << 26, - SVGA3DBLOCKDESC_BC4 = 1 << 27, - SVGA3DBLOCKDESC_BC5 = 1 << 28, - SVGA3DBLOCKDESC_BC6H = 1 << 29, - SVGA3DBLOCKDESC_BC7 = 1 << 30, - - SVGA3DBLOCKDESC_A_UINT = SVGA3DBLOCKDESC_ALPHA | - SVGA3DBLOCKDESC_UINT | - SVGA3DBLOCKDESC_COLOR, - SVGA3DBLOCKDESC_A_UNORM = SVGA3DBLOCKDESC_A_UINT | - SVGA3DBLOCKDESC_NORM, - SVGA3DBLOCKDESC_R_UINT = SVGA3DBLOCKDESC_RED | - SVGA3DBLOCKDESC_UINT | - SVGA3DBLOCKDESC_COLOR, - SVGA3DBLOCKDESC_R_UNORM = SVGA3DBLOCKDESC_R_UINT | - SVGA3DBLOCKDESC_NORM, - SVGA3DBLOCKDESC_R_SINT = SVGA3DBLOCKDESC_RED | - SVGA3DBLOCKDESC_SINT | - SVGA3DBLOCKDESC_COLOR, - SVGA3DBLOCKDESC_R_SNORM = SVGA3DBLOCKDESC_R_SINT | - SVGA3DBLOCKDESC_NORM, - SVGA3DBLOCKDESC_G_UINT = SVGA3DBLOCKDESC_GREEN | - SVGA3DBLOCKDESC_UINT | - SVGA3DBLOCKDESC_COLOR, - SVGA3DBLOCKDESC_RG_UINT = SVGA3DBLOCKDESC_RED | - SVGA3DBLOCKDESC_GREEN | - SVGA3DBLOCKDESC_UINT | - SVGA3DBLOCKDESC_COLOR, - SVGA3DBLOCKDESC_RG_UNORM = SVGA3DBLOCKDESC_RG_UINT | - SVGA3DBLOCKDESC_NORM, - SVGA3DBLOCKDESC_RG_SINT = SVGA3DBLOCKDESC_RED | - SVGA3DBLOCKDESC_GREEN | - SVGA3DBLOCKDESC_SINT | - SVGA3DBLOCKDESC_COLOR, - SVGA3DBLOCKDESC_RG_SNORM = SVGA3DBLOCKDESC_RG_SINT | - SVGA3DBLOCKDESC_NORM, - SVGA3DBLOCKDESC_RGB_UINT = SVGA3DBLOCKDESC_RED | - SVGA3DBLOCKDESC_GREEN | - SVGA3DBLOCKDESC_BLUE | - SVGA3DBLOCKDESC_UINT | - SVGA3DBLOCKDESC_COLOR, - SVGA3DBLOCKDESC_RGB_SINT = SVGA3DBLOCKDESC_RED | - SVGA3DBLOCKDESC_GREEN | - SVGA3DBLOCKDESC_BLUE | - SVGA3DBLOCKDESC_SINT | - SVGA3DBLOCKDESC_COLOR, - SVGA3DBLOCKDESC_RGB_UNORM = SVGA3DBLOCKDESC_RGB_UINT | - SVGA3DBLOCKDESC_NORM, - SVGA3DBLOCKDESC_RGB_UNORM_SRGB = SVGA3DBLOCKDESC_RGB_UNORM | - SVGA3DBLOCKDESC_SRGB, - SVGA3DBLOCKDESC_RGBA_UINT = SVGA3DBLOCKDESC_RED | - SVGA3DBLOCKDESC_GREEN | - SVGA3DBLOCKDESC_BLUE | - SVGA3DBLOCKDESC_ALPHA | - SVGA3DBLOCKDESC_UINT | - SVGA3DBLOCKDESC_COLOR, - SVGA3DBLOCKDESC_RGBA_UNORM = SVGA3DBLOCKDESC_RGBA_UINT | - SVGA3DBLOCKDESC_NORM, - SVGA3DBLOCKDESC_RGBA_UNORM_SRGB = SVGA3DBLOCKDESC_RGBA_UNORM | - SVGA3DBLOCKDESC_SRGB, - SVGA3DBLOCKDESC_RGBA_SINT = SVGA3DBLOCKDESC_RED | - SVGA3DBLOCKDESC_GREEN | - SVGA3DBLOCKDESC_BLUE | - SVGA3DBLOCKDESC_ALPHA | - SVGA3DBLOCKDESC_SINT | - SVGA3DBLOCKDESC_COLOR, - SVGA3DBLOCKDESC_RGBA_SNORM = SVGA3DBLOCKDESC_RGBA_SINT | - SVGA3DBLOCKDESC_NORM, - SVGA3DBLOCKDESC_RGBA_FP = SVGA3DBLOCKDESC_RED | - SVGA3DBLOCKDESC_GREEN | - SVGA3DBLOCKDESC_BLUE | - SVGA3DBLOCKDESC_ALPHA | - SVGA3DBLOCKDESC_FP | - SVGA3DBLOCKDESC_COLOR, - SVGA3DBLOCKDESC_UV = SVGA3DBLOCKDESC_U | - SVGA3DBLOCKDESC_V | - SVGA3DBLOCKDESC_BUMP, - SVGA3DBLOCKDESC_UVL = SVGA3DBLOCKDESC_UV | - SVGA3DBLOCKDESC_BUMP_L | - SVGA3DBLOCKDESC_MIXED | - SVGA3DBLOCKDESC_BUMP, - SVGA3DBLOCKDESC_UVW = SVGA3DBLOCKDESC_UV | - SVGA3DBLOCKDESC_W | - SVGA3DBLOCKDESC_BUMP, - SVGA3DBLOCKDESC_UVWA = SVGA3DBLOCKDESC_UVW | - SVGA3DBLOCKDESC_ALPHA | - SVGA3DBLOCKDESC_MIXED | - SVGA3DBLOCKDESC_BUMP, - SVGA3DBLOCKDESC_UVWQ = SVGA3DBLOCKDESC_U | - SVGA3DBLOCKDESC_V | - SVGA3DBLOCKDESC_W | - SVGA3DBLOCKDESC_Q | - SVGA3DBLOCKDESC_BUMP, - SVGA3DBLOCKDESC_L_UNORM = SVGA3DBLOCKDESC_LUMINANCE | - SVGA3DBLOCKDESC_UINT | - SVGA3DBLOCKDESC_NORM | - SVGA3DBLOCKDESC_COLOR, - SVGA3DBLOCKDESC_LA_UNORM = SVGA3DBLOCKDESC_LUMINANCE | - SVGA3DBLOCKDESC_ALPHA | - SVGA3DBLOCKDESC_UINT | - SVGA3DBLOCKDESC_NORM | - SVGA3DBLOCKDESC_COLOR, - SVGA3DBLOCKDESC_R_FP = SVGA3DBLOCKDESC_RED | - SVGA3DBLOCKDESC_FP | - SVGA3DBLOCKDESC_COLOR, - SVGA3DBLOCKDESC_RG_FP = SVGA3DBLOCKDESC_R_FP | - SVGA3DBLOCKDESC_GREEN 
| - SVGA3DBLOCKDESC_COLOR, - SVGA3DBLOCKDESC_RGB_FP = SVGA3DBLOCKDESC_RG_FP | - SVGA3DBLOCKDESC_BLUE | - SVGA3DBLOCKDESC_COLOR, - SVGA3DBLOCKDESC_YUV = SVGA3DBLOCKDESC_YUV_VIDEO | - SVGA3DBLOCKDESC_COLOR, - SVGA3DBLOCKDESC_AYUV = SVGA3DBLOCKDESC_ALPHA | - SVGA3DBLOCKDESC_YUV_VIDEO | - SVGA3DBLOCKDESC_COLOR, - SVGA3DBLOCKDESC_RGB_EXP = SVGA3DBLOCKDESC_RED | - SVGA3DBLOCKDESC_GREEN | - SVGA3DBLOCKDESC_BLUE | - SVGA3DBLOCKDESC_EXP | - SVGA3DBLOCKDESC_COLOR, - - SVGA3DBLOCKDESC_COMP_TYPELESS = SVGA3DBLOCKDESC_COMPRESSED | - SVGA3DBLOCKDESC_TYPELESS, - SVGA3DBLOCKDESC_COMP_UNORM = SVGA3DBLOCKDESC_COMPRESSED | - SVGA3DBLOCKDESC_UINT | - SVGA3DBLOCKDESC_NORM | - SVGA3DBLOCKDESC_COLOR, - SVGA3DBLOCKDESC_COMP_SNORM = SVGA3DBLOCKDESC_COMPRESSED | - SVGA3DBLOCKDESC_SINT | - SVGA3DBLOCKDESC_NORM | - SVGA3DBLOCKDESC_COLOR, - SVGA3DBLOCKDESC_COMP_UNORM_SRGB = SVGA3DBLOCKDESC_COMP_UNORM | - SVGA3DBLOCKDESC_SRGB, - SVGA3DBLOCKDESC_BC1_COMP_TYPELESS = SVGA3DBLOCKDESC_BC1 | - SVGA3DBLOCKDESC_COMP_TYPELESS, - SVGA3DBLOCKDESC_BC1_COMP_UNORM = SVGA3DBLOCKDESC_BC1 | - SVGA3DBLOCKDESC_COMP_UNORM, - SVGA3DBLOCKDESC_BC1_COMP_UNORM_SRGB = SVGA3DBLOCKDESC_BC1_COMP_UNORM | - SVGA3DBLOCKDESC_SRGB, - SVGA3DBLOCKDESC_BC2_COMP_TYPELESS = SVGA3DBLOCKDESC_BC2 | - SVGA3DBLOCKDESC_COMP_TYPELESS, - SVGA3DBLOCKDESC_BC2_COMP_UNORM = SVGA3DBLOCKDESC_BC2 | - SVGA3DBLOCKDESC_COMP_UNORM, - SVGA3DBLOCKDESC_BC2_COMP_UNORM_SRGB = SVGA3DBLOCKDESC_BC2_COMP_UNORM | - SVGA3DBLOCKDESC_SRGB, - SVGA3DBLOCKDESC_BC3_COMP_TYPELESS = SVGA3DBLOCKDESC_BC3 | - SVGA3DBLOCKDESC_COMP_TYPELESS, - SVGA3DBLOCKDESC_BC3_COMP_UNORM = SVGA3DBLOCKDESC_BC3 | - SVGA3DBLOCKDESC_COMP_UNORM, - SVGA3DBLOCKDESC_BC3_COMP_UNORM_SRGB = SVGA3DBLOCKDESC_BC3_COMP_UNORM | - SVGA3DBLOCKDESC_SRGB, - SVGA3DBLOCKDESC_BC4_COMP_TYPELESS = SVGA3DBLOCKDESC_BC4 | - SVGA3DBLOCKDESC_COMP_TYPELESS, - SVGA3DBLOCKDESC_BC4_COMP_UNORM = SVGA3DBLOCKDESC_BC4 | - SVGA3DBLOCKDESC_COMP_UNORM, - SVGA3DBLOCKDESC_BC4_COMP_SNORM = SVGA3DBLOCKDESC_BC4 | - SVGA3DBLOCKDESC_COMP_SNORM, - SVGA3DBLOCKDESC_BC5_COMP_TYPELESS = SVGA3DBLOCKDESC_BC5 | - SVGA3DBLOCKDESC_COMP_TYPELESS, - SVGA3DBLOCKDESC_BC5_COMP_UNORM = SVGA3DBLOCKDESC_BC5 | - SVGA3DBLOCKDESC_COMP_UNORM, - SVGA3DBLOCKDESC_BC5_COMP_SNORM = SVGA3DBLOCKDESC_BC5 | - SVGA3DBLOCKDESC_COMP_SNORM, - SVGA3DBLOCKDESC_BC6H_COMP_TYPELESS = SVGA3DBLOCKDESC_BC6H | - SVGA3DBLOCKDESC_COMP_TYPELESS, - SVGA3DBLOCKDESC_BC6H_COMP_UF16 = SVGA3DBLOCKDESC_BC6H | - SVGA3DBLOCKDESC_COMPRESSED, - SVGA3DBLOCKDESC_BC6H_COMP_SF16 = SVGA3DBLOCKDESC_BC6H | - SVGA3DBLOCKDESC_COMPRESSED, - SVGA3DBLOCKDESC_BC7_COMP_TYPELESS = SVGA3DBLOCKDESC_BC7 | - SVGA3DBLOCKDESC_COMP_TYPELESS, - SVGA3DBLOCKDESC_BC7_COMP_UNORM = SVGA3DBLOCKDESC_BC7 | - SVGA3DBLOCKDESC_COMP_UNORM, - SVGA3DBLOCKDESC_BC7_COMP_UNORM_SRGB = SVGA3DBLOCKDESC_BC7_COMP_UNORM | - SVGA3DBLOCKDESC_SRGB, - - SVGA3DBLOCKDESC_NV12 = SVGA3DBLOCKDESC_YUV_VIDEO | - SVGA3DBLOCKDESC_PLANAR_YUV | - SVGA3DBLOCKDESC_2PLANAR_YUV | - SVGA3DBLOCKDESC_COLOR, - SVGA3DBLOCKDESC_YV12 = SVGA3DBLOCKDESC_YUV_VIDEO | - SVGA3DBLOCKDESC_PLANAR_YUV | - SVGA3DBLOCKDESC_3PLANAR_YUV | - SVGA3DBLOCKDESC_COLOR, - - SVGA3DBLOCKDESC_DEPTH_UINT = SVGA3DBLOCKDESC_DEPTH | - SVGA3DBLOCKDESC_UINT, - SVGA3DBLOCKDESC_DEPTH_UNORM = SVGA3DBLOCKDESC_DEPTH_UINT | - SVGA3DBLOCKDESC_NORM, - SVGA3DBLOCKDESC_DS = SVGA3DBLOCKDESC_DEPTH | - SVGA3DBLOCKDESC_STENCIL, - SVGA3DBLOCKDESC_DS_UINT = SVGA3DBLOCKDESC_DEPTH | - SVGA3DBLOCKDESC_STENCIL | - SVGA3DBLOCKDESC_UINT, - SVGA3DBLOCKDESC_DS_UNORM = SVGA3DBLOCKDESC_DS_UINT | - SVGA3DBLOCKDESC_NORM, - 
SVGA3DBLOCKDESC_DEPTH_FP = SVGA3DBLOCKDESC_DEPTH | - SVGA3DBLOCKDESC_FP, - - SVGA3DBLOCKDESC_UV_UINT = SVGA3DBLOCKDESC_UV | - SVGA3DBLOCKDESC_UINT, - SVGA3DBLOCKDESC_UV_SNORM = SVGA3DBLOCKDESC_UV | - SVGA3DBLOCKDESC_SINT | - SVGA3DBLOCKDESC_NORM, - SVGA3DBLOCKDESC_UVCX_SNORM = SVGA3DBLOCKDESC_UV_SNORM | - SVGA3DBLOCKDESC_CX, + SVGA3DBLOCKDESC_STENCIL = 1 << 11, + + SVGA3DBLOCKDESC_TYPELESS = 1 << 12, + + SVGA3DBLOCKDESC_SINT = 1 << 13, + + SVGA3DBLOCKDESC_UINT = 1 << 14, + + SVGA3DBLOCKDESC_NORM = 1 << 15, + + SVGA3DBLOCKDESC_SRGB = 1 << 16, + + SVGA3DBLOCKDESC_EXP = 1 << 17, + + SVGA3DBLOCKDESC_COLOR = 1 << 18, + + SVGA3DBLOCKDESC_DEPTH = 1 << 19, + + SVGA3DBLOCKDESC_BUMP = 1 << 20, + + SVGA3DBLOCKDESC_YUV_VIDEO = 1 << 21, + + SVGA3DBLOCKDESC_MIXED = 1 << 22, + + SVGA3DBLOCKDESC_CX = 1 << 23, + + SVGA3DBLOCKDESC_BC1 = 1 << 24, + SVGA3DBLOCKDESC_BC2 = 1 << 25, + SVGA3DBLOCKDESC_BC3 = 1 << 26, + SVGA3DBLOCKDESC_BC4 = 1 << 27, + SVGA3DBLOCKDESC_BC5 = 1 << 28, + SVGA3DBLOCKDESC_BC6H = 1 << 29, + SVGA3DBLOCKDESC_BC7 = 1 << 30, + SVGA3DBLOCKDESC_COMPRESSED_MASK = + SVGA3DBLOCKDESC_BC1 | SVGA3DBLOCKDESC_BC2 | + SVGA3DBLOCKDESC_BC3 | SVGA3DBLOCKDESC_BC4 | + SVGA3DBLOCKDESC_BC5 | SVGA3DBLOCKDESC_BC6H | + SVGA3DBLOCKDESC_BC7, + + SVGA3DBLOCKDESC_A_UINT = SVGA3DBLOCKDESC_ALPHA | SVGA3DBLOCKDESC_UINT | + SVGA3DBLOCKDESC_COLOR, + SVGA3DBLOCKDESC_A_UNORM = SVGA3DBLOCKDESC_A_UINT | SVGA3DBLOCKDESC_NORM, + SVGA3DBLOCKDESC_R_UINT = SVGA3DBLOCKDESC_RED | SVGA3DBLOCKDESC_UINT | + SVGA3DBLOCKDESC_COLOR, + SVGA3DBLOCKDESC_R_UNORM = SVGA3DBLOCKDESC_R_UINT | SVGA3DBLOCKDESC_NORM, + SVGA3DBLOCKDESC_R_SINT = SVGA3DBLOCKDESC_RED | SVGA3DBLOCKDESC_SINT | + SVGA3DBLOCKDESC_COLOR, + SVGA3DBLOCKDESC_R_SNORM = SVGA3DBLOCKDESC_R_SINT | SVGA3DBLOCKDESC_NORM, + SVGA3DBLOCKDESC_G_UINT = SVGA3DBLOCKDESC_GREEN | SVGA3DBLOCKDESC_UINT | + SVGA3DBLOCKDESC_COLOR, + SVGA3DBLOCKDESC_RG_UINT = SVGA3DBLOCKDESC_RED | SVGA3DBLOCKDESC_GREEN | + SVGA3DBLOCKDESC_UINT | SVGA3DBLOCKDESC_COLOR, + SVGA3DBLOCKDESC_RG_UNORM = + SVGA3DBLOCKDESC_RG_UINT | SVGA3DBLOCKDESC_NORM, + SVGA3DBLOCKDESC_RG_SINT = SVGA3DBLOCKDESC_RED | SVGA3DBLOCKDESC_GREEN | + SVGA3DBLOCKDESC_SINT | SVGA3DBLOCKDESC_COLOR, + SVGA3DBLOCKDESC_RG_SNORM = + SVGA3DBLOCKDESC_RG_SINT | SVGA3DBLOCKDESC_NORM, + SVGA3DBLOCKDESC_RGB_UINT = SVGA3DBLOCKDESC_RED | SVGA3DBLOCKDESC_GREEN | + SVGA3DBLOCKDESC_BLUE | SVGA3DBLOCKDESC_UINT | + SVGA3DBLOCKDESC_COLOR, + SVGA3DBLOCKDESC_RGB_SINT = SVGA3DBLOCKDESC_RED | SVGA3DBLOCKDESC_GREEN | + SVGA3DBLOCKDESC_BLUE | SVGA3DBLOCKDESC_SINT | + SVGA3DBLOCKDESC_COLOR, + SVGA3DBLOCKDESC_RGB_UNORM = + SVGA3DBLOCKDESC_RGB_UINT | SVGA3DBLOCKDESC_NORM, + SVGA3DBLOCKDESC_RGB_UNORM_SRGB = + SVGA3DBLOCKDESC_RGB_UNORM | SVGA3DBLOCKDESC_SRGB, + SVGA3DBLOCKDESC_RGBA_UINT = + SVGA3DBLOCKDESC_RED | SVGA3DBLOCKDESC_GREEN | + SVGA3DBLOCKDESC_BLUE | SVGA3DBLOCKDESC_ALPHA | + SVGA3DBLOCKDESC_UINT | SVGA3DBLOCKDESC_COLOR, + SVGA3DBLOCKDESC_RGBA_UNORM = + SVGA3DBLOCKDESC_RGBA_UINT | SVGA3DBLOCKDESC_NORM, + SVGA3DBLOCKDESC_RGBA_UNORM_SRGB = + SVGA3DBLOCKDESC_RGBA_UNORM | SVGA3DBLOCKDESC_SRGB, + SVGA3DBLOCKDESC_RGBA_SINT = + SVGA3DBLOCKDESC_RED | SVGA3DBLOCKDESC_GREEN | + SVGA3DBLOCKDESC_BLUE | SVGA3DBLOCKDESC_ALPHA | + SVGA3DBLOCKDESC_SINT | SVGA3DBLOCKDESC_COLOR, + SVGA3DBLOCKDESC_RGBA_SNORM = + SVGA3DBLOCKDESC_RGBA_SINT | SVGA3DBLOCKDESC_NORM, + SVGA3DBLOCKDESC_RGBA_FP = SVGA3DBLOCKDESC_RED | SVGA3DBLOCKDESC_GREEN | + SVGA3DBLOCKDESC_BLUE | SVGA3DBLOCKDESC_ALPHA | + SVGA3DBLOCKDESC_FP | SVGA3DBLOCKDESC_COLOR, + SVGA3DBLOCKDESC_UV = + SVGA3DBLOCKDESC_U | 
SVGA3DBLOCKDESC_V | SVGA3DBLOCKDESC_BUMP, + SVGA3DBLOCKDESC_UVL = SVGA3DBLOCKDESC_UV | SVGA3DBLOCKDESC_BUMP_L | + SVGA3DBLOCKDESC_MIXED | SVGA3DBLOCKDESC_BUMP, + SVGA3DBLOCKDESC_UVW = + SVGA3DBLOCKDESC_UV | SVGA3DBLOCKDESC_W | SVGA3DBLOCKDESC_BUMP, + SVGA3DBLOCKDESC_UVWA = SVGA3DBLOCKDESC_UVW | SVGA3DBLOCKDESC_ALPHA | + SVGA3DBLOCKDESC_MIXED | SVGA3DBLOCKDESC_BUMP, + SVGA3DBLOCKDESC_UVWQ = SVGA3DBLOCKDESC_U | SVGA3DBLOCKDESC_V | + SVGA3DBLOCKDESC_W | SVGA3DBLOCKDESC_Q | + SVGA3DBLOCKDESC_BUMP, + SVGA3DBLOCKDESC_L_UNORM = SVGA3DBLOCKDESC_LUMINANCE | + SVGA3DBLOCKDESC_UINT | SVGA3DBLOCKDESC_NORM | + SVGA3DBLOCKDESC_COLOR, + SVGA3DBLOCKDESC_LA_UNORM = SVGA3DBLOCKDESC_LUMINANCE | + SVGA3DBLOCKDESC_ALPHA | + SVGA3DBLOCKDESC_UINT | SVGA3DBLOCKDESC_NORM | + SVGA3DBLOCKDESC_COLOR, + SVGA3DBLOCKDESC_R_FP = SVGA3DBLOCKDESC_RED | SVGA3DBLOCKDESC_FP | + SVGA3DBLOCKDESC_COLOR, + SVGA3DBLOCKDESC_RG_FP = SVGA3DBLOCKDESC_R_FP | SVGA3DBLOCKDESC_GREEN | + SVGA3DBLOCKDESC_COLOR, + SVGA3DBLOCKDESC_RGB_FP = SVGA3DBLOCKDESC_RG_FP | SVGA3DBLOCKDESC_BLUE | + SVGA3DBLOCKDESC_COLOR, + SVGA3DBLOCKDESC_YUV = SVGA3DBLOCKDESC_YUV_VIDEO | SVGA3DBLOCKDESC_COLOR, + SVGA3DBLOCKDESC_AYUV = SVGA3DBLOCKDESC_ALPHA | + SVGA3DBLOCKDESC_YUV_VIDEO | + SVGA3DBLOCKDESC_COLOR, + SVGA3DBLOCKDESC_RGB_EXP = SVGA3DBLOCKDESC_RED | SVGA3DBLOCKDESC_GREEN | + SVGA3DBLOCKDESC_BLUE | SVGA3DBLOCKDESC_EXP | + SVGA3DBLOCKDESC_COLOR, + + SVGA3DBLOCKDESC_COMP_TYPELESS = + SVGA3DBLOCKDESC_COMPRESSED | SVGA3DBLOCKDESC_TYPELESS, + SVGA3DBLOCKDESC_COMP_UNORM = + SVGA3DBLOCKDESC_COMPRESSED | SVGA3DBLOCKDESC_UINT | + SVGA3DBLOCKDESC_NORM | SVGA3DBLOCKDESC_COLOR, + SVGA3DBLOCKDESC_COMP_SNORM = + SVGA3DBLOCKDESC_COMPRESSED | SVGA3DBLOCKDESC_SINT | + SVGA3DBLOCKDESC_NORM | SVGA3DBLOCKDESC_COLOR, + SVGA3DBLOCKDESC_COMP_UNORM_SRGB = + SVGA3DBLOCKDESC_COMP_UNORM | SVGA3DBLOCKDESC_SRGB, + SVGA3DBLOCKDESC_BC1_COMP_TYPELESS = + SVGA3DBLOCKDESC_BC1 | SVGA3DBLOCKDESC_COMP_TYPELESS, + SVGA3DBLOCKDESC_BC1_COMP_UNORM = + SVGA3DBLOCKDESC_BC1 | SVGA3DBLOCKDESC_COMP_UNORM, + SVGA3DBLOCKDESC_BC1_COMP_UNORM_SRGB = + SVGA3DBLOCKDESC_BC1_COMP_UNORM | SVGA3DBLOCKDESC_SRGB, + SVGA3DBLOCKDESC_BC2_COMP_TYPELESS = + SVGA3DBLOCKDESC_BC2 | SVGA3DBLOCKDESC_COMP_TYPELESS, + SVGA3DBLOCKDESC_BC2_COMP_UNORM = + SVGA3DBLOCKDESC_BC2 | SVGA3DBLOCKDESC_COMP_UNORM, + SVGA3DBLOCKDESC_BC2_COMP_UNORM_SRGB = + SVGA3DBLOCKDESC_BC2_COMP_UNORM | SVGA3DBLOCKDESC_SRGB, + SVGA3DBLOCKDESC_BC3_COMP_TYPELESS = + SVGA3DBLOCKDESC_BC3 | SVGA3DBLOCKDESC_COMP_TYPELESS, + SVGA3DBLOCKDESC_BC3_COMP_UNORM = + SVGA3DBLOCKDESC_BC3 | SVGA3DBLOCKDESC_COMP_UNORM, + SVGA3DBLOCKDESC_BC3_COMP_UNORM_SRGB = + SVGA3DBLOCKDESC_BC3_COMP_UNORM | SVGA3DBLOCKDESC_SRGB, + SVGA3DBLOCKDESC_BC4_COMP_TYPELESS = + SVGA3DBLOCKDESC_BC4 | SVGA3DBLOCKDESC_COMP_TYPELESS, + SVGA3DBLOCKDESC_BC4_COMP_UNORM = + SVGA3DBLOCKDESC_BC4 | SVGA3DBLOCKDESC_COMP_UNORM, + SVGA3DBLOCKDESC_BC4_COMP_SNORM = + SVGA3DBLOCKDESC_BC4 | SVGA3DBLOCKDESC_COMP_SNORM, + SVGA3DBLOCKDESC_BC5_COMP_TYPELESS = + SVGA3DBLOCKDESC_BC5 | SVGA3DBLOCKDESC_COMP_TYPELESS, + SVGA3DBLOCKDESC_BC5_COMP_UNORM = + SVGA3DBLOCKDESC_BC5 | SVGA3DBLOCKDESC_COMP_UNORM, + SVGA3DBLOCKDESC_BC5_COMP_SNORM = + SVGA3DBLOCKDESC_BC5 | SVGA3DBLOCKDESC_COMP_SNORM, + SVGA3DBLOCKDESC_BC6H_COMP_TYPELESS = + SVGA3DBLOCKDESC_BC6H | SVGA3DBLOCKDESC_COMP_TYPELESS, + SVGA3DBLOCKDESC_BC6H_COMP_UF16 = + SVGA3DBLOCKDESC_BC6H | SVGA3DBLOCKDESC_COMPRESSED, + SVGA3DBLOCKDESC_BC6H_COMP_SF16 = + SVGA3DBLOCKDESC_BC6H | SVGA3DBLOCKDESC_COMPRESSED, + SVGA3DBLOCKDESC_BC7_COMP_TYPELESS = + SVGA3DBLOCKDESC_BC7 | 
SVGA3DBLOCKDESC_COMP_TYPELESS, + SVGA3DBLOCKDESC_BC7_COMP_UNORM = + SVGA3DBLOCKDESC_BC7 | SVGA3DBLOCKDESC_COMP_UNORM, + SVGA3DBLOCKDESC_BC7_COMP_UNORM_SRGB = + SVGA3DBLOCKDESC_BC7_COMP_UNORM | SVGA3DBLOCKDESC_SRGB, + + SVGA3DBLOCKDESC_NV12 = + SVGA3DBLOCKDESC_YUV_VIDEO | SVGA3DBLOCKDESC_PLANAR_YUV | + SVGA3DBLOCKDESC_2PLANAR_YUV | SVGA3DBLOCKDESC_COLOR, + SVGA3DBLOCKDESC_YV12 = + SVGA3DBLOCKDESC_YUV_VIDEO | SVGA3DBLOCKDESC_PLANAR_YUV | + SVGA3DBLOCKDESC_3PLANAR_YUV | SVGA3DBLOCKDESC_COLOR, + + SVGA3DBLOCKDESC_DEPTH_UINT = + SVGA3DBLOCKDESC_DEPTH | SVGA3DBLOCKDESC_UINT, + SVGA3DBLOCKDESC_DEPTH_UNORM = + SVGA3DBLOCKDESC_DEPTH_UINT | SVGA3DBLOCKDESC_NORM, + SVGA3DBLOCKDESC_DS = SVGA3DBLOCKDESC_DEPTH | SVGA3DBLOCKDESC_STENCIL, + SVGA3DBLOCKDESC_DS_UINT = SVGA3DBLOCKDESC_DEPTH | + SVGA3DBLOCKDESC_STENCIL | + SVGA3DBLOCKDESC_UINT, + SVGA3DBLOCKDESC_DS_UNORM = + SVGA3DBLOCKDESC_DS_UINT | SVGA3DBLOCKDESC_NORM, + SVGA3DBLOCKDESC_DEPTH_FP = SVGA3DBLOCKDESC_DEPTH | SVGA3DBLOCKDESC_FP, + + SVGA3DBLOCKDESC_UV_UINT = SVGA3DBLOCKDESC_UV | SVGA3DBLOCKDESC_UINT, + SVGA3DBLOCKDESC_UV_SNORM = SVGA3DBLOCKDESC_UV | SVGA3DBLOCKDESC_SINT | + SVGA3DBLOCKDESC_NORM, + SVGA3DBLOCKDESC_UVCX_SNORM = + SVGA3DBLOCKDESC_UV_SNORM | SVGA3DBLOCKDESC_CX, SVGA3DBLOCKDESC_UVWQ_SNORM = SVGA3DBLOCKDESC_UVWQ | SVGA3DBLOCKDESC_SINT | SVGA3DBLOCKDESC_NORM, -}; +} SVGA3dBlockDesc; -struct svga3d_channel_def { +typedef struct SVGA3dChannelDef { union { - u8 blue; - u8 w_bump; - u8 l_bump; - u8 uv_video; - u8 u_video; + uint8 blue; + uint8 w_bump; + uint8 l_bump; + uint8 uv_video; + uint8 u_video; }; union { - u8 green; - u8 stencil; - u8 v_bump; - u8 v_video; + uint8 green; + uint8 stencil; + uint8 v_bump; + uint8 v_video; }; union { - u8 red; - u8 u_bump; - u8 luminance; - u8 y_video; - u8 depth; - u8 data; + uint8 red; + uint8 u_bump; + uint8 luminance; + uint8 y_video; + uint8 depth; + uint8 data; }; union { - u8 alpha; - u8 q_bump; - u8 exp; + uint8 alpha; + uint8 q_bump; + uint8 exp; }; -}; +} SVGA3dChannelDef; -/* - * struct svga3d_surface_desc - describes the actual pixel data. 
- * - * @format: Format - * @block_desc: Block description - * @block_size: Dimensions in pixels of a block - * @bytes_per_block: Size of block in bytes - * @pitch_bytes_per_block: Size of a block in bytes for purposes of pitch - * @bit_depth: Channel bit depths - * @bit_offset: Channel bit masks (in bits offset from the start of the pointer) - */ -struct svga3d_surface_desc { +typedef struct SVGA3dSurfaceDesc { SVGA3dSurfaceFormat format; - enum svga3d_block_desc block_desc; - - surf_size_struct block_size; - u32 bytes_per_block; - u32 pitch_bytes_per_block; - - struct svga3d_channel_def bit_depth; - struct svga3d_channel_def bit_offset; -}; - -static const struct svga3d_surface_desc svga3d_surface_descs[] = { - {SVGA3D_FORMAT_INVALID, SVGA3DBLOCKDESC_NONE, - {1, 1, 1}, 0, 0, - {{0}, {0}, {0}, {0}}, - {{0}, {0}, {0}, {0}}}, - - {SVGA3D_X8R8G8B8, SVGA3DBLOCKDESC_RGB_UNORM, - {1, 1, 1}, 4, 4, - {{8}, {8}, {8}, {0}}, - {{0}, {8}, {16}, {24}}}, - - {SVGA3D_A8R8G8B8, SVGA3DBLOCKDESC_RGBA_UNORM, - {1, 1, 1}, 4, 4, - {{8}, {8}, {8}, {8}}, - {{0}, {8}, {16}, {24}}}, - - {SVGA3D_R5G6B5, SVGA3DBLOCKDESC_RGB_UNORM, - {1, 1, 1}, 2, 2, - {{5}, {6}, {5}, {0}}, - {{0}, {5}, {11}, {0}}}, - - {SVGA3D_X1R5G5B5, SVGA3DBLOCKDESC_RGB_UNORM, - {1, 1, 1}, 2, 2, - {{5}, {5}, {5}, {0}}, - {{0}, {5}, {10}, {0}}}, - - {SVGA3D_A1R5G5B5, SVGA3DBLOCKDESC_RGBA_UNORM, - {1, 1, 1}, 2, 2, - {{5}, {5}, {5}, {1}}, - {{0}, {5}, {10}, {15}}}, - - {SVGA3D_A4R4G4B4, SVGA3DBLOCKDESC_RGBA_UNORM, - {1, 1, 1}, 2, 2, - {{4}, {4}, {4}, {4}}, - {{0}, {4}, {8}, {12}}}, - - {SVGA3D_Z_D32, SVGA3DBLOCKDESC_DEPTH_UNORM, - {1, 1, 1}, 4, 4, - {{0}, {0}, {32}, {0}}, - {{0}, {0}, {0}, {0}}}, - - {SVGA3D_Z_D16, SVGA3DBLOCKDESC_DEPTH_UNORM, - {1, 1, 1}, 2, 2, - {{0}, {0}, {16}, {0}}, - {{0}, {0}, {0}, {0}}}, - - {SVGA3D_Z_D24S8, SVGA3DBLOCKDESC_DS_UNORM, - {1, 1, 1}, 4, 4, - {{0}, {8}, {24}, {0}}, - {{0}, {0}, {8}, {0}}}, - - {SVGA3D_Z_D15S1, SVGA3DBLOCKDESC_DS_UNORM, - {1, 1, 1}, 2, 2, - {{0}, {1}, {15}, {0}}, - {{0}, {0}, {1}, {0}}}, - - {SVGA3D_LUMINANCE8, SVGA3DBLOCKDESC_L_UNORM, - {1, 1, 1}, 1, 1, - {{0}, {0}, {8}, {0}}, - {{0}, {0}, {0}, {0}}}, - - {SVGA3D_LUMINANCE4_ALPHA4, SVGA3DBLOCKDESC_LA_UNORM, - {1, 1, 1}, 1, 1, - {{0}, {0}, {4}, {4}}, - {{0}, {0}, {0}, {4}}}, - - {SVGA3D_LUMINANCE16, SVGA3DBLOCKDESC_L_UNORM, - {1, 1, 1}, 2, 2, - {{0}, {0}, {16}, {0}}, - {{0}, {0}, {0}, {0}}}, - - {SVGA3D_LUMINANCE8_ALPHA8, SVGA3DBLOCKDESC_LA_UNORM, - {1, 1, 1}, 2, 2, - {{0}, {0}, {8}, {8}}, - {{0}, {0}, {0}, {8}}}, - - {SVGA3D_DXT1, SVGA3DBLOCKDESC_BC1_COMP_UNORM, - {4, 4, 1}, 8, 8, - {{0}, {0}, {64}, {0}}, - {{0}, {0}, {0}, {0}}}, - - {SVGA3D_DXT2, SVGA3DBLOCKDESC_BC2_COMP_UNORM, - {4, 4, 1}, 16, 16, - {{0}, {0}, {128}, {0}}, - {{0}, {0}, {0}, {0}}}, - - {SVGA3D_DXT3, SVGA3DBLOCKDESC_BC2_COMP_UNORM, - {4, 4, 1}, 16, 16, - {{0}, {0}, {128}, {0}}, - {{0}, {0}, {0}, {0}}}, - - {SVGA3D_DXT4, SVGA3DBLOCKDESC_BC3_COMP_UNORM, - {4, 4, 1}, 16, 16, - {{0}, {0}, {128}, {0}}, - {{0}, {0}, {0}, {0}}}, - - {SVGA3D_DXT5, SVGA3DBLOCKDESC_BC3_COMP_UNORM, - {4, 4, 1}, 16, 16, - {{0}, {0}, {128}, {0}}, - {{0}, {0}, {0}, {0}}}, - - {SVGA3D_BUMPU8V8, SVGA3DBLOCKDESC_UV_SNORM, - {1, 1, 1}, 2, 2, - {{0}, {8}, {8}, {0}}, - {{0}, {8}, {0}, {0}}}, - - {SVGA3D_BUMPL6V5U5, SVGA3DBLOCKDESC_UVL, - {1, 1, 1}, 2, 2, - {{6}, {5}, {5}, {0}}, - {{10}, {5}, {0}, {0}}}, - - {SVGA3D_BUMPX8L8V8U8, SVGA3DBLOCKDESC_UVL, - {1, 1, 1}, 4, 4, - {{8}, {8}, {8}, {0}}, - {{16}, {8}, {0}, {0}}}, - - {SVGA3D_FORMAT_DEAD1, SVGA3DBLOCKDESC_NONE, - {1, 1, 1}, 3, 3, - {{8}, {8}, {8}, {0}}, - {{16}, 
{8}, {0}, {0}}}, - - {SVGA3D_ARGB_S10E5, SVGA3DBLOCKDESC_RGBA_FP, - {1, 1, 1}, 8, 8, - {{16}, {16}, {16}, {16}}, - {{32}, {16}, {0}, {48}}}, - - {SVGA3D_ARGB_S23E8, SVGA3DBLOCKDESC_RGBA_FP, - {1, 1, 1}, 16, 16, - {{32}, {32}, {32}, {32}}, - {{64}, {32}, {0}, {96}}}, - - {SVGA3D_A2R10G10B10, SVGA3DBLOCKDESC_RGBA_UNORM, - {1, 1, 1}, 4, 4, - {{10}, {10}, {10}, {2}}, - {{0}, {10}, {20}, {30}}}, - - {SVGA3D_V8U8, SVGA3DBLOCKDESC_UV_SNORM, - {1, 1, 1}, 2, 2, - {{0}, {8}, {8}, {0}}, - {{0}, {8}, {0}, {0}}}, - - {SVGA3D_Q8W8V8U8, SVGA3DBLOCKDESC_UVWQ_SNORM, - {1, 1, 1}, 4, 4, - {{8}, {8}, {8}, {8}}, - {{16}, {8}, {0}, {24}}}, - - {SVGA3D_CxV8U8, SVGA3DBLOCKDESC_UVCX_SNORM, - {1, 1, 1}, 2, 2, - {{0}, {8}, {8}, {0}}, - {{0}, {8}, {0}, {0}}}, - - {SVGA3D_X8L8V8U8, SVGA3DBLOCKDESC_UVL, - {1, 1, 1}, 4, 4, - {{8}, {8}, {8}, {0}}, - {{16}, {8}, {0}, {0}}}, - - {SVGA3D_A2W10V10U10, SVGA3DBLOCKDESC_UVWA, - {1, 1, 1}, 4, 4, - {{10}, {10}, {10}, {2}}, - {{20}, {10}, {0}, {30}}}, - - {SVGA3D_ALPHA8, SVGA3DBLOCKDESC_A_UNORM, - {1, 1, 1}, 1, 1, - {{0}, {0}, {0}, {8}}, - {{0}, {0}, {0}, {0}}}, - - {SVGA3D_R_S10E5, SVGA3DBLOCKDESC_R_FP, - {1, 1, 1}, 2, 2, - {{0}, {0}, {16}, {0}}, - {{0}, {0}, {0}, {0}}}, - - {SVGA3D_R_S23E8, SVGA3DBLOCKDESC_R_FP, - {1, 1, 1}, 4, 4, - {{0}, {0}, {32}, {0}}, - {{0}, {0}, {0}, {0}}}, - - {SVGA3D_RG_S10E5, SVGA3DBLOCKDESC_RG_FP, - {1, 1, 1}, 4, 4, - {{0}, {16}, {16}, {0}}, - {{0}, {16}, {0}, {0}}}, - - {SVGA3D_RG_S23E8, SVGA3DBLOCKDESC_RG_FP, - {1, 1, 1}, 8, 8, - {{0}, {32}, {32}, {0}}, - {{0}, {32}, {0}, {0}}}, - - {SVGA3D_BUFFER, SVGA3DBLOCKDESC_BUFFER, - {1, 1, 1}, 1, 1, - {{0}, {0}, {8}, {0}}, - {{0}, {0}, {0}, {0}}}, - - {SVGA3D_Z_D24X8, SVGA3DBLOCKDESC_DEPTH_UNORM, - {1, 1, 1}, 4, 4, - {{0}, {0}, {24}, {0}}, - {{0}, {0}, {8}, {0}}}, - - {SVGA3D_V16U16, SVGA3DBLOCKDESC_UV_SNORM, - {1, 1, 1}, 4, 4, - {{0}, {16}, {16}, {0}}, - {{0}, {16}, {0}, {0}}}, - - {SVGA3D_G16R16, SVGA3DBLOCKDESC_RG_UNORM, - {1, 1, 1}, 4, 4, - {{0}, {16}, {16}, {0}}, - {{0}, {16}, {0}, {0}}}, - - {SVGA3D_A16B16G16R16, SVGA3DBLOCKDESC_RGBA_UNORM, - {1, 1, 1}, 8, 8, - {{16}, {16}, {16}, {16}}, - {{32}, {16}, {0}, {48}}}, - - {SVGA3D_UYVY, SVGA3DBLOCKDESC_YUV, - {2, 1, 1}, 4, 4, - {{8}, {0}, {8}, {0}}, - {{0}, {0}, {8}, {0}}}, - - {SVGA3D_YUY2, SVGA3DBLOCKDESC_YUV, - {2, 1, 1}, 4, 4, - {{8}, {0}, {8}, {0}}, - {{8}, {0}, {0}, {0}}}, - - {SVGA3D_NV12, SVGA3DBLOCKDESC_NV12, - {2, 2, 1}, 6, 2, - {{0}, {0}, {48}, {0}}, - {{0}, {0}, {0}, {0}}}, - - {SVGA3D_FORMAT_DEAD2, SVGA3DBLOCKDESC_NONE, - {1, 1, 1}, 4, 4, - {{8}, {8}, {8}, {8}}, - {{0}, {8}, {16}, {24}}}, - - {SVGA3D_R32G32B32A32_TYPELESS, SVGA3DBLOCKDESC_TYPELESS, - {1, 1, 1}, 16, 16, - {{32}, {32}, {32}, {32}}, - {{64}, {32}, {0}, {96}}}, - - {SVGA3D_R32G32B32A32_UINT, SVGA3DBLOCKDESC_RGBA_UINT, - {1, 1, 1}, 16, 16, - {{32}, {32}, {32}, {32}}, - {{64}, {32}, {0}, {96}}}, - - {SVGA3D_R32G32B32A32_SINT, SVGA3DBLOCKDESC_RGBA_SINT, - {1, 1, 1}, 16, 16, - {{32}, {32}, {32}, {32}}, - {{64}, {32}, {0}, {96}}}, - - {SVGA3D_R32G32B32_TYPELESS, SVGA3DBLOCKDESC_TYPELESS, - {1, 1, 1}, 12, 12, - {{32}, {32}, {32}, {0}}, - {{64}, {32}, {0}, {0}}}, - - {SVGA3D_R32G32B32_FLOAT, SVGA3DBLOCKDESC_RGB_FP, - {1, 1, 1}, 12, 12, - {{32}, {32}, {32}, {0}}, - {{64}, {32}, {0}, {0}}}, - - {SVGA3D_R32G32B32_UINT, SVGA3DBLOCKDESC_RGB_UINT, - {1, 1, 1}, 12, 12, - {{32}, {32}, {32}, {0}}, - {{64}, {32}, {0}, {0}}}, - - {SVGA3D_R32G32B32_SINT, SVGA3DBLOCKDESC_RGB_SINT, - {1, 1, 1}, 12, 12, - {{32}, {32}, {32}, {0}}, - {{64}, {32}, {0}, {0}}}, - - {SVGA3D_R16G16B16A16_TYPELESS, 
SVGA3DBLOCKDESC_TYPELESS, - {1, 1, 1}, 8, 8, - {{16}, {16}, {16}, {16}}, - {{32}, {16}, {0}, {48}}}, - - {SVGA3D_R16G16B16A16_UINT, SVGA3DBLOCKDESC_RGBA_UINT, - {1, 1, 1}, 8, 8, - {{16}, {16}, {16}, {16}}, - {{32}, {16}, {0}, {48}}}, - - {SVGA3D_R16G16B16A16_SNORM, SVGA3DBLOCKDESC_RGBA_SNORM, - {1, 1, 1}, 8, 8, - {{16}, {16}, {16}, {16}}, - {{32}, {16}, {0}, {48}}}, - - {SVGA3D_R16G16B16A16_SINT, SVGA3DBLOCKDESC_RGBA_SINT, - {1, 1, 1}, 8, 8, - {{16}, {16}, {16}, {16}}, - {{32}, {16}, {0}, {48}}}, - - {SVGA3D_R32G32_TYPELESS, SVGA3DBLOCKDESC_TYPELESS, - {1, 1, 1}, 8, 8, - {{0}, {32}, {32}, {0}}, - {{0}, {32}, {0}, {0}}}, - - {SVGA3D_R32G32_UINT, SVGA3DBLOCKDESC_RG_UINT, - {1, 1, 1}, 8, 8, - {{0}, {32}, {32}, {0}}, - {{0}, {32}, {0}, {0}}}, - - {SVGA3D_R32G32_SINT, SVGA3DBLOCKDESC_RG_SINT, - {1, 1, 1}, 8, 8, - {{0}, {32}, {32}, {0}}, - {{0}, {32}, {0}, {0}}}, - - {SVGA3D_R32G8X24_TYPELESS, SVGA3DBLOCKDESC_TYPELESS, - {1, 1, 1}, 8, 8, - {{0}, {8}, {32}, {0}}, - {{0}, {32}, {0}, {0}}}, - - {SVGA3D_D32_FLOAT_S8X24_UINT, SVGA3DBLOCKDESC_DS, - {1, 1, 1}, 8, 8, - {{0}, {8}, {32}, {0}}, - {{0}, {32}, {0}, {0}}}, - - {SVGA3D_R32_FLOAT_X8X24, SVGA3DBLOCKDESC_R_FP, - {1, 1, 1}, 8, 8, - {{0}, {0}, {32}, {0}}, - {{0}, {0}, {0}, {0}}}, - - {SVGA3D_X32_G8X24_UINT, SVGA3DBLOCKDESC_G_UINT, - {1, 1, 1}, 8, 8, - {{0}, {8}, {0}, {0}}, - {{0}, {32}, {0}, {0}}}, - - {SVGA3D_R10G10B10A2_TYPELESS, SVGA3DBLOCKDESC_TYPELESS, - {1, 1, 1}, 4, 4, - {{10}, {10}, {10}, {2}}, - {{20}, {10}, {0}, {30}}}, - - {SVGA3D_R10G10B10A2_UINT, SVGA3DBLOCKDESC_RGBA_UINT, - {1, 1, 1}, 4, 4, - {{10}, {10}, {10}, {2}}, - {{20}, {10}, {0}, {30}}}, - - {SVGA3D_R11G11B10_FLOAT, SVGA3DBLOCKDESC_RGB_FP, - {1, 1, 1}, 4, 4, - {{10}, {11}, {11}, {0}}, - {{22}, {11}, {0}, {0}}}, - - {SVGA3D_R8G8B8A8_TYPELESS, SVGA3DBLOCKDESC_TYPELESS, - {1, 1, 1}, 4, 4, - {{8}, {8}, {8}, {8}}, - {{16}, {8}, {0}, {24}}}, - - {SVGA3D_R8G8B8A8_UNORM, SVGA3DBLOCKDESC_RGBA_UNORM, - {1, 1, 1}, 4, 4, - {{8}, {8}, {8}, {8}}, - {{16}, {8}, {0}, {24}}}, - - {SVGA3D_R8G8B8A8_UNORM_SRGB, SVGA3DBLOCKDESC_RGBA_UNORM_SRGB, - {1, 1, 1}, 4, 4, - {{8}, {8}, {8}, {8}}, - {{16}, {8}, {0}, {24}}}, - - {SVGA3D_R8G8B8A8_UINT, SVGA3DBLOCKDESC_RGBA_UINT, - {1, 1, 1}, 4, 4, - {{8}, {8}, {8}, {8}}, - {{16}, {8}, {0}, {24}}}, - - {SVGA3D_R8G8B8A8_SINT, SVGA3DBLOCKDESC_RGBA_SINT, - {1, 1, 1}, 4, 4, - {{8}, {8}, {8}, {8}}, - {{16}, {8}, {0}, {24}}}, - - {SVGA3D_R16G16_TYPELESS, SVGA3DBLOCKDESC_TYPELESS, - {1, 1, 1}, 4, 4, - {{0}, {16}, {16}, {0}}, - {{0}, {16}, {0}, {0}}}, - - {SVGA3D_R16G16_UINT, SVGA3DBLOCKDESC_RG_UINT, - {1, 1, 1}, 4, 4, - {{0}, {16}, {16}, {0}}, - {{0}, {16}, {0}, {0}}}, - - {SVGA3D_R16G16_SINT, SVGA3DBLOCKDESC_RG_SINT, - {1, 1, 1}, 4, 4, - {{0}, {16}, {16}, {0}}, - {{0}, {16}, {0}, {0}}}, - - {SVGA3D_R32_TYPELESS, SVGA3DBLOCKDESC_TYPELESS, - {1, 1, 1}, 4, 4, - {{0}, {0}, {32}, {0}}, - {{0}, {0}, {0}, {0}}}, - - {SVGA3D_D32_FLOAT, SVGA3DBLOCKDESC_DEPTH_FP, - {1, 1, 1}, 4, 4, - {{0}, {0}, {32}, {0}}, - {{0}, {0}, {0}, {0}}}, - - {SVGA3D_R32_UINT, SVGA3DBLOCKDESC_R_UINT, - {1, 1, 1}, 4, 4, - {{0}, {0}, {32}, {0}}, - {{0}, {0}, {0}, {0}}}, - - {SVGA3D_R32_SINT, SVGA3DBLOCKDESC_R_SINT, - {1, 1, 1}, 4, 4, - {{0}, {0}, {32}, {0}}, - {{0}, {0}, {0}, {0}}}, - - {SVGA3D_R24G8_TYPELESS, SVGA3DBLOCKDESC_TYPELESS, - {1, 1, 1}, 4, 4, - {{0}, {8}, {24}, {0}}, - {{0}, {24}, {0}, {0}}}, - - {SVGA3D_D24_UNORM_S8_UINT, SVGA3DBLOCKDESC_DS_UNORM, - {1, 1, 1}, 4, 4, - {{0}, {8}, {24}, {0}}, - {{0}, {24}, {0}, {0}}}, - - {SVGA3D_R24_UNORM_X8, SVGA3DBLOCKDESC_R_UNORM, - {1, 1, 1}, 4, 4, - 
{{0}, {0}, {24}, {0}}, - {{0}, {0}, {0}, {0}}}, - - {SVGA3D_X24_G8_UINT, SVGA3DBLOCKDESC_G_UINT, - {1, 1, 1}, 4, 4, - {{0}, {8}, {0}, {0}}, - {{0}, {24}, {0}, {0}}}, - - {SVGA3D_R8G8_TYPELESS, SVGA3DBLOCKDESC_TYPELESS, - {1, 1, 1}, 2, 2, - {{0}, {8}, {8}, {0}}, - {{0}, {8}, {0}, {0}}}, - - {SVGA3D_R8G8_UNORM, SVGA3DBLOCKDESC_RG_UNORM, - {1, 1, 1}, 2, 2, - {{0}, {8}, {8}, {0}}, - {{0}, {8}, {0}, {0}}}, - - {SVGA3D_R8G8_UINT, SVGA3DBLOCKDESC_RG_UINT, - {1, 1, 1}, 2, 2, - {{0}, {8}, {8}, {0}}, - {{0}, {8}, {0}, {0}}}, - - {SVGA3D_R8G8_SINT, SVGA3DBLOCKDESC_RG_SINT, - {1, 1, 1}, 2, 2, - {{0}, {8}, {8}, {0}}, - {{0}, {8}, {0}, {0}}}, - - {SVGA3D_R16_TYPELESS, SVGA3DBLOCKDESC_TYPELESS, - {1, 1, 1}, 2, 2, - {{0}, {0}, {16}, {0}}, - {{0}, {0}, {0}, {0}}}, - - {SVGA3D_R16_UNORM, SVGA3DBLOCKDESC_R_UNORM, - {1, 1, 1}, 2, 2, - {{0}, {0}, {16}, {0}}, - {{0}, {0}, {0}, {0}}}, - - {SVGA3D_R16_UINT, SVGA3DBLOCKDESC_R_UINT, - {1, 1, 1}, 2, 2, - {{0}, {0}, {16}, {0}}, - {{0}, {0}, {0}, {0}}}, - - {SVGA3D_R16_SNORM, SVGA3DBLOCKDESC_R_SNORM, - {1, 1, 1}, 2, 2, - {{0}, {0}, {16}, {0}}, - {{0}, {0}, {0}, {0}}}, - - {SVGA3D_R16_SINT, SVGA3DBLOCKDESC_R_SINT, - {1, 1, 1}, 2, 2, - {{0}, {0}, {16}, {0}}, - {{0}, {0}, {0}, {0}}}, - - {SVGA3D_R8_TYPELESS, SVGA3DBLOCKDESC_TYPELESS, - {1, 1, 1}, 1, 1, - {{0}, {0}, {8}, {0}}, - {{0}, {0}, {0}, {0}}}, - - {SVGA3D_R8_UNORM, SVGA3DBLOCKDESC_R_UNORM, - {1, 1, 1}, 1, 1, - {{0}, {0}, {8}, {0}}, - {{0}, {0}, {0}, {0}}}, - - {SVGA3D_R8_UINT, SVGA3DBLOCKDESC_R_UINT, - {1, 1, 1}, 1, 1, - {{0}, {0}, {8}, {0}}, - {{0}, {0}, {0}, {0}}}, - - {SVGA3D_R8_SNORM, SVGA3DBLOCKDESC_R_SNORM, - {1, 1, 1}, 1, 1, - {{0}, {0}, {8}, {0}}, - {{0}, {0}, {0}, {0}}}, - - {SVGA3D_R8_SINT, SVGA3DBLOCKDESC_R_SINT, - {1, 1, 1}, 1, 1, - {{0}, {0}, {8}, {0}}, - {{0}, {0}, {0}, {0}}}, - - {SVGA3D_P8, SVGA3DBLOCKDESC_NONE, - {1, 1, 1}, 1, 1, - {{0}, {0}, {8}, {0}}, - {{0}, {0}, {0}, {0}}}, - - {SVGA3D_R9G9B9E5_SHAREDEXP, SVGA3DBLOCKDESC_RGB_EXP, - {1, 1, 1}, 4, 4, - {{9}, {9}, {9}, {5}}, - {{18}, {9}, {0}, {27}}}, - - {SVGA3D_R8G8_B8G8_UNORM, SVGA3DBLOCKDESC_NONE, - {2, 1, 1}, 4, 4, - {{0}, {8}, {8}, {0}}, - {{0}, {0}, {8}, {0}}}, - - {SVGA3D_G8R8_G8B8_UNORM, SVGA3DBLOCKDESC_NONE, - {2, 1, 1}, 4, 4, - {{0}, {8}, {8}, {0}}, - {{0}, {8}, {0}, {0}}}, - - {SVGA3D_BC1_TYPELESS, SVGA3DBLOCKDESC_BC1_COMP_TYPELESS, - {4, 4, 1}, 8, 8, - {{0}, {0}, {64}, {0}}, - {{0}, {0}, {0}, {0}}}, - - {SVGA3D_BC1_UNORM_SRGB, SVGA3DBLOCKDESC_BC1_COMP_UNORM_SRGB, - {4, 4, 1}, 8, 8, - {{0}, {0}, {64}, {0}}, - {{0}, {0}, {0}, {0}}}, - - {SVGA3D_BC2_TYPELESS, SVGA3DBLOCKDESC_BC2_COMP_TYPELESS, - {4, 4, 1}, 16, 16, - {{0}, {0}, {128}, {0}}, - {{0}, {0}, {0}, {0}}}, - - {SVGA3D_BC2_UNORM_SRGB, SVGA3DBLOCKDESC_BC2_COMP_UNORM_SRGB, - {4, 4, 1}, 16, 16, - {{0}, {0}, {128}, {0}}, - {{0}, {0}, {0}, {0}}}, - - {SVGA3D_BC3_TYPELESS, SVGA3DBLOCKDESC_BC3_COMP_TYPELESS, - {4, 4, 1}, 16, 16, - {{0}, {0}, {128}, {0}}, - {{0}, {0}, {0}, {0}}}, - - {SVGA3D_BC3_UNORM_SRGB, SVGA3DBLOCKDESC_BC3_COMP_UNORM_SRGB, - {4, 4, 1}, 16, 16, - {{0}, {0}, {128}, {0}}, - {{0}, {0}, {0}, {0}}}, - - {SVGA3D_BC4_TYPELESS, SVGA3DBLOCKDESC_BC4_COMP_TYPELESS, - {4, 4, 1}, 8, 8, - {{0}, {0}, {64}, {0}}, - {{0}, {0}, {0}, {0}}}, - - {SVGA3D_ATI1, SVGA3DBLOCKDESC_BC4_COMP_UNORM, - {4, 4, 1}, 8, 8, - {{0}, {0}, {64}, {0}}, - {{0}, {0}, {0}, {0}}}, - - {SVGA3D_BC4_SNORM, SVGA3DBLOCKDESC_BC4_COMP_SNORM, - {4, 4, 1}, 8, 8, - {{0}, {0}, {64}, {0}}, - {{0}, {0}, {0}, {0}}}, - - {SVGA3D_BC5_TYPELESS, SVGA3DBLOCKDESC_BC5_COMP_TYPELESS, - {4, 4, 1}, 16, 16, - {{0}, {0}, {128}, {0}}, - 
{{0}, {0}, {0}, {0}}}, - - {SVGA3D_ATI2, SVGA3DBLOCKDESC_BC5_COMP_UNORM, - {4, 4, 1}, 16, 16, - {{0}, {0}, {128}, {0}}, - {{0}, {0}, {0}, {0}}}, - - {SVGA3D_BC5_SNORM, SVGA3DBLOCKDESC_BC5_COMP_SNORM, - {4, 4, 1}, 16, 16, - {{0}, {0}, {128}, {0}}, - {{0}, {0}, {0}, {0}}}, - - {SVGA3D_R10G10B10_XR_BIAS_A2_UNORM, SVGA3DBLOCKDESC_RGBA_UNORM, - {1, 1, 1}, 4, 4, - {{10}, {10}, {10}, {2}}, - {{20}, {10}, {0}, {30}}}, - - {SVGA3D_B8G8R8A8_TYPELESS, SVGA3DBLOCKDESC_TYPELESS, - {1, 1, 1}, 4, 4, - {{8}, {8}, {8}, {8}}, - {{0}, {8}, {16}, {24}}}, - - {SVGA3D_B8G8R8A8_UNORM_SRGB, SVGA3DBLOCKDESC_RGBA_UNORM_SRGB, - {1, 1, 1}, 4, 4, - {{8}, {8}, {8}, {8}}, - {{0}, {8}, {16}, {24}}}, - - {SVGA3D_B8G8R8X8_TYPELESS, SVGA3DBLOCKDESC_TYPELESS, - {1, 1, 1}, 4, 4, - {{8}, {8}, {8}, {0}}, - {{0}, {8}, {16}, {24}}}, - - {SVGA3D_B8G8R8X8_UNORM_SRGB, SVGA3DBLOCKDESC_RGB_UNORM_SRGB, - {1, 1, 1}, 4, 4, - {{8}, {8}, {8}, {0}}, - {{0}, {8}, {16}, {24}}}, - - {SVGA3D_Z_DF16, SVGA3DBLOCKDESC_DEPTH_UNORM, - {1, 1, 1}, 2, 2, - {{0}, {0}, {16}, {0}}, - {{0}, {0}, {0}, {0}}}, - - {SVGA3D_Z_DF24, SVGA3DBLOCKDESC_DEPTH_UNORM, - {1, 1, 1}, 4, 4, - {{0}, {0}, {24}, {0}}, - {{0}, {0}, {8}, {0}}}, - - {SVGA3D_Z_D24S8_INT, SVGA3DBLOCKDESC_DS_UNORM, - {1, 1, 1}, 4, 4, - {{0}, {8}, {24}, {0}}, - {{0}, {0}, {8}, {0}}}, - - {SVGA3D_YV12, SVGA3DBLOCKDESC_YV12, - {2, 2, 1}, 6, 2, - {{0}, {0}, {48}, {0}}, - {{0}, {0}, {0}, {0}}}, - - {SVGA3D_R32G32B32A32_FLOAT, SVGA3DBLOCKDESC_RGBA_FP, - {1, 1, 1}, 16, 16, - {{32}, {32}, {32}, {32}}, - {{64}, {32}, {0}, {96}}}, - - {SVGA3D_R16G16B16A16_FLOAT, SVGA3DBLOCKDESC_RGBA_FP, - {1, 1, 1}, 8, 8, - {{16}, {16}, {16}, {16}}, - {{32}, {16}, {0}, {48}}}, - - {SVGA3D_R16G16B16A16_UNORM, SVGA3DBLOCKDESC_RGBA_UNORM, - {1, 1, 1}, 8, 8, - {{16}, {16}, {16}, {16}}, - {{32}, {16}, {0}, {48}}}, - - {SVGA3D_R32G32_FLOAT, SVGA3DBLOCKDESC_RG_FP, - {1, 1, 1}, 8, 8, - {{0}, {32}, {32}, {0}}, - {{0}, {32}, {0}, {0}}}, - - {SVGA3D_R10G10B10A2_UNORM, SVGA3DBLOCKDESC_RGBA_UNORM, - {1, 1, 1}, 4, 4, - {{10}, {10}, {10}, {2}}, - {{20}, {10}, {0}, {30}}}, - - {SVGA3D_R8G8B8A8_SNORM, SVGA3DBLOCKDESC_RGBA_SNORM, - {1, 1, 1}, 4, 4, - {{8}, {8}, {8}, {8}}, - {{16}, {8}, {0}, {24}}}, - - {SVGA3D_R16G16_FLOAT, SVGA3DBLOCKDESC_RG_FP, - {1, 1, 1}, 4, 4, - {{0}, {16}, {16}, {0}}, - {{0}, {16}, {0}, {0}}}, - - {SVGA3D_R16G16_UNORM, SVGA3DBLOCKDESC_RG_UNORM, - {1, 1, 1}, 4, 4, - {{0}, {16}, {16}, {0}}, - {{0}, {16}, {0}, {0}}}, - - {SVGA3D_R16G16_SNORM, SVGA3DBLOCKDESC_RG_SNORM, - {1, 1, 1}, 4, 4, - {{0}, {16}, {16}, {0}}, - {{0}, {16}, {0}, {0}}}, - - {SVGA3D_R32_FLOAT, SVGA3DBLOCKDESC_R_FP, - {1, 1, 1}, 4, 4, - {{0}, {0}, {32}, {0}}, - {{0}, {0}, {0}, {0}}}, - - {SVGA3D_R8G8_SNORM, SVGA3DBLOCKDESC_RG_SNORM, - {1, 1, 1}, 2, 2, - {{0}, {8}, {8}, {0}}, - {{0}, {8}, {0}, {0}}}, - - {SVGA3D_R16_FLOAT, SVGA3DBLOCKDESC_R_FP, - {1, 1, 1}, 2, 2, - {{0}, {0}, {16}, {0}}, - {{0}, {0}, {0}, {0}}}, - - {SVGA3D_D16_UNORM, SVGA3DBLOCKDESC_DEPTH_UNORM, - {1, 1, 1}, 2, 2, - {{0}, {0}, {16}, {0}}, - {{0}, {0}, {0}, {0}}}, - - {SVGA3D_A8_UNORM, SVGA3DBLOCKDESC_A_UNORM, - {1, 1, 1}, 1, 1, - {{0}, {0}, {0}, {8}}, - {{0}, {0}, {0}, {0}}}, - - {SVGA3D_BC1_UNORM, SVGA3DBLOCKDESC_BC1_COMP_UNORM, - {4, 4, 1}, 8, 8, - {{0}, {0}, {64}, {0}}, - {{0}, {0}, {0}, {0}}}, - - {SVGA3D_BC2_UNORM, SVGA3DBLOCKDESC_BC2_COMP_UNORM, - {4, 4, 1}, 16, 16, - {{0}, {0}, {128}, {0}}, - {{0}, {0}, {0}, {0}}}, - - {SVGA3D_BC3_UNORM, SVGA3DBLOCKDESC_BC3_COMP_UNORM, - {4, 4, 1}, 16, 16, - {{0}, {0}, {128}, {0}}, - {{0}, {0}, {0}, {0}}}, - - {SVGA3D_B5G6R5_UNORM, 
SVGA3DBLOCKDESC_RGB_UNORM, - {1, 1, 1}, 2, 2, - {{5}, {6}, {5}, {0}}, - {{0}, {5}, {11}, {0}}}, - - {SVGA3D_B5G5R5A1_UNORM, SVGA3DBLOCKDESC_RGBA_UNORM, - {1, 1, 1}, 2, 2, - {{5}, {5}, {5}, {1}}, - {{0}, {5}, {10}, {15}}}, - - {SVGA3D_B8G8R8A8_UNORM, SVGA3DBLOCKDESC_RGBA_UNORM, - {1, 1, 1}, 4, 4, - {{8}, {8}, {8}, {8}}, - {{0}, {8}, {16}, {24}}}, - - {SVGA3D_B8G8R8X8_UNORM, SVGA3DBLOCKDESC_RGB_UNORM, - {1, 1, 1}, 4, 4, - {{8}, {8}, {8}, {0}}, - {{0}, {8}, {16}, {24}}}, - - {SVGA3D_BC4_UNORM, SVGA3DBLOCKDESC_BC4_COMP_UNORM, - {4, 4, 1}, 8, 8, - {{0}, {0}, {64}, {0}}, - {{0}, {0}, {0}, {0}}}, - - {SVGA3D_BC5_UNORM, SVGA3DBLOCKDESC_BC5_COMP_UNORM, - {4, 4, 1}, 16, 16, - {{0}, {0}, {128}, {0}}, - {{0}, {0}, {0}, {0}}}, - - {SVGA3D_B4G4R4A4_UNORM, SVGA3DBLOCKDESC_RGBA_UNORM, - {1, 1, 1}, 2, 2, - {{4}, {4}, {4}, {4}}, - {{0}, {4}, {8}, {12}}}, - - {SVGA3D_BC6H_TYPELESS, SVGA3DBLOCKDESC_BC6H_COMP_TYPELESS, - {4, 4, 1}, 16, 16, - {{0}, {0}, {128}, {0}}, - {{0}, {0}, {0}, {0}}}, - - {SVGA3D_BC6H_UF16, SVGA3DBLOCKDESC_BC6H_COMP_UF16, - {4, 4, 1}, 16, 16, - {{0}, {0}, {128}, {0}}, - {{0}, {0}, {0}, {0}}}, - - {SVGA3D_BC6H_SF16, SVGA3DBLOCKDESC_BC6H_COMP_SF16, - {4, 4, 1}, 16, 16, - {{0}, {0}, {128}, {0}}, - {{0}, {0}, {0}, {0}}}, - - {SVGA3D_BC7_TYPELESS, SVGA3DBLOCKDESC_BC7_COMP_TYPELESS, - {4, 4, 1}, 16, 16, - {{0}, {0}, {128}, {0}}, - {{0}, {0}, {0}, {0}}}, - - {SVGA3D_BC7_UNORM, SVGA3DBLOCKDESC_BC7_COMP_UNORM, - {4, 4, 1}, 16, 16, - {{0}, {0}, {128}, {0}}, - {{0}, {0}, {0}, {0}}}, - - {SVGA3D_BC7_UNORM_SRGB, SVGA3DBLOCKDESC_BC7_COMP_UNORM_SRGB, - {4, 4, 1}, 16, 16, - {{0}, {0}, {128}, {0}}, - {{0}, {0}, {0}, {0}}}, - - {SVGA3D_AYUV, SVGA3DBLOCKDESC_AYUV, - {1, 1, 1}, 4, 4, - {{8}, {8}, {8}, {8}}, - {{0}, {8}, {16}, {24}}}, -}; - -static inline u32 clamped_umul32(u32 a, u32 b) -{ - uint64_t tmp = (uint64_t) a*b; - return (tmp > (uint64_t) ((u32) -1)) ? (u32) -1 : tmp; -} - -/** - * svga3dsurface_get_desc - Look up the appropriate SVGA3dSurfaceDesc for the - * given format. - */ -static inline const struct svga3d_surface_desc * -svga3dsurface_get_desc(SVGA3dSurfaceFormat format) -{ - if (format < ARRAY_SIZE(svga3d_surface_descs)) - return &svga3d_surface_descs[format]; - - return &svga3d_surface_descs[SVGA3D_FORMAT_INVALID]; -} - -/** - * svga3dsurface_get_mip_size - Given a base level size and the mip level, - * compute the size of the mip level. 
- */ -static inline surf_size_struct -svga3dsurface_get_mip_size(surf_size_struct base_level, u32 mip_level) -{ - surf_size_struct size; - - size.width = max_t(u32, base_level.width >> mip_level, 1); - size.height = max_t(u32, base_level.height >> mip_level, 1); - size.depth = max_t(u32, base_level.depth >> mip_level, 1); - size.pad64 = 0; - - return size; -} - -static inline void -svga3dsurface_get_size_in_blocks(const struct svga3d_surface_desc *desc, - const surf_size_struct *pixel_size, - surf_size_struct *block_size) -{ - block_size->width = __KERNEL_DIV_ROUND_UP(pixel_size->width, - desc->block_size.width); - block_size->height = __KERNEL_DIV_ROUND_UP(pixel_size->height, - desc->block_size.height); - block_size->depth = __KERNEL_DIV_ROUND_UP(pixel_size->depth, - desc->block_size.depth); -} - -static inline bool -svga3dsurface_is_planar_surface(const struct svga3d_surface_desc *desc) -{ - return (desc->block_desc & SVGA3DBLOCKDESC_PLANAR_YUV) != 0; -} - -static inline u32 -svga3dsurface_calculate_pitch(const struct svga3d_surface_desc *desc, - const surf_size_struct *size) -{ - u32 pitch; - surf_size_struct blocks; - - svga3dsurface_get_size_in_blocks(desc, size, &blocks); - - pitch = blocks.width * desc->pitch_bytes_per_block; - - return pitch; -} - -/** - * svga3dsurface_get_image_buffer_size - Calculates image buffer size. - * - * Return the number of bytes of buffer space required to store one image of a - * surface, optionally using the specified pitch. - * - * If pitch is zero, it is assumed that rows are tightly packed. - * - * This function is overflow-safe. If the result would have overflowed, instead - * we return MAX_UINT32. - */ -static inline u32 -svga3dsurface_get_image_buffer_size(const struct svga3d_surface_desc *desc, - const surf_size_struct *size, - u32 pitch) -{ - surf_size_struct image_blocks; - u32 slice_size, total_size; - - svga3dsurface_get_size_in_blocks(desc, size, &image_blocks); - - if (svga3dsurface_is_planar_surface(desc)) { - total_size = clamped_umul32(image_blocks.width, - image_blocks.height); - total_size = clamped_umul32(total_size, image_blocks.depth); - total_size = clamped_umul32(total_size, desc->bytes_per_block); - return total_size; - } - - if (pitch == 0) - pitch = svga3dsurface_calculate_pitch(desc, size); - - slice_size = clamped_umul32(image_blocks.height, pitch); - total_size = clamped_umul32(slice_size, image_blocks.depth); - - return total_size; -} - -/** - * svga3dsurface_get_serialized_size - Get the serialized size for the image. - */ -static inline u32 -svga3dsurface_get_serialized_size(SVGA3dSurfaceFormat format, - surf_size_struct base_level_size, - u32 num_mip_levels, - u32 num_layers) -{ - const struct svga3d_surface_desc *desc = svga3dsurface_get_desc(format); - u32 total_size = 0; - u32 mip; - - for (mip = 0; mip < num_mip_levels; mip++) { - surf_size_struct size = - svga3dsurface_get_mip_size(base_level_size, mip); - total_size += svga3dsurface_get_image_buffer_size(desc, - &size, 0); - } - - return total_size * num_layers; -} - -/** - * svga3dsurface_get_serialized_size_extended - Returns the number of bytes - * required for a surface with given parameters. Support for sample count. 
- */ -static inline u32 -svga3dsurface_get_serialized_size_extended(SVGA3dSurfaceFormat format, - surf_size_struct base_level_size, - u32 num_mip_levels, - u32 num_layers, - u32 num_samples) -{ - uint64_t total_size = - svga3dsurface_get_serialized_size(format, - base_level_size, - num_mip_levels, - num_layers); - total_size *= max_t(u32, 1, num_samples); - - return min_t(uint64_t, total_size, (uint64_t)U32_MAX); -} - -/** - * svga3dsurface_get_pixel_offset - Compute the offset (in bytes) to a pixel - * in an image (or volume). - * - * @width: The image width in pixels. - * @height: The image height in pixels - */ -static inline u32 -svga3dsurface_get_pixel_offset(SVGA3dSurfaceFormat format, - u32 width, u32 height, - u32 x, u32 y, u32 z) -{ - const struct svga3d_surface_desc *desc = svga3dsurface_get_desc(format); - const u32 bw = desc->block_size.width, bh = desc->block_size.height; - const u32 bd = desc->block_size.depth; - const u32 rowstride = __KERNEL_DIV_ROUND_UP(width, bw) * - desc->bytes_per_block; - const u32 imgstride = __KERNEL_DIV_ROUND_UP(height, bh) * rowstride; - const u32 offset = (z / bd * imgstride + - y / bh * rowstride + - x / bw * desc->bytes_per_block); - return offset; -} - -static inline u32 -svga3dsurface_get_image_offset(SVGA3dSurfaceFormat format, - surf_size_struct baseLevelSize, - u32 numMipLevels, - u32 face, - u32 mip) - -{ - u32 offset; - u32 mipChainBytes; - u32 mipChainBytesToLevel; - u32 i; - const struct svga3d_surface_desc *desc; - surf_size_struct mipSize; - u32 bytes; - - desc = svga3dsurface_get_desc(format); - - mipChainBytes = 0; - mipChainBytesToLevel = 0; - for (i = 0; i < numMipLevels; i++) { - mipSize = svga3dsurface_get_mip_size(baseLevelSize, i); - bytes = svga3dsurface_get_image_buffer_size(desc, &mipSize, 0); - mipChainBytes += bytes; - if (i < mip) - mipChainBytesToLevel += bytes; - } - - offset = mipChainBytes * face + mipChainBytesToLevel; - - return offset; -} - - -/** - * svga3dsurface_is_gb_screen_target_format - Is the specified format usable as - * a ScreenTarget? - * (with just the GBObjects cap-bit - * set) - * @format: format to queried - * - * RETURNS: - * true if queried format is valid for screen targets - */ -static inline bool -svga3dsurface_is_gb_screen_target_format(SVGA3dSurfaceFormat format) -{ - return (format == SVGA3D_X8R8G8B8 || - format == SVGA3D_A8R8G8B8 || - format == SVGA3D_R5G6B5 || - format == SVGA3D_X1R5G5B5 || - format == SVGA3D_A1R5G5B5 || - format == SVGA3D_P8); -} - - -/** - * svga3dsurface_is_dx_screen_target_format - Is the specified format usable as - * a ScreenTarget? - * (with DX10 enabled) - * - * @format: format to queried - * - * Results: - * true if queried format is valid for screen targets - */ -static inline bool -svga3dsurface_is_dx_screen_target_format(SVGA3dSurfaceFormat format) -{ - return (format == SVGA3D_R8G8B8A8_UNORM || - format == SVGA3D_B8G8R8A8_UNORM || - format == SVGA3D_B8G8R8X8_UNORM); -} - - -/** - * svga3dsurface_is_screen_target_format - Is the specified format usable as a - * ScreenTarget? 
- *                                        (for some combination of caps)
- *
- * @format: format to queried
- *
- * Results:
- * true if queried format is valid for screen targets
- */
-static inline bool
-svga3dsurface_is_screen_target_format(SVGA3dSurfaceFormat format)
-{
-	if (svga3dsurface_is_gb_screen_target_format(format)) {
-		return true;
-	}
-	return svga3dsurface_is_dx_screen_target_format(format);
-}
-
-/**
- * struct svga3dsurface_mip - Mimpmap level information
- * @bytes: Bytes required in the backing store of this mipmap level.
- * @img_stride: Byte stride per image.
- * @row_stride: Byte stride per block row.
- * @size: The size of the mipmap.
- */
-struct svga3dsurface_mip {
-	size_t bytes;
-	size_t img_stride;
-	size_t row_stride;
-	struct drm_vmw_size size;
-
-};
-
-/**
- * struct svga3dsurface_cache - Cached surface information
- * @desc: Pointer to the surface descriptor
- * @mip: Array of mipmap level information. Valid size is @num_mip_levels.
- * @mip_chain_bytes: Bytes required in the backing store for the whole chain
- * of mip levels.
- * @sheet_bytes: Bytes required in the backing store for a sheet
- * representing a single sample.
- * @num_mip_levels: Valid size of the @mip array. Number of mipmap levels in
- * a chain.
- * @num_layers: Number of slices in an array texture or number of faces in
- * a cubemap texture.
- */
-struct svga3dsurface_cache {
-	const struct svga3d_surface_desc *desc;
-	struct svga3dsurface_mip mip[DRM_VMW_MAX_MIP_LEVELS];
-	size_t mip_chain_bytes;
-	size_t sheet_bytes;
-	u32 num_mip_levels;
-	u32 num_layers;
-};
-
-/**
- * struct svga3dsurface_loc - Surface location
- * @sheet: The multisample sheet.
- * @sub_resource: Surface subresource. Defined as layer * num_mip_levels +
- * mip_level.
- * @x: X coordinate.
- * @y: Y coordinate.
- * @z: Z coordinate.
- */ -struct svga3dsurface_loc { - u32 sheet; - u32 sub_resource; - u32 x, y, z; + SVGA3dBlockDesc blockDesc; + + SVGA3dSize blockSize; + uint32 bytesPerBlock; + uint32 pitchBytesPerBlock; + + SVGA3dChannelDef bitDepth; + SVGA3dChannelDef bitOffset; +} SVGA3dSurfaceDesc; + +STATIC_CONST SVGA3dSurfaceDesc g_SVGA3dSurfaceDescs[] = { + { SVGA3D_FORMAT_INVALID, + SVGA3DBLOCKDESC_NONE, + { 1, 1, 1 }, + 0, + 0, + { { 0 }, { 0 }, { 0 }, { 0 } }, + { { 0 }, { 0 }, { 0 }, { 0 } } }, + + { SVGA3D_X8R8G8B8, + SVGA3DBLOCKDESC_RGB_UNORM, + { 1, 1, 1 }, + 4, + 4, + { { 8 }, { 8 }, { 8 }, { 0 } }, + { { 0 }, { 8 }, { 16 }, { 24 } } }, + + { SVGA3D_A8R8G8B8, + SVGA3DBLOCKDESC_RGBA_UNORM, + { 1, 1, 1 }, + 4, + 4, + { { 8 }, { 8 }, { 8 }, { 8 } }, + { { 0 }, { 8 }, { 16 }, { 24 } } }, + + { SVGA3D_R5G6B5, + SVGA3DBLOCKDESC_RGB_UNORM, + { 1, 1, 1 }, + 2, + 2, + { { 5 }, { 6 }, { 5 }, { 0 } }, + { { 0 }, { 5 }, { 11 }, { 0 } } }, + + { SVGA3D_X1R5G5B5, + SVGA3DBLOCKDESC_RGB_UNORM, + { 1, 1, 1 }, + 2, + 2, + { { 5 }, { 5 }, { 5 }, { 0 } }, + { { 0 }, { 5 }, { 10 }, { 0 } } }, + + { SVGA3D_A1R5G5B5, + SVGA3DBLOCKDESC_RGBA_UNORM, + { 1, 1, 1 }, + 2, + 2, + { { 5 }, { 5 }, { 5 }, { 1 } }, + { { 0 }, { 5 }, { 10 }, { 15 } } }, + + { SVGA3D_A4R4G4B4, + SVGA3DBLOCKDESC_RGBA_UNORM, + { 1, 1, 1 }, + 2, + 2, + { { 4 }, { 4 }, { 4 }, { 4 } }, + { { 0 }, { 4 }, { 8 }, { 12 } } }, + + { SVGA3D_Z_D32, + SVGA3DBLOCKDESC_DEPTH_UNORM, + { 1, 1, 1 }, + 4, + 4, + { { 0 }, { 0 }, { 32 }, { 0 } }, + { { 0 }, { 0 }, { 0 }, { 0 } } }, + + { SVGA3D_Z_D16, + SVGA3DBLOCKDESC_DEPTH_UNORM, + { 1, 1, 1 }, + 2, + 2, + { { 0 }, { 0 }, { 16 }, { 0 } }, + { { 0 }, { 0 }, { 0 }, { 0 } } }, + + { SVGA3D_Z_D24S8, + SVGA3DBLOCKDESC_DS_UNORM, + { 1, 1, 1 }, + 4, + 4, + { { 0 }, { 8 }, { 24 }, { 0 } }, + { { 0 }, { 0 }, { 8 }, { 0 } } }, + + { SVGA3D_Z_D15S1, + SVGA3DBLOCKDESC_DS_UNORM, + { 1, 1, 1 }, + 2, + 2, + { { 0 }, { 1 }, { 15 }, { 0 } }, + { { 0 }, { 0 }, { 1 }, { 0 } } }, + + { SVGA3D_LUMINANCE8, + SVGA3DBLOCKDESC_L_UNORM, + { 1, 1, 1 }, + 1, + 1, + { { 0 }, { 0 }, { 8 }, { 0 } }, + { { 0 }, { 0 }, { 0 }, { 0 } } }, + + { SVGA3D_LUMINANCE4_ALPHA4, + SVGA3DBLOCKDESC_LA_UNORM, + { 1, 1, 1 }, + 1, + 1, + { { 0 }, { 0 }, { 4 }, { 4 } }, + { { 0 }, { 0 }, { 0 }, { 4 } } }, + + { SVGA3D_LUMINANCE16, + SVGA3DBLOCKDESC_L_UNORM, + { 1, 1, 1 }, + 2, + 2, + { { 0 }, { 0 }, { 16 }, { 0 } }, + { { 0 }, { 0 }, { 0 }, { 0 } } }, + + { SVGA3D_LUMINANCE8_ALPHA8, + SVGA3DBLOCKDESC_LA_UNORM, + { 1, 1, 1 }, + 2, + 2, + { { 0 }, { 0 }, { 8 }, { 8 } }, + { { 0 }, { 0 }, { 0 }, { 8 } } }, + + { SVGA3D_DXT1, + SVGA3DBLOCKDESC_BC1_COMP_UNORM, + { 4, 4, 1 }, + 8, + 8, + { { 0 }, { 0 }, { 64 }, { 0 } }, + { { 0 }, { 0 }, { 0 }, { 0 } } }, + + { SVGA3D_DXT2, + SVGA3DBLOCKDESC_BC2_COMP_UNORM, + { 4, 4, 1 }, + 16, + 16, + { { 0 }, { 0 }, { 128 }, { 0 } }, + { { 0 }, { 0 }, { 0 }, { 0 } } }, + + { SVGA3D_DXT3, + SVGA3DBLOCKDESC_BC2_COMP_UNORM, + { 4, 4, 1 }, + 16, + 16, + { { 0 }, { 0 }, { 128 }, { 0 } }, + { { 0 }, { 0 }, { 0 }, { 0 } } }, + + { SVGA3D_DXT4, + SVGA3DBLOCKDESC_BC3_COMP_UNORM, + { 4, 4, 1 }, + 16, + 16, + { { 0 }, { 0 }, { 128 }, { 0 } }, + { { 0 }, { 0 }, { 0 }, { 0 } } }, + + { SVGA3D_DXT5, + SVGA3DBLOCKDESC_BC3_COMP_UNORM, + { 4, 4, 1 }, + 16, + 16, + { { 0 }, { 0 }, { 128 }, { 0 } }, + { { 0 }, { 0 }, { 0 }, { 0 } } }, + + { SVGA3D_BUMPU8V8, + SVGA3DBLOCKDESC_UV_SNORM, + { 1, 1, 1 }, + 2, + 2, + { { 0 }, { 8 }, { 8 }, { 0 } }, + { { 0 }, { 8 }, { 0 }, { 0 } } }, + + { SVGA3D_BUMPL6V5U5, + SVGA3DBLOCKDESC_UVL, + { 1, 1, 1 }, + 2, + 2, + { { 6 }, { 5 
}, { 5 }, { 0 } }, + { { 10 }, { 5 }, { 0 }, { 0 } } }, + + { SVGA3D_BUMPX8L8V8U8, + SVGA3DBLOCKDESC_UVL, + { 1, 1, 1 }, + 4, + 4, + { { 8 }, { 8 }, { 8 }, { 0 } }, + { { 16 }, { 8 }, { 0 }, { 0 } } }, + + { SVGA3D_FORMAT_DEAD1, + SVGA3DBLOCKDESC_NONE, + { 1, 1, 1 }, + 3, + 3, + { { 8 }, { 8 }, { 8 }, { 0 } }, + { { 16 }, { 8 }, { 0 }, { 0 } } }, + + { SVGA3D_ARGB_S10E5, + SVGA3DBLOCKDESC_RGBA_FP, + { 1, 1, 1 }, + 8, + 8, + { { 16 }, { 16 }, { 16 }, { 16 } }, + { { 32 }, { 16 }, { 0 }, { 48 } } }, + + { SVGA3D_ARGB_S23E8, + SVGA3DBLOCKDESC_RGBA_FP, + { 1, 1, 1 }, + 16, + 16, + { { 32 }, { 32 }, { 32 }, { 32 } }, + { { 64 }, { 32 }, { 0 }, { 96 } } }, + + { SVGA3D_A2R10G10B10, + SVGA3DBLOCKDESC_RGBA_UNORM, + { 1, 1, 1 }, + 4, + 4, + { { 10 }, { 10 }, { 10 }, { 2 } }, + { { 0 }, { 10 }, { 20 }, { 30 } } }, + + { SVGA3D_V8U8, + SVGA3DBLOCKDESC_UV_SNORM, + { 1, 1, 1 }, + 2, + 2, + { { 0 }, { 8 }, { 8 }, { 0 } }, + { { 0 }, { 8 }, { 0 }, { 0 } } }, + + { SVGA3D_Q8W8V8U8, + SVGA3DBLOCKDESC_UVWQ_SNORM, + { 1, 1, 1 }, + 4, + 4, + { { 8 }, { 8 }, { 8 }, { 8 } }, + { { 16 }, { 8 }, { 0 }, { 24 } } }, + + { SVGA3D_CxV8U8, + SVGA3DBLOCKDESC_UVCX_SNORM, + { 1, 1, 1 }, + 2, + 2, + { { 0 }, { 8 }, { 8 }, { 0 } }, + { { 0 }, { 8 }, { 0 }, { 0 } } }, + + { SVGA3D_X8L8V8U8, + SVGA3DBLOCKDESC_UVL, + { 1, 1, 1 }, + 4, + 4, + { { 8 }, { 8 }, { 8 }, { 0 } }, + { { 16 }, { 8 }, { 0 }, { 0 } } }, + + { SVGA3D_A2W10V10U10, + SVGA3DBLOCKDESC_UVWA, + { 1, 1, 1 }, + 4, + 4, + { { 10 }, { 10 }, { 10 }, { 2 } }, + { { 20 }, { 10 }, { 0 }, { 30 } } }, + + { SVGA3D_ALPHA8, + SVGA3DBLOCKDESC_A_UNORM, + { 1, 1, 1 }, + 1, + 1, + { { 0 }, { 0 }, { 0 }, { 8 } }, + { { 0 }, { 0 }, { 0 }, { 0 } } }, + + { SVGA3D_R_S10E5, + SVGA3DBLOCKDESC_R_FP, + { 1, 1, 1 }, + 2, + 2, + { { 0 }, { 0 }, { 16 }, { 0 } }, + { { 0 }, { 0 }, { 0 }, { 0 } } }, + + { SVGA3D_R_S23E8, + SVGA3DBLOCKDESC_R_FP, + { 1, 1, 1 }, + 4, + 4, + { { 0 }, { 0 }, { 32 }, { 0 } }, + { { 0 }, { 0 }, { 0 }, { 0 } } }, + + { SVGA3D_RG_S10E5, + SVGA3DBLOCKDESC_RG_FP, + { 1, 1, 1 }, + 4, + 4, + { { 0 }, { 16 }, { 16 }, { 0 } }, + { { 0 }, { 16 }, { 0 }, { 0 } } }, + + { SVGA3D_RG_S23E8, + SVGA3DBLOCKDESC_RG_FP, + { 1, 1, 1 }, + 8, + 8, + { { 0 }, { 32 }, { 32 }, { 0 } }, + { { 0 }, { 32 }, { 0 }, { 0 } } }, + + { SVGA3D_BUFFER, + SVGA3DBLOCKDESC_BUFFER, + { 1, 1, 1 }, + 1, + 1, + { { 0 }, { 0 }, { 8 }, { 0 } }, + { { 0 }, { 0 }, { 0 }, { 0 } } }, + + { SVGA3D_Z_D24X8, + SVGA3DBLOCKDESC_DEPTH_UNORM, + { 1, 1, 1 }, + 4, + 4, + { { 0 }, { 0 }, { 24 }, { 0 } }, + { { 0 }, { 0 }, { 8 }, { 0 } } }, + + { SVGA3D_V16U16, + SVGA3DBLOCKDESC_UV_SNORM, + { 1, 1, 1 }, + 4, + 4, + { { 0 }, { 16 }, { 16 }, { 0 } }, + { { 0 }, { 16 }, { 0 }, { 0 } } }, + + { SVGA3D_G16R16, + SVGA3DBLOCKDESC_RG_UNORM, + { 1, 1, 1 }, + 4, + 4, + { { 0 }, { 16 }, { 16 }, { 0 } }, + { { 0 }, { 16 }, { 0 }, { 0 } } }, + + { SVGA3D_A16B16G16R16, + SVGA3DBLOCKDESC_RGBA_UNORM, + { 1, 1, 1 }, + 8, + 8, + { { 16 }, { 16 }, { 16 }, { 16 } }, + { { 32 }, { 16 }, { 0 }, { 48 } } }, + + { SVGA3D_UYVY, + SVGA3DBLOCKDESC_YUV, + { 2, 1, 1 }, + 4, + 4, + { { 8 }, { 0 }, { 8 }, { 0 } }, + { { 0 }, { 0 }, { 8 }, { 0 } } }, + + { SVGA3D_YUY2, + SVGA3DBLOCKDESC_YUV, + { 2, 1, 1 }, + 4, + 4, + { { 8 }, { 0 }, { 8 }, { 0 } }, + { { 8 }, { 0 }, { 0 }, { 0 } } }, + + { SVGA3D_NV12, + SVGA3DBLOCKDESC_NV12, + { 2, 2, 1 }, + 6, + 2, + { { 0 }, { 0 }, { 48 }, { 0 } }, + { { 0 }, { 0 }, { 0 }, { 0 } } }, + + { SVGA3D_FORMAT_DEAD2, + SVGA3DBLOCKDESC_NONE, + { 1, 1, 1 }, + 4, + 4, + { { 8 }, { 8 }, { 8 }, { 8 } }, + { { 0 }, { 8 }, { 
16 }, { 24 } } }, + + { SVGA3D_R32G32B32A32_TYPELESS, + SVGA3DBLOCKDESC_TYPELESS, + { 1, 1, 1 }, + 16, + 16, + { { 32 }, { 32 }, { 32 }, { 32 } }, + { { 64 }, { 32 }, { 0 }, { 96 } } }, + + { SVGA3D_R32G32B32A32_UINT, + SVGA3DBLOCKDESC_RGBA_UINT, + { 1, 1, 1 }, + 16, + 16, + { { 32 }, { 32 }, { 32 }, { 32 } }, + { { 64 }, { 32 }, { 0 }, { 96 } } }, + + { SVGA3D_R32G32B32A32_SINT, + SVGA3DBLOCKDESC_RGBA_SINT, + { 1, 1, 1 }, + 16, + 16, + { { 32 }, { 32 }, { 32 }, { 32 } }, + { { 64 }, { 32 }, { 0 }, { 96 } } }, + + { SVGA3D_R32G32B32_TYPELESS, + SVGA3DBLOCKDESC_TYPELESS, + { 1, 1, 1 }, + 12, + 12, + { { 32 }, { 32 }, { 32 }, { 0 } }, + { { 64 }, { 32 }, { 0 }, { 0 } } }, + + { SVGA3D_R32G32B32_FLOAT, + SVGA3DBLOCKDESC_RGB_FP, + { 1, 1, 1 }, + 12, + 12, + { { 32 }, { 32 }, { 32 }, { 0 } }, + { { 64 }, { 32 }, { 0 }, { 0 } } }, + + { SVGA3D_R32G32B32_UINT, + SVGA3DBLOCKDESC_RGB_UINT, + { 1, 1, 1 }, + 12, + 12, + { { 32 }, { 32 }, { 32 }, { 0 } }, + { { 64 }, { 32 }, { 0 }, { 0 } } }, + + { SVGA3D_R32G32B32_SINT, + SVGA3DBLOCKDESC_RGB_SINT, + { 1, 1, 1 }, + 12, + 12, + { { 32 }, { 32 }, { 32 }, { 0 } }, + { { 64 }, { 32 }, { 0 }, { 0 } } }, + + { SVGA3D_R16G16B16A16_TYPELESS, + SVGA3DBLOCKDESC_TYPELESS, + { 1, 1, 1 }, + 8, + 8, + { { 16 }, { 16 }, { 16 }, { 16 } }, + { { 32 }, { 16 }, { 0 }, { 48 } } }, + + { SVGA3D_R16G16B16A16_UINT, + SVGA3DBLOCKDESC_RGBA_UINT, + { 1, 1, 1 }, + 8, + 8, + { { 16 }, { 16 }, { 16 }, { 16 } }, + { { 32 }, { 16 }, { 0 }, { 48 } } }, + + { SVGA3D_R16G16B16A16_SNORM, + SVGA3DBLOCKDESC_RGBA_SNORM, + { 1, 1, 1 }, + 8, + 8, + { { 16 }, { 16 }, { 16 }, { 16 } }, + { { 32 }, { 16 }, { 0 }, { 48 } } }, + + { SVGA3D_R16G16B16A16_SINT, + SVGA3DBLOCKDESC_RGBA_SINT, + { 1, 1, 1 }, + 8, + 8, + { { 16 }, { 16 }, { 16 }, { 16 } }, + { { 32 }, { 16 }, { 0 }, { 48 } } }, + + { SVGA3D_R32G32_TYPELESS, + SVGA3DBLOCKDESC_TYPELESS, + { 1, 1, 1 }, + 8, + 8, + { { 0 }, { 32 }, { 32 }, { 0 } }, + { { 0 }, { 32 }, { 0 }, { 0 } } }, + + { SVGA3D_R32G32_UINT, + SVGA3DBLOCKDESC_RG_UINT, + { 1, 1, 1 }, + 8, + 8, + { { 0 }, { 32 }, { 32 }, { 0 } }, + { { 0 }, { 32 }, { 0 }, { 0 } } }, + + { SVGA3D_R32G32_SINT, + SVGA3DBLOCKDESC_RG_SINT, + { 1, 1, 1 }, + 8, + 8, + { { 0 }, { 32 }, { 32 }, { 0 } }, + { { 0 }, { 32 }, { 0 }, { 0 } } }, + + { SVGA3D_R32G8X24_TYPELESS, + SVGA3DBLOCKDESC_TYPELESS, + { 1, 1, 1 }, + 8, + 8, + { { 0 }, { 8 }, { 32 }, { 0 } }, + { { 0 }, { 32 }, { 0 }, { 0 } } }, + + { SVGA3D_D32_FLOAT_S8X24_UINT, + SVGA3DBLOCKDESC_DS, + { 1, 1, 1 }, + 8, + 8, + { { 0 }, { 8 }, { 32 }, { 0 } }, + { { 0 }, { 32 }, { 0 }, { 0 } } }, + + { SVGA3D_R32_FLOAT_X8X24, + SVGA3DBLOCKDESC_R_FP, + { 1, 1, 1 }, + 8, + 8, + { { 0 }, { 0 }, { 32 }, { 0 } }, + { { 0 }, { 0 }, { 0 }, { 0 } } }, + + { SVGA3D_X32_G8X24_UINT, + SVGA3DBLOCKDESC_G_UINT, + { 1, 1, 1 }, + 8, + 8, + { { 0 }, { 8 }, { 0 }, { 0 } }, + { { 0 }, { 32 }, { 0 }, { 0 } } }, + + { SVGA3D_R10G10B10A2_TYPELESS, + SVGA3DBLOCKDESC_TYPELESS, + { 1, 1, 1 }, + 4, + 4, + { { 10 }, { 10 }, { 10 }, { 2 } }, + { { 20 }, { 10 }, { 0 }, { 30 } } }, + + { SVGA3D_R10G10B10A2_UINT, + SVGA3DBLOCKDESC_RGBA_UINT, + { 1, 1, 1 }, + 4, + 4, + { { 10 }, { 10 }, { 10 }, { 2 } }, + { { 20 }, { 10 }, { 0 }, { 30 } } }, + + { SVGA3D_R11G11B10_FLOAT, + SVGA3DBLOCKDESC_RGB_FP, + { 1, 1, 1 }, + 4, + 4, + { { 10 }, { 11 }, { 11 }, { 0 } }, + { { 22 }, { 11 }, { 0 }, { 0 } } }, + + { SVGA3D_R8G8B8A8_TYPELESS, + SVGA3DBLOCKDESC_TYPELESS, + { 1, 1, 1 }, + 4, + 4, + { { 8 }, { 8 }, { 8 }, { 8 } }, + { { 16 }, { 8 }, { 0 }, { 24 } } }, + + { SVGA3D_R8G8B8A8_UNORM, + 
SVGA3DBLOCKDESC_RGBA_UNORM, + { 1, 1, 1 }, + 4, + 4, + { { 8 }, { 8 }, { 8 }, { 8 } }, + { { 16 }, { 8 }, { 0 }, { 24 } } }, + + { SVGA3D_R8G8B8A8_UNORM_SRGB, + SVGA3DBLOCKDESC_RGBA_UNORM_SRGB, + { 1, 1, 1 }, + 4, + 4, + { { 8 }, { 8 }, { 8 }, { 8 } }, + { { 16 }, { 8 }, { 0 }, { 24 } } }, + + { SVGA3D_R8G8B8A8_UINT, + SVGA3DBLOCKDESC_RGBA_UINT, + { 1, 1, 1 }, + 4, + 4, + { { 8 }, { 8 }, { 8 }, { 8 } }, + { { 16 }, { 8 }, { 0 }, { 24 } } }, + + { SVGA3D_R8G8B8A8_SINT, + SVGA3DBLOCKDESC_RGBA_SINT, + { 1, 1, 1 }, + 4, + 4, + { { 8 }, { 8 }, { 8 }, { 8 } }, + { { 16 }, { 8 }, { 0 }, { 24 } } }, + + { SVGA3D_R16G16_TYPELESS, + SVGA3DBLOCKDESC_TYPELESS, + { 1, 1, 1 }, + 4, + 4, + { { 0 }, { 16 }, { 16 }, { 0 } }, + { { 0 }, { 16 }, { 0 }, { 0 } } }, + + { SVGA3D_R16G16_UINT, + SVGA3DBLOCKDESC_RG_UINT, + { 1, 1, 1 }, + 4, + 4, + { { 0 }, { 16 }, { 16 }, { 0 } }, + { { 0 }, { 16 }, { 0 }, { 0 } } }, + + { SVGA3D_R16G16_SINT, + SVGA3DBLOCKDESC_RG_SINT, + { 1, 1, 1 }, + 4, + 4, + { { 0 }, { 16 }, { 16 }, { 0 } }, + { { 0 }, { 16 }, { 0 }, { 0 } } }, + + { SVGA3D_R32_TYPELESS, + SVGA3DBLOCKDESC_TYPELESS, + { 1, 1, 1 }, + 4, + 4, + { { 0 }, { 0 }, { 32 }, { 0 } }, + { { 0 }, { 0 }, { 0 }, { 0 } } }, + + { SVGA3D_D32_FLOAT, + SVGA3DBLOCKDESC_DEPTH_FP, + { 1, 1, 1 }, + 4, + 4, + { { 0 }, { 0 }, { 32 }, { 0 } }, + { { 0 }, { 0 }, { 0 }, { 0 } } }, + + { SVGA3D_R32_UINT, + SVGA3DBLOCKDESC_R_UINT, + { 1, 1, 1 }, + 4, + 4, + { { 0 }, { 0 }, { 32 }, { 0 } }, + { { 0 }, { 0 }, { 0 }, { 0 } } }, + + { SVGA3D_R32_SINT, + SVGA3DBLOCKDESC_R_SINT, + { 1, 1, 1 }, + 4, + 4, + { { 0 }, { 0 }, { 32 }, { 0 } }, + { { 0 }, { 0 }, { 0 }, { 0 } } }, + + { SVGA3D_R24G8_TYPELESS, + SVGA3DBLOCKDESC_TYPELESS, + { 1, 1, 1 }, + 4, + 4, + { { 0 }, { 8 }, { 24 }, { 0 } }, + { { 0 }, { 24 }, { 0 }, { 0 } } }, + + { SVGA3D_D24_UNORM_S8_UINT, + SVGA3DBLOCKDESC_DS_UNORM, + { 1, 1, 1 }, + 4, + 4, + { { 0 }, { 8 }, { 24 }, { 0 } }, + { { 0 }, { 24 }, { 0 }, { 0 } } }, + + { SVGA3D_R24_UNORM_X8, + SVGA3DBLOCKDESC_R_UNORM, + { 1, 1, 1 }, + 4, + 4, + { { 0 }, { 0 }, { 24 }, { 0 } }, + { { 0 }, { 0 }, { 0 }, { 0 } } }, + + { SVGA3D_X24_G8_UINT, + SVGA3DBLOCKDESC_G_UINT, + { 1, 1, 1 }, + 4, + 4, + { { 0 }, { 8 }, { 0 }, { 0 } }, + { { 0 }, { 24 }, { 0 }, { 0 } } }, + + { SVGA3D_R8G8_TYPELESS, + SVGA3DBLOCKDESC_TYPELESS, + { 1, 1, 1 }, + 2, + 2, + { { 0 }, { 8 }, { 8 }, { 0 } }, + { { 0 }, { 8 }, { 0 }, { 0 } } }, + + { SVGA3D_R8G8_UNORM, + SVGA3DBLOCKDESC_RG_UNORM, + { 1, 1, 1 }, + 2, + 2, + { { 0 }, { 8 }, { 8 }, { 0 } }, + { { 0 }, { 8 }, { 0 }, { 0 } } }, + + { SVGA3D_R8G8_UINT, + SVGA3DBLOCKDESC_RG_UINT, + { 1, 1, 1 }, + 2, + 2, + { { 0 }, { 8 }, { 8 }, { 0 } }, + { { 0 }, { 8 }, { 0 }, { 0 } } }, + + { SVGA3D_R8G8_SINT, + SVGA3DBLOCKDESC_RG_SINT, + { 1, 1, 1 }, + 2, + 2, + { { 0 }, { 8 }, { 8 }, { 0 } }, + { { 0 }, { 8 }, { 0 }, { 0 } } }, + + { SVGA3D_R16_TYPELESS, + SVGA3DBLOCKDESC_TYPELESS, + { 1, 1, 1 }, + 2, + 2, + { { 0 }, { 0 }, { 16 }, { 0 } }, + { { 0 }, { 0 }, { 0 }, { 0 } } }, + + { SVGA3D_R16_UNORM, + SVGA3DBLOCKDESC_R_UNORM, + { 1, 1, 1 }, + 2, + 2, + { { 0 }, { 0 }, { 16 }, { 0 } }, + { { 0 }, { 0 }, { 0 }, { 0 } } }, + + { SVGA3D_R16_UINT, + SVGA3DBLOCKDESC_R_UINT, + { 1, 1, 1 }, + 2, + 2, + { { 0 }, { 0 }, { 16 }, { 0 } }, + { { 0 }, { 0 }, { 0 }, { 0 } } }, + + { SVGA3D_R16_SNORM, + SVGA3DBLOCKDESC_R_SNORM, + { 1, 1, 1 }, + 2, + 2, + { { 0 }, { 0 }, { 16 }, { 0 } }, + { { 0 }, { 0 }, { 0 }, { 0 } } }, + + { SVGA3D_R16_SINT, + SVGA3DBLOCKDESC_R_SINT, + { 1, 1, 1 }, + 2, + 2, + { { 0 }, { 0 }, { 16 }, { 0 } }, + { { 0 
}, { 0 }, { 0 }, { 0 } } }, + + { SVGA3D_R8_TYPELESS, + SVGA3DBLOCKDESC_TYPELESS, + { 1, 1, 1 }, + 1, + 1, + { { 0 }, { 0 }, { 8 }, { 0 } }, + { { 0 }, { 0 }, { 0 }, { 0 } } }, + + { SVGA3D_R8_UNORM, + SVGA3DBLOCKDESC_R_UNORM, + { 1, 1, 1 }, + 1, + 1, + { { 0 }, { 0 }, { 8 }, { 0 } }, + { { 0 }, { 0 }, { 0 }, { 0 } } }, + + { SVGA3D_R8_UINT, + SVGA3DBLOCKDESC_R_UINT, + { 1, 1, 1 }, + 1, + 1, + { { 0 }, { 0 }, { 8 }, { 0 } }, + { { 0 }, { 0 }, { 0 }, { 0 } } }, + + { SVGA3D_R8_SNORM, + SVGA3DBLOCKDESC_R_SNORM, + { 1, 1, 1 }, + 1, + 1, + { { 0 }, { 0 }, { 8 }, { 0 } }, + { { 0 }, { 0 }, { 0 }, { 0 } } }, + + { SVGA3D_R8_SINT, + SVGA3DBLOCKDESC_R_SINT, + { 1, 1, 1 }, + 1, + 1, + { { 0 }, { 0 }, { 8 }, { 0 } }, + { { 0 }, { 0 }, { 0 }, { 0 } } }, + + { SVGA3D_P8, + SVGA3DBLOCKDESC_NONE, + { 1, 1, 1 }, + 1, + 1, + { { 0 }, { 0 }, { 8 }, { 0 } }, + { { 0 }, { 0 }, { 0 }, { 0 } } }, + + { SVGA3D_R9G9B9E5_SHAREDEXP, + SVGA3DBLOCKDESC_RGB_EXP, + { 1, 1, 1 }, + 4, + 4, + { { 9 }, { 9 }, { 9 }, { 5 } }, + { { 18 }, { 9 }, { 0 }, { 27 } } }, + + { SVGA3D_R8G8_B8G8_UNORM, + SVGA3DBLOCKDESC_NONE, + { 2, 1, 1 }, + 4, + 4, + { { 0 }, { 8 }, { 8 }, { 0 } }, + { { 0 }, { 0 }, { 8 }, { 0 } } }, + + { SVGA3D_G8R8_G8B8_UNORM, + SVGA3DBLOCKDESC_NONE, + { 2, 1, 1 }, + 4, + 4, + { { 0 }, { 8 }, { 8 }, { 0 } }, + { { 0 }, { 8 }, { 0 }, { 0 } } }, + + { SVGA3D_BC1_TYPELESS, + SVGA3DBLOCKDESC_BC1_COMP_TYPELESS, + { 4, 4, 1 }, + 8, + 8, + { { 0 }, { 0 }, { 64 }, { 0 } }, + { { 0 }, { 0 }, { 0 }, { 0 } } }, + + { SVGA3D_BC1_UNORM_SRGB, + SVGA3DBLOCKDESC_BC1_COMP_UNORM_SRGB, + { 4, 4, 1 }, + 8, + 8, + { { 0 }, { 0 }, { 64 }, { 0 } }, + { { 0 }, { 0 }, { 0 }, { 0 } } }, + + { SVGA3D_BC2_TYPELESS, + SVGA3DBLOCKDESC_BC2_COMP_TYPELESS, + { 4, 4, 1 }, + 16, + 16, + { { 0 }, { 0 }, { 128 }, { 0 } }, + { { 0 }, { 0 }, { 0 }, { 0 } } }, + + { SVGA3D_BC2_UNORM_SRGB, + SVGA3DBLOCKDESC_BC2_COMP_UNORM_SRGB, + { 4, 4, 1 }, + 16, + 16, + { { 0 }, { 0 }, { 128 }, { 0 } }, + { { 0 }, { 0 }, { 0 }, { 0 } } }, + + { SVGA3D_BC3_TYPELESS, + SVGA3DBLOCKDESC_BC3_COMP_TYPELESS, + { 4, 4, 1 }, + 16, + 16, + { { 0 }, { 0 }, { 128 }, { 0 } }, + { { 0 }, { 0 }, { 0 }, { 0 } } }, + + { SVGA3D_BC3_UNORM_SRGB, + SVGA3DBLOCKDESC_BC3_COMP_UNORM_SRGB, + { 4, 4, 1 }, + 16, + 16, + { { 0 }, { 0 }, { 128 }, { 0 } }, + { { 0 }, { 0 }, { 0 }, { 0 } } }, + + { SVGA3D_BC4_TYPELESS, + SVGA3DBLOCKDESC_BC4_COMP_TYPELESS, + { 4, 4, 1 }, + 8, + 8, + { { 0 }, { 0 }, { 64 }, { 0 } }, + { { 0 }, { 0 }, { 0 }, { 0 } } }, + + { SVGA3D_ATI1, + SVGA3DBLOCKDESC_BC4_COMP_UNORM, + { 4, 4, 1 }, + 8, + 8, + { { 0 }, { 0 }, { 64 }, { 0 } }, + { { 0 }, { 0 }, { 0 }, { 0 } } }, + + { SVGA3D_BC4_SNORM, + SVGA3DBLOCKDESC_BC4_COMP_SNORM, + { 4, 4, 1 }, + 8, + 8, + { { 0 }, { 0 }, { 64 }, { 0 } }, + { { 0 }, { 0 }, { 0 }, { 0 } } }, + + { SVGA3D_BC5_TYPELESS, + SVGA3DBLOCKDESC_BC5_COMP_TYPELESS, + { 4, 4, 1 }, + 16, + 16, + { { 0 }, { 0 }, { 128 }, { 0 } }, + { { 0 }, { 0 }, { 0 }, { 0 } } }, + + { SVGA3D_ATI2, + SVGA3DBLOCKDESC_BC5_COMP_UNORM, + { 4, 4, 1 }, + 16, + 16, + { { 0 }, { 0 }, { 128 }, { 0 } }, + { { 0 }, { 0 }, { 0 }, { 0 } } }, + + { SVGA3D_BC5_SNORM, + SVGA3DBLOCKDESC_BC5_COMP_SNORM, + { 4, 4, 1 }, + 16, + 16, + { { 0 }, { 0 }, { 128 }, { 0 } }, + { { 0 }, { 0 }, { 0 }, { 0 } } }, + + { SVGA3D_R10G10B10_XR_BIAS_A2_UNORM, + SVGA3DBLOCKDESC_RGBA_UNORM, + { 1, 1, 1 }, + 4, + 4, + { { 10 }, { 10 }, { 10 }, { 2 } }, + { { 20 }, { 10 }, { 0 }, { 30 } } }, + + { SVGA3D_B8G8R8A8_TYPELESS, + SVGA3DBLOCKDESC_TYPELESS, + { 1, 1, 1 }, + 4, + 4, + { { 8 }, { 8 }, { 8 }, { 8 } }, 
+ { { 0 }, { 8 }, { 16 }, { 24 } } }, + + { SVGA3D_B8G8R8A8_UNORM_SRGB, + SVGA3DBLOCKDESC_RGBA_UNORM_SRGB, + { 1, 1, 1 }, + 4, + 4, + { { 8 }, { 8 }, { 8 }, { 8 } }, + { { 0 }, { 8 }, { 16 }, { 24 } } }, + + { SVGA3D_B8G8R8X8_TYPELESS, + SVGA3DBLOCKDESC_TYPELESS, + { 1, 1, 1 }, + 4, + 4, + { { 8 }, { 8 }, { 8 }, { 0 } }, + { { 0 }, { 8 }, { 16 }, { 24 } } }, + + { SVGA3D_B8G8R8X8_UNORM_SRGB, + SVGA3DBLOCKDESC_RGB_UNORM_SRGB, + { 1, 1, 1 }, + 4, + 4, + { { 8 }, { 8 }, { 8 }, { 0 } }, + { { 0 }, { 8 }, { 16 }, { 24 } } }, + + { SVGA3D_Z_DF16, + SVGA3DBLOCKDESC_DEPTH_UNORM, + { 1, 1, 1 }, + 2, + 2, + { { 0 }, { 0 }, { 16 }, { 0 } }, + { { 0 }, { 0 }, { 0 }, { 0 } } }, + + { SVGA3D_Z_DF24, + SVGA3DBLOCKDESC_DEPTH_UNORM, + { 1, 1, 1 }, + 4, + 4, + { { 0 }, { 0 }, { 24 }, { 0 } }, + { { 0 }, { 0 }, { 8 }, { 0 } } }, + + { SVGA3D_Z_D24S8_INT, + SVGA3DBLOCKDESC_DS_UNORM, + { 1, 1, 1 }, + 4, + 4, + { { 0 }, { 8 }, { 24 }, { 0 } }, + { { 0 }, { 0 }, { 8 }, { 0 } } }, + + { SVGA3D_YV12, + SVGA3DBLOCKDESC_YV12, + { 2, 2, 1 }, + 6, + 2, + { { 0 }, { 0 }, { 48 }, { 0 } }, + { { 0 }, { 0 }, { 0 }, { 0 } } }, + + { SVGA3D_R32G32B32A32_FLOAT, + SVGA3DBLOCKDESC_RGBA_FP, + { 1, 1, 1 }, + 16, + 16, + { { 32 }, { 32 }, { 32 }, { 32 } }, + { { 64 }, { 32 }, { 0 }, { 96 } } }, + + { SVGA3D_R16G16B16A16_FLOAT, + SVGA3DBLOCKDESC_RGBA_FP, + { 1, 1, 1 }, + 8, + 8, + { { 16 }, { 16 }, { 16 }, { 16 } }, + { { 32 }, { 16 }, { 0 }, { 48 } } }, + + { SVGA3D_R16G16B16A16_UNORM, + SVGA3DBLOCKDESC_RGBA_UNORM, + { 1, 1, 1 }, + 8, + 8, + { { 16 }, { 16 }, { 16 }, { 16 } }, + { { 32 }, { 16 }, { 0 }, { 48 } } }, + + { SVGA3D_R32G32_FLOAT, + SVGA3DBLOCKDESC_RG_FP, + { 1, 1, 1 }, + 8, + 8, + { { 0 }, { 32 }, { 32 }, { 0 } }, + { { 0 }, { 32 }, { 0 }, { 0 } } }, + + { SVGA3D_R10G10B10A2_UNORM, + SVGA3DBLOCKDESC_RGBA_UNORM, + { 1, 1, 1 }, + 4, + 4, + { { 10 }, { 10 }, { 10 }, { 2 } }, + { { 20 }, { 10 }, { 0 }, { 30 } } }, + + { SVGA3D_R8G8B8A8_SNORM, + SVGA3DBLOCKDESC_RGBA_SNORM, + { 1, 1, 1 }, + 4, + 4, + { { 8 }, { 8 }, { 8 }, { 8 } }, + { { 16 }, { 8 }, { 0 }, { 24 } } }, + + { SVGA3D_R16G16_FLOAT, + SVGA3DBLOCKDESC_RG_FP, + { 1, 1, 1 }, + 4, + 4, + { { 0 }, { 16 }, { 16 }, { 0 } }, + { { 0 }, { 16 }, { 0 }, { 0 } } }, + + { SVGA3D_R16G16_UNORM, + SVGA3DBLOCKDESC_RG_UNORM, + { 1, 1, 1 }, + 4, + 4, + { { 0 }, { 16 }, { 16 }, { 0 } }, + { { 0 }, { 16 }, { 0 }, { 0 } } }, + + { SVGA3D_R16G16_SNORM, + SVGA3DBLOCKDESC_RG_SNORM, + { 1, 1, 1 }, + 4, + 4, + { { 0 }, { 16 }, { 16 }, { 0 } }, + { { 0 }, { 16 }, { 0 }, { 0 } } }, + + { SVGA3D_R32_FLOAT, + SVGA3DBLOCKDESC_R_FP, + { 1, 1, 1 }, + 4, + 4, + { { 0 }, { 0 }, { 32 }, { 0 } }, + { { 0 }, { 0 }, { 0 }, { 0 } } }, + + { SVGA3D_R8G8_SNORM, + SVGA3DBLOCKDESC_RG_SNORM, + { 1, 1, 1 }, + 2, + 2, + { { 0 }, { 8 }, { 8 }, { 0 } }, + { { 0 }, { 8 }, { 0 }, { 0 } } }, + + { SVGA3D_R16_FLOAT, + SVGA3DBLOCKDESC_R_FP, + { 1, 1, 1 }, + 2, + 2, + { { 0 }, { 0 }, { 16 }, { 0 } }, + { { 0 }, { 0 }, { 0 }, { 0 } } }, + + { SVGA3D_D16_UNORM, + SVGA3DBLOCKDESC_DEPTH_UNORM, + { 1, 1, 1 }, + 2, + 2, + { { 0 }, { 0 }, { 16 }, { 0 } }, + { { 0 }, { 0 }, { 0 }, { 0 } } }, + + { SVGA3D_A8_UNORM, + SVGA3DBLOCKDESC_A_UNORM, + { 1, 1, 1 }, + 1, + 1, + { { 0 }, { 0 }, { 0 }, { 8 } }, + { { 0 }, { 0 }, { 0 }, { 0 } } }, + + { SVGA3D_BC1_UNORM, + SVGA3DBLOCKDESC_BC1_COMP_UNORM, + { 4, 4, 1 }, + 8, + 8, + { { 0 }, { 0 }, { 64 }, { 0 } }, + { { 0 }, { 0 }, { 0 }, { 0 } } }, + + { SVGA3D_BC2_UNORM, + SVGA3DBLOCKDESC_BC2_COMP_UNORM, + { 4, 4, 1 }, + 16, + 16, + { { 0 }, { 0 }, { 128 }, { 0 } }, + { { 0 }, { 0 }, { 
0 }, { 0 } } }, + + { SVGA3D_BC3_UNORM, + SVGA3DBLOCKDESC_BC3_COMP_UNORM, + { 4, 4, 1 }, + 16, + 16, + { { 0 }, { 0 }, { 128 }, { 0 } }, + { { 0 }, { 0 }, { 0 }, { 0 } } }, + + { SVGA3D_B5G6R5_UNORM, + SVGA3DBLOCKDESC_RGB_UNORM, + { 1, 1, 1 }, + 2, + 2, + { { 5 }, { 6 }, { 5 }, { 0 } }, + { { 0 }, { 5 }, { 11 }, { 0 } } }, + + { SVGA3D_B5G5R5A1_UNORM, + SVGA3DBLOCKDESC_RGBA_UNORM, + { 1, 1, 1 }, + 2, + 2, + { { 5 }, { 5 }, { 5 }, { 1 } }, + { { 0 }, { 5 }, { 10 }, { 15 } } }, + + { SVGA3D_B8G8R8A8_UNORM, + SVGA3DBLOCKDESC_RGBA_UNORM, + { 1, 1, 1 }, + 4, + 4, + { { 8 }, { 8 }, { 8 }, { 8 } }, + { { 0 }, { 8 }, { 16 }, { 24 } } }, + + { SVGA3D_B8G8R8X8_UNORM, + SVGA3DBLOCKDESC_RGB_UNORM, + { 1, 1, 1 }, + 4, + 4, + { { 8 }, { 8 }, { 8 }, { 0 } }, + { { 0 }, { 8 }, { 16 }, { 24 } } }, + + { SVGA3D_BC4_UNORM, + SVGA3DBLOCKDESC_BC4_COMP_UNORM, + { 4, 4, 1 }, + 8, + 8, + { { 0 }, { 0 }, { 64 }, { 0 } }, + { { 0 }, { 0 }, { 0 }, { 0 } } }, + + { SVGA3D_BC5_UNORM, + SVGA3DBLOCKDESC_BC5_COMP_UNORM, + { 4, 4, 1 }, + 16, + 16, + { { 0 }, { 0 }, { 128 }, { 0 } }, + { { 0 }, { 0 }, { 0 }, { 0 } } }, + + { SVGA3D_B4G4R4A4_UNORM, + SVGA3DBLOCKDESC_RGBA_UNORM, + { 1, 1, 1 }, + 2, + 2, + { { 4 }, { 4 }, { 4 }, { 4 } }, + { { 0 }, { 4 }, { 8 }, { 12 } } }, + + { SVGA3D_BC6H_TYPELESS, + SVGA3DBLOCKDESC_BC6H_COMP_TYPELESS, + { 4, 4, 1 }, + 16, + 16, + { { 0 }, { 0 }, { 128 }, { 0 } }, + { { 0 }, { 0 }, { 0 }, { 0 } } }, + + { SVGA3D_BC6H_UF16, + SVGA3DBLOCKDESC_BC6H_COMP_UF16, + { 4, 4, 1 }, + 16, + 16, + { { 0 }, { 0 }, { 128 }, { 0 } }, + { { 0 }, { 0 }, { 0 }, { 0 } } }, + + { SVGA3D_BC6H_SF16, + SVGA3DBLOCKDESC_BC6H_COMP_SF16, + { 4, 4, 1 }, + 16, + 16, + { { 0 }, { 0 }, { 128 }, { 0 } }, + { { 0 }, { 0 }, { 0 }, { 0 } } }, + + { SVGA3D_BC7_TYPELESS, + SVGA3DBLOCKDESC_BC7_COMP_TYPELESS, + { 4, 4, 1 }, + 16, + 16, + { { 0 }, { 0 }, { 128 }, { 0 } }, + { { 0 }, { 0 }, { 0 }, { 0 } } }, + + { SVGA3D_BC7_UNORM, + SVGA3DBLOCKDESC_BC7_COMP_UNORM, + { 4, 4, 1 }, + 16, + 16, + { { 0 }, { 0 }, { 128 }, { 0 } }, + { { 0 }, { 0 }, { 0 }, { 0 } } }, + + { SVGA3D_BC7_UNORM_SRGB, + SVGA3DBLOCKDESC_BC7_COMP_UNORM_SRGB, + { 4, 4, 1 }, + 16, + 16, + { { 0 }, { 0 }, { 128 }, { 0 } }, + { { 0 }, { 0 }, { 0 }, { 0 } } }, + + { SVGA3D_AYUV, + SVGA3DBLOCKDESC_AYUV, + { 1, 1, 1 }, + 4, + 4, + { { 8 }, { 8 }, { 8 }, { 8 } }, + { { 0 }, { 8 }, { 16 }, { 24 } } }, + + { SVGA3D_R11G11B10_TYPELESS, + SVGA3DBLOCKDESC_TYPELESS, + { 1, 1, 1 }, + 4, + 4, + { { 10 }, { 11 }, { 11 }, { 0 } }, + { { 22 }, { 11 }, { 0 }, { 0 } } }, }; -/** - * svga3dsurface_subres - Compute the subresource from layer and mipmap. - * @cache: Surface layout data. - * @mip_level: The mipmap level. - * @layer: The surface layer (face or array slice). - * - * Return: The subresource. - */ -static inline u32 svga3dsurface_subres(const struct svga3dsurface_cache *cache, - u32 mip_level, u32 layer) -{ - return cache->num_mip_levels * layer + mip_level; -} - -/** - * svga3dsurface_setup_cache - Build a surface cache entry - * @size: The surface base level dimensions. - * @format: The surface format. - * @num_mip_levels: Number of mipmap levels. - * @num_layers: Number of layers. - * @cache: Pointer to a struct svga3dsurface_cach object to be filled in. - * - * Return: Zero on success, -EINVAL on invalid surface layout. 
- */ -static inline int svga3dsurface_setup_cache(const struct drm_vmw_size *size, - SVGA3dSurfaceFormat format, - u32 num_mip_levels, - u32 num_layers, - u32 num_samples, - struct svga3dsurface_cache *cache) -{ - const struct svga3d_surface_desc *desc; - u32 i; - - memset(cache, 0, sizeof(*cache)); - cache->desc = desc = svga3dsurface_get_desc(format); - cache->num_mip_levels = num_mip_levels; - cache->num_layers = num_layers; - for (i = 0; i < cache->num_mip_levels; i++) { - struct svga3dsurface_mip *mip = &cache->mip[i]; - - mip->size = svga3dsurface_get_mip_size(*size, i); - mip->bytes = svga3dsurface_get_image_buffer_size - (desc, &mip->size, 0); - mip->row_stride = - __KERNEL_DIV_ROUND_UP(mip->size.width, - desc->block_size.width) * - desc->bytes_per_block * num_samples; - if (!mip->row_stride) - goto invalid_dim; - - mip->img_stride = - __KERNEL_DIV_ROUND_UP(mip->size.height, - desc->block_size.height) * - mip->row_stride; - if (!mip->img_stride) - goto invalid_dim; - - cache->mip_chain_bytes += mip->bytes; - } - cache->sheet_bytes = cache->mip_chain_bytes * num_layers; - if (!cache->sheet_bytes) - goto invalid_dim; - - return 0; - -invalid_dim: - VMW_DEBUG_USER("Invalid surface layout for dirty tracking.\n"); - return -EINVAL; -} - -/** - * svga3dsurface_get_loc - Get a surface location from an offset into the - * backing store - * @cache: Surface layout data. - * @loc: Pointer to a struct svga3dsurface_loc to be filled in. - * @offset: Offset into the surface backing store. - */ -static inline void -svga3dsurface_get_loc(const struct svga3dsurface_cache *cache, - struct svga3dsurface_loc *loc, - size_t offset) -{ - const struct svga3dsurface_mip *mip = &cache->mip[0]; - const struct svga3d_surface_desc *desc = cache->desc; - u32 layer; - int i; - - loc->sheet = offset / cache->sheet_bytes; - offset -= loc->sheet * cache->sheet_bytes; - - layer = offset / cache->mip_chain_bytes; - offset -= layer * cache->mip_chain_bytes; - for (i = 0; i < cache->num_mip_levels; ++i, ++mip) { - if (mip->bytes > offset) - break; - offset -= mip->bytes; - } - - loc->sub_resource = svga3dsurface_subres(cache, i, layer); - loc->z = offset / mip->img_stride; - offset -= loc->z * mip->img_stride; - loc->z *= desc->block_size.depth; - loc->y = offset / mip->row_stride; - offset -= loc->y * mip->row_stride; - loc->y *= desc->block_size.height; - loc->x = offset / desc->bytes_per_block; - loc->x *= desc->block_size.width; -} - -/** - * svga3dsurface_inc_loc - Clamp increment a surface location with one block - * size - * in each dimension. - * @loc: Pointer to a struct svga3dsurface_loc to be incremented. - * - * When computing the size of a range as size = end - start, the range does not - * include the end element. However a location representing the last byte - * of a touched region in the backing store *is* included in the range. - * This function modifies such a location to match the end definition - * given as start + size which is the one used in a SVGA3dBox. 
- */ -static inline void -svga3dsurface_inc_loc(const struct svga3dsurface_cache *cache, - struct svga3dsurface_loc *loc) -{ - const struct svga3d_surface_desc *desc = cache->desc; - u32 mip = loc->sub_resource % cache->num_mip_levels; - const struct drm_vmw_size *size = &cache->mip[mip].size; - - loc->sub_resource++; - loc->x += desc->block_size.width; - if (loc->x > size->width) - loc->x = size->width; - loc->y += desc->block_size.height; - if (loc->y > size->height) - loc->y = size->height; - loc->z += desc->block_size.depth; - if (loc->z > size->depth) - loc->z = size->depth; -} - -/** - * svga3dsurface_min_loc - The start location in a subresource - * @cache: Surface layout data. - * @sub_resource: The subresource. - * @loc: Pointer to a struct svga3dsurface_loc to be filled in. - */ -static inline void -svga3dsurface_min_loc(const struct svga3dsurface_cache *cache, - u32 sub_resource, - struct svga3dsurface_loc *loc) -{ - loc->sheet = 0; - loc->sub_resource = sub_resource; - loc->x = loc->y = loc->z = 0; -} - -/** - * svga3dsurface_min_loc - The end location in a subresource - * @cache: Surface layout data. - * @sub_resource: The subresource. - * @loc: Pointer to a struct svga3dsurface_loc to be filled in. - * - * Following the end definition given in svga3dsurface_inc_loc(), - * Compute the end location of a surface subresource. - */ -static inline void -svga3dsurface_max_loc(const struct svga3dsurface_cache *cache, - u32 sub_resource, - struct svga3dsurface_loc *loc) -{ - const struct drm_vmw_size *size; - u32 mip; - - loc->sheet = 0; - loc->sub_resource = sub_resource + 1; - mip = sub_resource % cache->num_mip_levels; - size = &cache->mip[mip].size; - loc->x = size->width; - loc->y = size->height; - loc->z = size->depth; +#ifdef __cplusplus } +#endif -#endif /* _SVGA3D_SURFACEDEFS_H_ */ +#endif diff --git a/drivers/gpu/drm/vmwgfx/device_include/svga3d_types.h b/drivers/gpu/drm/vmwgfx/device_include/svga3d_types.h index 77e338a65791..70b88ee16cf6 100644 --- a/drivers/gpu/drm/vmwgfx/device_include/svga3d_types.h +++ b/drivers/gpu/drm/vmwgfx/device_include/svga3d_types.h @@ -1,6 +1,6 @@ -/* SPDX-License-Identifier: GPL-2.0 OR MIT */ /********************************************************** - * Copyright 2012-2015 VMware, Inc. + * Copyright 2012-2021 VMware, Inc. 
+ * SPDX-License-Identifier: GPL-2.0 OR MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation @@ -27,1974 +27,1530 @@ /* * svga3d_types.h -- * - * SVGA 3d hardware definitions for basic types + * SVGA 3d hardware definitions for basic types */ + + #ifndef _SVGA3D_TYPES_H_ #define _SVGA3D_TYPES_H_ -#define INCLUDE_ALLOW_MODULE -#define INCLUDE_ALLOW_USERLEVEL -#define INCLUDE_ALLOW_VMCORE - -#include "includeCheck.h" +#include "vm_basic_types.h" -/* - * Generic Types - */ +#define SVGA3D_INVALID_ID ((uint32)-1) -#define SVGA3D_INVALID_ID ((uint32)-1) +#define SVGA3D_RESOURCE_TYPE_MIN 1 +#define SVGA3D_RESOURCE_BUFFER 1 +#define SVGA3D_RESOURCE_TEXTURE1D 2 +#define SVGA3D_RESOURCE_TEXTURE2D 3 +#define SVGA3D_RESOURCE_TEXTURE3D 4 +#define SVGA3D_RESOURCE_TEXTURECUBE 5 +#define SVGA3D_RESOURCE_TYPE_DX10_MAX 6 +#define SVGA3D_RESOURCE_BUFFEREX 6 +#define SVGA3D_RESOURCE_TYPE_MAX 7 +typedef uint32 SVGA3dResourceType; -typedef uint8 SVGABool8; /* 8-bit Bool definition */ -typedef uint32 SVGA3dBool; /* 32-bit Bool definition */ -typedef uint32 SVGA3dColor; /* a, r, g, b */ +typedef uint8 SVGABool8; +typedef uint32 SVGA3dBool; +typedef uint32 SVGA3dColor; typedef uint32 SVGA3dSurfaceId; -typedef -#include "vmware_pack_begin.h" -struct { - uint32 numerator; - uint32 denominator; -} -#include "vmware_pack_end.h" -SVGA3dFraction64; - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCopyRect { - uint32 x; - uint32 y; - uint32 w; - uint32 h; - uint32 srcx; - uint32 srcy; -} -#include "vmware_pack_end.h" -SVGA3dCopyRect; - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dCopyBox { - uint32 x; - uint32 y; - uint32 z; - uint32 w; - uint32 h; - uint32 d; - uint32 srcx; - uint32 srcy; - uint32 srcz; -} -#include "vmware_pack_end.h" -SVGA3dCopyBox; - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dRect { - uint32 x; - uint32 y; - uint32 w; - uint32 h; -} -#include "vmware_pack_end.h" -SVGA3dRect; - -typedef -#include "vmware_pack_begin.h" -struct { - uint32 x; - uint32 y; - uint32 z; - uint32 w; - uint32 h; - uint32 d; -} -#include "vmware_pack_end.h" -SVGA3dBox; - -typedef -#include "vmware_pack_begin.h" -struct { - int32 x; - int32 y; - int32 z; - int32 w; - int32 h; - int32 d; -} -#include "vmware_pack_end.h" -SVGA3dSignedBox; - -typedef -#include "vmware_pack_begin.h" -struct { - uint32 x; - uint32 y; - uint32 z; -} -#include "vmware_pack_end.h" -SVGA3dPoint; +#pragma pack(push, 1) +typedef struct { + uint32 numerator; + uint32 denominator; +} SVGA3dFraction64; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCopyRect { + uint32 x; + uint32 y; + uint32 w; + uint32 h; + uint32 srcx; + uint32 srcy; +} SVGA3dCopyRect; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dCopyBox { + uint32 x; + uint32 y; + uint32 z; + uint32 w; + uint32 h; + uint32 d; + uint32 srcx; + uint32 srcy; + uint32 srcz; +} SVGA3dCopyBox; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dRect { + uint32 x; + uint32 y; + uint32 w; + uint32 h; +} SVGA3dRect; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint32 x; + uint32 y; + uint32 z; + uint32 w; + uint32 h; + uint32 d; +} SVGA3dBox; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + int32 x; + int32 y; + int32 z; + int32 w; + int32 h; + int32 d; +} SVGA3dSignedBox; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint32 x; + uint32 y; + uint32 z; +} SVGA3dPoint; +#pragma pack(pop) + +#pragma 
pack(push, 1) +typedef union { + struct { + float r; + float g; + float b; + float a; + }; + + float value[4]; +} SVGA3dRGBAFloat; +#pragma pack(pop) -/* - * Surface formats. - */ typedef enum SVGA3dSurfaceFormat { - SVGA3D_FORMAT_INVALID = 0, - - SVGA3D_X8R8G8B8 = 1, - SVGA3D_FORMAT_MIN = 1, - - SVGA3D_A8R8G8B8 = 2, - - SVGA3D_R5G6B5 = 3, - SVGA3D_X1R5G5B5 = 4, - SVGA3D_A1R5G5B5 = 5, - SVGA3D_A4R4G4B4 = 6, - - SVGA3D_Z_D32 = 7, - SVGA3D_Z_D16 = 8, - SVGA3D_Z_D24S8 = 9, - SVGA3D_Z_D15S1 = 10, - - SVGA3D_LUMINANCE8 = 11, - SVGA3D_LUMINANCE4_ALPHA4 = 12, - SVGA3D_LUMINANCE16 = 13, - SVGA3D_LUMINANCE8_ALPHA8 = 14, - - SVGA3D_DXT1 = 15, - SVGA3D_DXT2 = 16, - SVGA3D_DXT3 = 17, - SVGA3D_DXT4 = 18, - SVGA3D_DXT5 = 19, - - SVGA3D_BUMPU8V8 = 20, - SVGA3D_BUMPL6V5U5 = 21, - SVGA3D_BUMPX8L8V8U8 = 22, - SVGA3D_FORMAT_DEAD1 = 23, - - SVGA3D_ARGB_S10E5 = 24, /* 16-bit floating-point ARGB */ - SVGA3D_ARGB_S23E8 = 25, /* 32-bit floating-point ARGB */ - - SVGA3D_A2R10G10B10 = 26, - - /* signed formats */ - SVGA3D_V8U8 = 27, - SVGA3D_Q8W8V8U8 = 28, - SVGA3D_CxV8U8 = 29, - - /* mixed formats */ - SVGA3D_X8L8V8U8 = 30, - SVGA3D_A2W10V10U10 = 31, - - SVGA3D_ALPHA8 = 32, - - /* Single- and dual-component floating point formats */ - SVGA3D_R_S10E5 = 33, - SVGA3D_R_S23E8 = 34, - SVGA3D_RG_S10E5 = 35, - SVGA3D_RG_S23E8 = 36, - - SVGA3D_BUFFER = 37, - - SVGA3D_Z_D24X8 = 38, - - SVGA3D_V16U16 = 39, - - SVGA3D_G16R16 = 40, - SVGA3D_A16B16G16R16 = 41, - - /* Packed Video formats */ - SVGA3D_UYVY = 42, - SVGA3D_YUY2 = 43, - - /* Planar video formats */ - SVGA3D_NV12 = 44, - - SVGA3D_FORMAT_DEAD2 = 45, - - SVGA3D_R32G32B32A32_TYPELESS = 46, - SVGA3D_R32G32B32A32_UINT = 47, - SVGA3D_R32G32B32A32_SINT = 48, - SVGA3D_R32G32B32_TYPELESS = 49, - SVGA3D_R32G32B32_FLOAT = 50, - SVGA3D_R32G32B32_UINT = 51, - SVGA3D_R32G32B32_SINT = 52, - SVGA3D_R16G16B16A16_TYPELESS = 53, - SVGA3D_R16G16B16A16_UINT = 54, - SVGA3D_R16G16B16A16_SNORM = 55, - SVGA3D_R16G16B16A16_SINT = 56, - SVGA3D_R32G32_TYPELESS = 57, - SVGA3D_R32G32_UINT = 58, - SVGA3D_R32G32_SINT = 59, - SVGA3D_R32G8X24_TYPELESS = 60, - SVGA3D_D32_FLOAT_S8X24_UINT = 61, - SVGA3D_R32_FLOAT_X8X24 = 62, - SVGA3D_X32_G8X24_UINT = 63, - SVGA3D_R10G10B10A2_TYPELESS = 64, - SVGA3D_R10G10B10A2_UINT = 65, - SVGA3D_R11G11B10_FLOAT = 66, - SVGA3D_R8G8B8A8_TYPELESS = 67, - SVGA3D_R8G8B8A8_UNORM = 68, - SVGA3D_R8G8B8A8_UNORM_SRGB = 69, - SVGA3D_R8G8B8A8_UINT = 70, - SVGA3D_R8G8B8A8_SINT = 71, - SVGA3D_R16G16_TYPELESS = 72, - SVGA3D_R16G16_UINT = 73, - SVGA3D_R16G16_SINT = 74, - SVGA3D_R32_TYPELESS = 75, - SVGA3D_D32_FLOAT = 76, - SVGA3D_R32_UINT = 77, - SVGA3D_R32_SINT = 78, - SVGA3D_R24G8_TYPELESS = 79, - SVGA3D_D24_UNORM_S8_UINT = 80, - SVGA3D_R24_UNORM_X8 = 81, - SVGA3D_X24_G8_UINT = 82, - SVGA3D_R8G8_TYPELESS = 83, - SVGA3D_R8G8_UNORM = 84, - SVGA3D_R8G8_UINT = 85, - SVGA3D_R8G8_SINT = 86, - SVGA3D_R16_TYPELESS = 87, - SVGA3D_R16_UNORM = 88, - SVGA3D_R16_UINT = 89, - SVGA3D_R16_SNORM = 90, - SVGA3D_R16_SINT = 91, - SVGA3D_R8_TYPELESS = 92, - SVGA3D_R8_UNORM = 93, - SVGA3D_R8_UINT = 94, - SVGA3D_R8_SNORM = 95, - SVGA3D_R8_SINT = 96, - SVGA3D_P8 = 97, - SVGA3D_R9G9B9E5_SHAREDEXP = 98, - SVGA3D_R8G8_B8G8_UNORM = 99, - SVGA3D_G8R8_G8B8_UNORM = 100, - SVGA3D_BC1_TYPELESS = 101, - SVGA3D_BC1_UNORM_SRGB = 102, - SVGA3D_BC2_TYPELESS = 103, - SVGA3D_BC2_UNORM_SRGB = 104, - SVGA3D_BC3_TYPELESS = 105, - SVGA3D_BC3_UNORM_SRGB = 106, - SVGA3D_BC4_TYPELESS = 107, - SVGA3D_ATI1 = 108, /* DX9-specific BC4_UNORM */ - SVGA3D_BC4_SNORM = 109, - SVGA3D_BC5_TYPELESS = 110, - SVGA3D_ATI2 = 111, /* 
DX9-specific BC5_UNORM */ - SVGA3D_BC5_SNORM = 112, - SVGA3D_R10G10B10_XR_BIAS_A2_UNORM = 113, - SVGA3D_B8G8R8A8_TYPELESS = 114, - SVGA3D_B8G8R8A8_UNORM_SRGB = 115, - SVGA3D_B8G8R8X8_TYPELESS = 116, - SVGA3D_B8G8R8X8_UNORM_SRGB = 117, - - /* Advanced depth formats. */ - SVGA3D_Z_DF16 = 118, - SVGA3D_Z_DF24 = 119, - SVGA3D_Z_D24S8_INT = 120, - - /* Planar video formats. */ - SVGA3D_YV12 = 121, - - SVGA3D_R32G32B32A32_FLOAT = 122, - SVGA3D_R16G16B16A16_FLOAT = 123, - SVGA3D_R16G16B16A16_UNORM = 124, - SVGA3D_R32G32_FLOAT = 125, - SVGA3D_R10G10B10A2_UNORM = 126, - SVGA3D_R8G8B8A8_SNORM = 127, - SVGA3D_R16G16_FLOAT = 128, - SVGA3D_R16G16_UNORM = 129, - SVGA3D_R16G16_SNORM = 130, - SVGA3D_R32_FLOAT = 131, - SVGA3D_R8G8_SNORM = 132, - SVGA3D_R16_FLOAT = 133, - SVGA3D_D16_UNORM = 134, - SVGA3D_A8_UNORM = 135, - SVGA3D_BC1_UNORM = 136, - SVGA3D_BC2_UNORM = 137, - SVGA3D_BC3_UNORM = 138, - SVGA3D_B5G6R5_UNORM = 139, - SVGA3D_B5G5R5A1_UNORM = 140, - SVGA3D_B8G8R8A8_UNORM = 141, - SVGA3D_B8G8R8X8_UNORM = 142, - SVGA3D_BC4_UNORM = 143, - SVGA3D_BC5_UNORM = 144, - SVGA3D_B4G4R4A4_UNORM = 145, - - /* DX11 compressed formats */ - SVGA3D_BC6H_TYPELESS = 146, - SVGA3D_BC6H_UF16 = 147, - SVGA3D_BC6H_SF16 = 148, - SVGA3D_BC7_TYPELESS = 149, - SVGA3D_BC7_UNORM = 150, - SVGA3D_BC7_UNORM_SRGB = 151, - - /* Video format with alpha */ - SVGA3D_AYUV = 152, - - SVGA3D_FORMAT_MAX + SVGA3D_FORMAT_INVALID = 0, + + SVGA3D_X8R8G8B8 = 1, + SVGA3D_FORMAT_MIN = 1, + + SVGA3D_A8R8G8B8 = 2, + + SVGA3D_R5G6B5 = 3, + SVGA3D_X1R5G5B5 = 4, + SVGA3D_A1R5G5B5 = 5, + SVGA3D_A4R4G4B4 = 6, + + SVGA3D_Z_D32 = 7, + SVGA3D_Z_D16 = 8, + SVGA3D_Z_D24S8 = 9, + SVGA3D_Z_D15S1 = 10, + + SVGA3D_LUMINANCE8 = 11, + SVGA3D_LUMINANCE4_ALPHA4 = 12, + SVGA3D_LUMINANCE16 = 13, + SVGA3D_LUMINANCE8_ALPHA8 = 14, + + SVGA3D_DXT1 = 15, + SVGA3D_DXT2 = 16, + SVGA3D_DXT3 = 17, + SVGA3D_DXT4 = 18, + SVGA3D_DXT5 = 19, + + SVGA3D_BUMPU8V8 = 20, + SVGA3D_BUMPL6V5U5 = 21, + SVGA3D_BUMPX8L8V8U8 = 22, + SVGA3D_FORMAT_DEAD1 = 23, + + SVGA3D_ARGB_S10E5 = 24, + SVGA3D_ARGB_S23E8 = 25, + + SVGA3D_A2R10G10B10 = 26, + + SVGA3D_V8U8 = 27, + SVGA3D_Q8W8V8U8 = 28, + SVGA3D_CxV8U8 = 29, + + SVGA3D_X8L8V8U8 = 30, + SVGA3D_A2W10V10U10 = 31, + + SVGA3D_ALPHA8 = 32, + + SVGA3D_R_S10E5 = 33, + SVGA3D_R_S23E8 = 34, + SVGA3D_RG_S10E5 = 35, + SVGA3D_RG_S23E8 = 36, + + SVGA3D_BUFFER = 37, + + SVGA3D_Z_D24X8 = 38, + + SVGA3D_V16U16 = 39, + + SVGA3D_G16R16 = 40, + SVGA3D_A16B16G16R16 = 41, + + SVGA3D_UYVY = 42, + SVGA3D_YUY2 = 43, + + SVGA3D_NV12 = 44, + + SVGA3D_FORMAT_DEAD2 = 45, + + SVGA3D_R32G32B32A32_TYPELESS = 46, + SVGA3D_R32G32B32A32_UINT = 47, + SVGA3D_R32G32B32A32_SINT = 48, + SVGA3D_R32G32B32_TYPELESS = 49, + SVGA3D_R32G32B32_FLOAT = 50, + SVGA3D_R32G32B32_UINT = 51, + SVGA3D_R32G32B32_SINT = 52, + SVGA3D_R16G16B16A16_TYPELESS = 53, + SVGA3D_R16G16B16A16_UINT = 54, + SVGA3D_R16G16B16A16_SNORM = 55, + SVGA3D_R16G16B16A16_SINT = 56, + SVGA3D_R32G32_TYPELESS = 57, + SVGA3D_R32G32_UINT = 58, + SVGA3D_R32G32_SINT = 59, + SVGA3D_R32G8X24_TYPELESS = 60, + SVGA3D_D32_FLOAT_S8X24_UINT = 61, + SVGA3D_R32_FLOAT_X8X24 = 62, + SVGA3D_X32_G8X24_UINT = 63, + SVGA3D_R10G10B10A2_TYPELESS = 64, + SVGA3D_R10G10B10A2_UINT = 65, + SVGA3D_R11G11B10_FLOAT = 66, + SVGA3D_R8G8B8A8_TYPELESS = 67, + SVGA3D_R8G8B8A8_UNORM = 68, + SVGA3D_R8G8B8A8_UNORM_SRGB = 69, + SVGA3D_R8G8B8A8_UINT = 70, + SVGA3D_R8G8B8A8_SINT = 71, + SVGA3D_R16G16_TYPELESS = 72, + SVGA3D_R16G16_UINT = 73, + SVGA3D_R16G16_SINT = 74, + SVGA3D_R32_TYPELESS = 75, + SVGA3D_D32_FLOAT = 76, + SVGA3D_R32_UINT = 77, + SVGA3D_R32_SINT = 
78, + SVGA3D_R24G8_TYPELESS = 79, + SVGA3D_D24_UNORM_S8_UINT = 80, + SVGA3D_R24_UNORM_X8 = 81, + SVGA3D_X24_G8_UINT = 82, + SVGA3D_R8G8_TYPELESS = 83, + SVGA3D_R8G8_UNORM = 84, + SVGA3D_R8G8_UINT = 85, + SVGA3D_R8G8_SINT = 86, + SVGA3D_R16_TYPELESS = 87, + SVGA3D_R16_UNORM = 88, + SVGA3D_R16_UINT = 89, + SVGA3D_R16_SNORM = 90, + SVGA3D_R16_SINT = 91, + SVGA3D_R8_TYPELESS = 92, + SVGA3D_R8_UNORM = 93, + SVGA3D_R8_UINT = 94, + SVGA3D_R8_SNORM = 95, + SVGA3D_R8_SINT = 96, + SVGA3D_P8 = 97, + SVGA3D_R9G9B9E5_SHAREDEXP = 98, + SVGA3D_R8G8_B8G8_UNORM = 99, + SVGA3D_G8R8_G8B8_UNORM = 100, + SVGA3D_BC1_TYPELESS = 101, + SVGA3D_BC1_UNORM_SRGB = 102, + SVGA3D_BC2_TYPELESS = 103, + SVGA3D_BC2_UNORM_SRGB = 104, + SVGA3D_BC3_TYPELESS = 105, + SVGA3D_BC3_UNORM_SRGB = 106, + SVGA3D_BC4_TYPELESS = 107, + SVGA3D_ATI1 = 108, + SVGA3D_BC4_SNORM = 109, + SVGA3D_BC5_TYPELESS = 110, + SVGA3D_ATI2 = 111, + SVGA3D_BC5_SNORM = 112, + SVGA3D_R10G10B10_XR_BIAS_A2_UNORM = 113, + SVGA3D_B8G8R8A8_TYPELESS = 114, + SVGA3D_B8G8R8A8_UNORM_SRGB = 115, + SVGA3D_B8G8R8X8_TYPELESS = 116, + SVGA3D_B8G8R8X8_UNORM_SRGB = 117, + + SVGA3D_Z_DF16 = 118, + SVGA3D_Z_DF24 = 119, + SVGA3D_Z_D24S8_INT = 120, + + SVGA3D_YV12 = 121, + + SVGA3D_R32G32B32A32_FLOAT = 122, + SVGA3D_R16G16B16A16_FLOAT = 123, + SVGA3D_R16G16B16A16_UNORM = 124, + SVGA3D_R32G32_FLOAT = 125, + SVGA3D_R10G10B10A2_UNORM = 126, + SVGA3D_R8G8B8A8_SNORM = 127, + SVGA3D_R16G16_FLOAT = 128, + SVGA3D_R16G16_UNORM = 129, + SVGA3D_R16G16_SNORM = 130, + SVGA3D_R32_FLOAT = 131, + SVGA3D_R8G8_SNORM = 132, + SVGA3D_R16_FLOAT = 133, + SVGA3D_D16_UNORM = 134, + SVGA3D_A8_UNORM = 135, + SVGA3D_BC1_UNORM = 136, + SVGA3D_BC2_UNORM = 137, + SVGA3D_BC3_UNORM = 138, + SVGA3D_B5G6R5_UNORM = 139, + SVGA3D_B5G5R5A1_UNORM = 140, + SVGA3D_B8G8R8A8_UNORM = 141, + SVGA3D_B8G8R8X8_UNORM = 142, + SVGA3D_BC4_UNORM = 143, + SVGA3D_BC5_UNORM = 144, + SVGA3D_B4G4R4A4_UNORM = 145, + + SVGA3D_BC6H_TYPELESS = 146, + SVGA3D_BC6H_UF16 = 147, + SVGA3D_BC6H_SF16 = 148, + SVGA3D_BC7_TYPELESS = 149, + SVGA3D_BC7_UNORM = 150, + SVGA3D_BC7_UNORM_SRGB = 151, + + SVGA3D_AYUV = 152, + + SVGA3D_R11G11B10_TYPELESS = 153, + + SVGA3D_FORMAT_MAX } SVGA3dSurfaceFormat; -/* - * SVGA3d Surface Flags -- - */ -#define SVGA3D_SURFACE_CUBEMAP (1 << 0) +#define SVGA3D_SURFACE_CUBEMAP (1 << 0) -/* - * HINT flags are not enforced by the device but are useful for - * performance. 
- */ -#define SVGA3D_SURFACE_HINT_STATIC (CONST64U(1) << 1) -#define SVGA3D_SURFACE_HINT_DYNAMIC (CONST64U(1) << 2) -#define SVGA3D_SURFACE_HINT_INDEXBUFFER (CONST64U(1) << 3) -#define SVGA3D_SURFACE_HINT_VERTEXBUFFER (CONST64U(1) << 4) -#define SVGA3D_SURFACE_HINT_TEXTURE (CONST64U(1) << 5) -#define SVGA3D_SURFACE_HINT_RENDERTARGET (CONST64U(1) << 6) -#define SVGA3D_SURFACE_HINT_DEPTHSTENCIL (CONST64U(1) << 7) -#define SVGA3D_SURFACE_HINT_WRITEONLY (CONST64U(1) << 8) -#define SVGA3D_SURFACE_DEAD2 (CONST64U(1) << 9) -#define SVGA3D_SURFACE_AUTOGENMIPMAPS (CONST64U(1) << 10) - -#define SVGA3D_SURFACE_DEAD1 (CONST64U(1) << 11) +#define SVGA3D_SURFACE_HINT_STATIC (CONST64U(1) << 1) +#define SVGA3D_SURFACE_HINT_DYNAMIC (CONST64U(1) << 2) +#define SVGA3D_SURFACE_HINT_INDEXBUFFER (CONST64U(1) << 3) +#define SVGA3D_SURFACE_HINT_VERTEXBUFFER (CONST64U(1) << 4) +#define SVGA3D_SURFACE_HINT_TEXTURE (CONST64U(1) << 5) +#define SVGA3D_SURFACE_HINT_RENDERTARGET (CONST64U(1) << 6) +#define SVGA3D_SURFACE_HINT_DEPTHSTENCIL (CONST64U(1) << 7) +#define SVGA3D_SURFACE_HINT_WRITEONLY (CONST64U(1) << 8) +#define SVGA3D_SURFACE_DEAD2 (CONST64U(1) << 9) +#define SVGA3D_SURFACE_AUTOGENMIPMAPS (CONST64U(1) << 10) -/* - * Is this surface using a base-level pitch for it's mob backing? - * - * This flag is not intended to be set by guest-drivers, but is instead - * set by the device when the surface is bound to a mob with a specified - * pitch. - */ -#define SVGA3D_SURFACE_MOB_PITCH (CONST64U(1) << 12) +#define SVGA3D_SURFACE_DEAD1 (CONST64U(1) << 11) -#define SVGA3D_SURFACE_INACTIVE (CONST64U(1) << 13) -#define SVGA3D_SURFACE_HINT_RT_LOCKABLE (CONST64U(1) << 14) -#define SVGA3D_SURFACE_VOLUME (CONST64U(1) << 15) +#define SVGA3D_SURFACE_MOB_PITCH (CONST64U(1) << 12) -/* - * Required to be set on a surface to bind it to a screen target. - */ -#define SVGA3D_SURFACE_SCREENTARGET (CONST64U(1) << 16) +#define SVGA3D_SURFACE_INACTIVE (CONST64U(1) << 13) +#define SVGA3D_SURFACE_HINT_RT_LOCKABLE (CONST64U(1) << 14) +#define SVGA3D_SURFACE_VOLUME (CONST64U(1) << 15) -/* - * Align images in the guest-backing mob to 16-bytes. - */ -#define SVGA3D_SURFACE_ALIGN16 (CONST64U(1) << 17) +#define SVGA3D_SURFACE_SCREENTARGET (CONST64U(1) << 16) -#define SVGA3D_SURFACE_1D (CONST64U(1) << 18) -#define SVGA3D_SURFACE_ARRAY (CONST64U(1) << 19) +#define SVGA3D_SURFACE_ALIGN16 (CONST64U(1) << 17) -/* - * Bind flags. - * These are enforced for any surface defined with DefineGBSurface_v2. - */ -#define SVGA3D_SURFACE_BIND_VERTEX_BUFFER (CONST64U(1) << 20) -#define SVGA3D_SURFACE_BIND_INDEX_BUFFER (CONST64U(1) << 21) -#define SVGA3D_SURFACE_BIND_CONSTANT_BUFFER (CONST64U(1) << 22) -#define SVGA3D_SURFACE_BIND_SHADER_RESOURCE (CONST64U(1) << 23) -#define SVGA3D_SURFACE_BIND_RENDER_TARGET (CONST64U(1) << 24) -#define SVGA3D_SURFACE_BIND_DEPTH_STENCIL (CONST64U(1) << 25) -#define SVGA3D_SURFACE_BIND_STREAM_OUTPUT (CONST64U(1) << 26) +#define SVGA3D_SURFACE_1D (CONST64U(1) << 18) +#define SVGA3D_SURFACE_ARRAY (CONST64U(1) << 19) -/* - * The STAGING flags notes that the surface will not be used directly by the - * drawing pipeline, i.e. that it will not be bound to any bind point. - * Staging surfaces may be used by copy operations to move data in and out - * of other surfaces. No bind flags may be set on surfaces with this flag. - * - * The HINT_INDIRECT_UPDATE flag suggests that the surface will receive - * updates indirectly, i.e. the surface will not be updated directly, but - * will receive copies from staging surfaces. 
- */ -#define SVGA3D_SURFACE_STAGING_UPLOAD (CONST64U(1) << 27) -#define SVGA3D_SURFACE_STAGING_DOWNLOAD (CONST64U(1) << 28) -#define SVGA3D_SURFACE_HINT_INDIRECT_UPDATE (CONST64U(1) << 29) +#define SVGA3D_SURFACE_BIND_VERTEX_BUFFER (CONST64U(1) << 20) +#define SVGA3D_SURFACE_BIND_INDEX_BUFFER (CONST64U(1) << 21) +#define SVGA3D_SURFACE_BIND_CONSTANT_BUFFER (CONST64U(1) << 22) +#define SVGA3D_SURFACE_BIND_SHADER_RESOURCE (CONST64U(1) << 23) +#define SVGA3D_SURFACE_BIND_RENDER_TARGET (CONST64U(1) << 24) +#define SVGA3D_SURFACE_BIND_DEPTH_STENCIL (CONST64U(1) << 25) +#define SVGA3D_SURFACE_BIND_STREAM_OUTPUT (CONST64U(1) << 26) -/* - * Setting this flag allow this surface to be used with the - * SVGA_3D_CMD_DX_TRANSFER_FROM_BUFFER command. It is only valid for - * buffer surfaces, and no bind flags are allowed to be set on surfaces - * with this flag except SVGA3D_SURFACE_TRANSFER_TO_BUFFER. - */ -#define SVGA3D_SURFACE_TRANSFER_FROM_BUFFER (CONST64U(1) << 30) +#define SVGA3D_SURFACE_STAGING_UPLOAD (CONST64U(1) << 27) +#define SVGA3D_SURFACE_STAGING_DOWNLOAD (CONST64U(1) << 28) +#define SVGA3D_SURFACE_HINT_INDIRECT_UPDATE (CONST64U(1) << 29) -/* - * Reserved for video operations. - */ -#define SVGA3D_SURFACE_RESERVED1 (CONST64U(1) << 31) +#define SVGA3D_SURFACE_TRANSFER_FROM_BUFFER (CONST64U(1) << 30) -/* - * Specifies that a surface is multisample, and therefore requires the full - * mob-backing to store all the samples. - */ -#define SVGA3D_SURFACE_MULTISAMPLE (CONST64U(1) << 32) +#define SVGA3D_SURFACE_RESERVED1 (CONST64U(1) << 31) +#define SVGA3D_SURFACE_VADECODE SVGA3D_SURFACE_RESERVED1 -/* - * Specified that the surface is allowed to be bound to a UAView. - */ -#define SVGA3D_SURFACE_BIND_UAVIEW (CONST64U(1) << 33) +#define SVGA3D_SURFACE_MULTISAMPLE (CONST64U(1) << 32) -/* - * Setting this flag allow this surface to be used with the - * SVGA_3D_CMD_DX_TRANSFER_TO_BUFFER command. It is only valid for - * buffer surfaces, and no bind flags are allowed to be set on surfaces - * with this flag except SVGA3D_SURFACE_TRANSFER_FROM_BUFFER. - */ -#define SVGA3D_SURFACE_TRANSFER_TO_BUFFER (CONST64U(1) << 34) +#define SVGA3D_SURFACE_BIND_UAVIEW (CONST64U(1) << 33) -#define SVGA3D_SURFACE_BIND_LOGICOPS (CONST64U(1) << 35) +#define SVGA3D_SURFACE_TRANSFER_TO_BUFFER (CONST64U(1) << 34) -/* - * Optional flags for use with SVGA3D_SURFACE_BIND_UAVIEW - */ -#define SVGA3D_SURFACE_BIND_RAW_VIEWS (CONST64U(1) << 36) -#define SVGA3D_SURFACE_BUFFER_STRUCTURED (CONST64U(1) << 37) +#define SVGA3D_SURFACE_BIND_LOGICOPS (CONST64U(1) << 35) -#define SVGA3D_SURFACE_DRAWINDIRECT_ARGS (CONST64U(1) << 38) -#define SVGA3D_SURFACE_RESOURCE_CLAMP (CONST64U(1) << 39) +#define SVGA3D_SURFACE_BIND_RAW_VIEWS (CONST64U(1) << 36) +#define SVGA3D_SURFACE_BUFFER_STRUCTURED (CONST64U(1) << 37) -#define SVGA3D_SURFACE_FLAG_MAX (CONST64U(1) << 40) +#define SVGA3D_SURFACE_DRAWINDIRECT_ARGS (CONST64U(1) << 38) +#define SVGA3D_SURFACE_RESOURCE_CLAMP (CONST64U(1) << 39) + +#define SVGA3D_SURFACE_STAGING_COPY (CONST64U(1) << 40) + +#define SVGA3D_SURFACE_FLAG_MAX (CONST64U(1) << 44) -/* - * Surface flags types: - * - * SVGA3dSurface1Flags: Lower 32-bits of flags. - * SVGA3dSurface2Flags: Upper 32-bits of flags. - * SVGA3dSurfaceAllFlags: Full 64-bits of flags. 
- */ typedef uint32 SVGA3dSurface1Flags; typedef uint32 SVGA3dSurface2Flags; typedef uint64 SVGA3dSurfaceAllFlags; -#define SVGA3D_SURFACE_FLAGS1_MASK ((uint64_t)MAX_UINT32) +#define SVGA3D_SURFACE_FLAGS1_MASK ((uint64)MAX_UINT32) #define SVGA3D_SURFACE_FLAGS2_MASK (MAX_UINT64 & ~SVGA3D_SURFACE_FLAGS1_MASK) -#define SVGA3D_SURFACE_HB_DISALLOWED_MASK \ - ( SVGA3D_SURFACE_MOB_PITCH | \ - SVGA3D_SURFACE_SCREENTARGET | \ - SVGA3D_SURFACE_ALIGN16 | \ - SVGA3D_SURFACE_BIND_CONSTANT_BUFFER | \ - SVGA3D_SURFACE_BIND_STREAM_OUTPUT | \ - SVGA3D_SURFACE_STAGING_UPLOAD | \ - SVGA3D_SURFACE_STAGING_DOWNLOAD | \ - SVGA3D_SURFACE_HINT_INDIRECT_UPDATE | \ - SVGA3D_SURFACE_TRANSFER_FROM_BUFFER | \ - SVGA3D_SURFACE_RESERVED1 | \ - SVGA3D_SURFACE_MULTISAMPLE | \ - SVGA3D_SURFACE_BIND_UAVIEW | \ - SVGA3D_SURFACE_TRANSFER_TO_BUFFER | \ - SVGA3D_SURFACE_BIND_LOGICOPS | \ - SVGA3D_SURFACE_BIND_RAW_VIEWS | \ - SVGA3D_SURFACE_BUFFER_STRUCTURED | \ - SVGA3D_SURFACE_DRAWINDIRECT_ARGS | \ - SVGA3D_SURFACE_RESOURCE_CLAMP \ - ) - -#define SVGA3D_SURFACE_HB_PRESENT_DISALLOWED_MASK \ - ( SVGA3D_SURFACE_1D | \ - SVGA3D_SURFACE_RESERVED1 | \ - SVGA3D_SURFACE_MULTISAMPLE \ - ) - -#define SVGA3D_SURFACE_2D_DISALLOWED_MASK \ - ( SVGA3D_SURFACE_CUBEMAP | \ - SVGA3D_SURFACE_AUTOGENMIPMAPS | \ - SVGA3D_SURFACE_VOLUME | \ - SVGA3D_SURFACE_1D | \ - SVGA3D_SURFACE_BIND_VERTEX_BUFFER | \ - SVGA3D_SURFACE_BIND_INDEX_BUFFER | \ - SVGA3D_SURFACE_BIND_CONSTANT_BUFFER | \ - SVGA3D_SURFACE_BIND_DEPTH_STENCIL | \ - SVGA3D_SURFACE_BIND_STREAM_OUTPUT | \ - SVGA3D_SURFACE_TRANSFER_FROM_BUFFER | \ - SVGA3D_SURFACE_RESERVED1 | \ - SVGA3D_SURFACE_MULTISAMPLE | \ - SVGA3D_SURFACE_BIND_UAVIEW | \ - SVGA3D_SURFACE_TRANSFER_TO_BUFFER | \ - SVGA3D_SURFACE_BIND_RAW_VIEWS | \ - SVGA3D_SURFACE_BUFFER_STRUCTURED | \ - SVGA3D_SURFACE_DRAWINDIRECT_ARGS | \ - SVGA3D_SURFACE_RESOURCE_CLAMP \ - ) - -#define SVGA3D_SURFACE_BASICOPS_DISALLOWED_MASK \ - ( SVGA3D_SURFACE_CUBEMAP | \ - SVGA3D_SURFACE_AUTOGENMIPMAPS | \ - SVGA3D_SURFACE_VOLUME | \ - SVGA3D_SURFACE_1D | \ - SVGA3D_SURFACE_RESERVED1 | \ - SVGA3D_SURFACE_MULTISAMPLE \ - ) - -#define SVGA3D_SURFACE_SCREENTARGET_DISALLOWED_MASK \ - ( SVGA3D_SURFACE_CUBEMAP | \ - SVGA3D_SURFACE_AUTOGENMIPMAPS | \ - SVGA3D_SURFACE_VOLUME | \ - SVGA3D_SURFACE_1D | \ - SVGA3D_SURFACE_BIND_VERTEX_BUFFER | \ - SVGA3D_SURFACE_BIND_INDEX_BUFFER | \ - SVGA3D_SURFACE_BIND_CONSTANT_BUFFER | \ - SVGA3D_SURFACE_BIND_DEPTH_STENCIL | \ - SVGA3D_SURFACE_BIND_STREAM_OUTPUT | \ - SVGA3D_SURFACE_INACTIVE | \ - SVGA3D_SURFACE_STAGING_UPLOAD | \ - SVGA3D_SURFACE_STAGING_DOWNLOAD | \ - SVGA3D_SURFACE_HINT_INDIRECT_UPDATE | \ - SVGA3D_SURFACE_TRANSFER_FROM_BUFFER | \ - SVGA3D_SURFACE_RESERVED1 | \ - SVGA3D_SURFACE_MULTISAMPLE | \ - SVGA3D_SURFACE_BIND_UAVIEW | \ - SVGA3D_SURFACE_TRANSFER_TO_BUFFER | \ - SVGA3D_SURFACE_BIND_RAW_VIEWS | \ - SVGA3D_SURFACE_BUFFER_STRUCTURED | \ - SVGA3D_SURFACE_DRAWINDIRECT_ARGS | \ - SVGA3D_SURFACE_RESOURCE_CLAMP \ - ) - -#define SVGA3D_SURFACE_BUFFER_DISALLOWED_MASK \ - ( SVGA3D_SURFACE_CUBEMAP | \ - SVGA3D_SURFACE_AUTOGENMIPMAPS | \ - SVGA3D_SURFACE_VOLUME | \ - SVGA3D_SURFACE_1D | \ - SVGA3D_SURFACE_DEAD2 | \ - SVGA3D_SURFACE_ARRAY | \ - SVGA3D_SURFACE_MULTISAMPLE | \ - SVGA3D_SURFACE_MOB_PITCH | \ - SVGA3D_SURFACE_RESOURCE_CLAMP \ - ) - -#define SVGA3D_SURFACE_MULTISAMPLE_DISALLOWED_MASK \ - ( SVGA3D_SURFACE_CUBEMAP | \ - SVGA3D_SURFACE_AUTOGENMIPMAPS | \ - SVGA3D_SURFACE_VOLUME | \ - SVGA3D_SURFACE_1D | \ - SVGA3D_SURFACE_SCREENTARGET | \ - SVGA3D_SURFACE_MOB_PITCH | \ - SVGA3D_SURFACE_TRANSFER_FROM_BUFFER 
| \ - SVGA3D_SURFACE_RESERVED1 | \ - SVGA3D_SURFACE_BIND_UAVIEW | \ - SVGA3D_SURFACE_TRANSFER_TO_BUFFER | \ - SVGA3D_SURFACE_BIND_LOGICOPS | \ - SVGA3D_SURFACE_BIND_RAW_VIEWS | \ - SVGA3D_SURFACE_BUFFER_STRUCTURED | \ - SVGA3D_SURFACE_DRAWINDIRECT_ARGS \ - ) - -#define SVGA3D_SURFACE_DX_ONLY_MASK \ - ( SVGA3D_SURFACE_BIND_STREAM_OUTPUT | \ - SVGA3D_SURFACE_STAGING_UPLOAD | \ - SVGA3D_SURFACE_STAGING_DOWNLOAD | \ - SVGA3D_SURFACE_TRANSFER_FROM_BUFFER | \ - SVGA3D_SURFACE_TRANSFER_TO_BUFFER \ - ) - -#define SVGA3D_SURFACE_STAGING_MASK \ - ( SVGA3D_SURFACE_STAGING_UPLOAD | \ - SVGA3D_SURFACE_STAGING_DOWNLOAD \ - ) - -#define SVGA3D_SURFACE_BIND_MASK \ - ( SVGA3D_SURFACE_BIND_VERTEX_BUFFER | \ - SVGA3D_SURFACE_BIND_INDEX_BUFFER | \ - SVGA3D_SURFACE_BIND_CONSTANT_BUFFER | \ - SVGA3D_SURFACE_BIND_SHADER_RESOURCE | \ - SVGA3D_SURFACE_BIND_RENDER_TARGET | \ - SVGA3D_SURFACE_BIND_DEPTH_STENCIL | \ - SVGA3D_SURFACE_BIND_STREAM_OUTPUT | \ - SVGA3D_SURFACE_BIND_UAVIEW | \ - SVGA3D_SURFACE_BIND_LOGICOPS | \ - SVGA3D_SURFACE_BIND_RAW_VIEWS \ - ) - -#define SVGA3D_SURFACE_VADECODE_DISALLOWED_MASK \ - ( SVGA3D_SURFACE_CUBEMAP | \ - SVGA3D_SURFACE_HINT_STATIC | \ - SVGA3D_SURFACE_HINT_DYNAMIC | \ - SVGA3D_SURFACE_HINT_INDEXBUFFER | \ - SVGA3D_SURFACE_HINT_VERTEXBUFFER | \ - SVGA3D_SURFACE_HINT_TEXTURE | \ - SVGA3D_SURFACE_HINT_RENDERTARGET | \ - SVGA3D_SURFACE_HINT_DEPTHSTENCIL | \ - SVGA3D_SURFACE_HINT_WRITEONLY | \ - SVGA3D_SURFACE_DEAD2 | \ - SVGA3D_SURFACE_AUTOGENMIPMAPS | \ - SVGA3D_SURFACE_HINT_RT_LOCKABLE | \ - SVGA3D_SURFACE_VOLUME | \ - SVGA3D_SURFACE_SCREENTARGET | \ - SVGA3D_SURFACE_1D | \ - SVGA3D_SURFACE_BIND_VERTEX_BUFFER | \ - SVGA3D_SURFACE_BIND_INDEX_BUFFER | \ - SVGA3D_SURFACE_BIND_CONSTANT_BUFFER | \ - SVGA3D_SURFACE_BIND_RENDER_TARGET | \ - SVGA3D_SURFACE_BIND_SHADER_RESOURCE | \ - SVGA3D_SURFACE_BIND_DEPTH_STENCIL | \ - SVGA3D_SURFACE_BIND_STREAM_OUTPUT | \ - SVGA3D_SURFACE_INACTIVE | \ - SVGA3D_SURFACE_STAGING_UPLOAD | \ - SVGA3D_SURFACE_STAGING_DOWNLOAD | \ - SVGA3D_SURFACE_HINT_INDIRECT_UPDATE | \ - SVGA3D_SURFACE_TRANSFER_FROM_BUFFER | \ - SVGA3D_SURFACE_MULTISAMPLE | \ - SVGA3D_SURFACE_BIND_UAVIEW | \ - SVGA3D_SURFACE_TRANSFER_TO_BUFFER | \ - SVGA3D_SURFACE_BIND_LOGICOPS | \ - SVGA3D_SURFACE_BIND_RAW_VIEWS | \ - SVGA3D_SURFACE_BUFFER_STRUCTURED | \ - SVGA3D_SURFACE_DRAWINDIRECT_ARGS | \ - SVGA3D_SURFACE_RESOURCE_CLAMP \ - ) - -#define SVGA3D_SURFACE_VAPROCESSFRAME_OUTPUT_DISALLOWED_MASK \ - ( SVGA3D_SURFACE_HINT_INDEXBUFFER | \ - SVGA3D_SURFACE_HINT_VERTEXBUFFER | \ - SVGA3D_SURFACE_HINT_DEPTHSTENCIL | \ - SVGA3D_SURFACE_DEAD2 | \ - SVGA3D_SURFACE_VOLUME | \ - SVGA3D_SURFACE_1D | \ - SVGA3D_SURFACE_BIND_VERTEX_BUFFER | \ - SVGA3D_SURFACE_BIND_INDEX_BUFFER | \ - SVGA3D_SURFACE_BIND_CONSTANT_BUFFER | \ - SVGA3D_SURFACE_BIND_DEPTH_STENCIL | \ - SVGA3D_SURFACE_BIND_STREAM_OUTPUT | \ - SVGA3D_SURFACE_INACTIVE | \ - SVGA3D_SURFACE_STAGING_UPLOAD | \ - SVGA3D_SURFACE_STAGING_DOWNLOAD | \ - SVGA3D_SURFACE_TRANSFER_FROM_BUFFER | \ - SVGA3D_SURFACE_VADECODE | \ - SVGA3D_SURFACE_MULTISAMPLE | \ - SVGA3D_SURFACE_BIND_UAVIEW | \ - SVGA3D_SURFACE_TRANSFER_TO_BUFFER | \ - SVGA3D_SURFACE_BIND_LOGICOPS | \ - SVGA3D_SURFACE_BIND_RAW_VIEWS | \ - SVGA3D_SURFACE_BUFFER_STRUCTURED | \ - SVGA3D_SURFACE_DRAWINDIRECT_ARGS | \ - SVGA3D_SURFACE_RESOURCE_CLAMP \ - ) - -#define SVGA3D_SURFACE_VAPROCESSFRAME_INPUT_DISALLOWED_MASK \ - ( SVGA3D_SURFACE_CUBEMAP | \ - SVGA3D_SURFACE_HINT_INDEXBUFFER | \ - SVGA3D_SURFACE_HINT_VERTEXBUFFER | \ - SVGA3D_SURFACE_HINT_DEPTHSTENCIL | \ - SVGA3D_SURFACE_DEAD2 | \ - 
SVGA3D_SURFACE_VOLUME | \ - SVGA3D_SURFACE_SCREENTARGET | \ - SVGA3D_SURFACE_1D | \ - SVGA3D_SURFACE_BIND_VERTEX_BUFFER | \ - SVGA3D_SURFACE_BIND_INDEX_BUFFER | \ - SVGA3D_SURFACE_BIND_CONSTANT_BUFFER | \ - SVGA3D_SURFACE_BIND_DEPTH_STENCIL | \ - SVGA3D_SURFACE_BIND_STREAM_OUTPUT | \ - SVGA3D_SURFACE_STAGING_UPLOAD | \ - SVGA3D_SURFACE_STAGING_DOWNLOAD | \ - SVGA3D_SURFACE_TRANSFER_FROM_BUFFER | \ - SVGA3D_SURFACE_MULTISAMPLE | \ - SVGA3D_SURFACE_BIND_UAVIEW | \ - SVGA3D_SURFACE_TRANSFER_TO_BUFFER | \ - SVGA3D_SURFACE_BIND_LOGICOPS | \ - SVGA3D_SURFACE_BIND_RAW_VIEWS | \ - SVGA3D_SURFACE_BUFFER_STRUCTURED | \ - SVGA3D_SURFACE_DRAWINDIRECT_ARGS | \ - SVGA3D_SURFACE_RESOURCE_CLAMP \ - ) - -#define SVGA3D_SURFACE_LOGICOPS_DISALLOWED_MASK \ - ( SVGA3D_SURFACE_CUBEMAP | \ - SVGA3D_SURFACE_DEAD2 | \ - SVGA3D_SURFACE_AUTOGENMIPMAPS | \ - SVGA3D_SURFACE_VOLUME | \ - SVGA3D_SURFACE_1D | \ - SVGA3D_SURFACE_BIND_VERTEX_BUFFER | \ - SVGA3D_SURFACE_BIND_INDEX_BUFFER | \ - SVGA3D_SURFACE_BIND_CONSTANT_BUFFER | \ - SVGA3D_SURFACE_BIND_DEPTH_STENCIL | \ - SVGA3D_SURFACE_BIND_STREAM_OUTPUT | \ - SVGA3D_SURFACE_TRANSFER_FROM_BUFFER | \ - SVGA3D_SURFACE_VADECODE | \ - SVGA3D_SURFACE_MULTISAMPLE | \ - SVGA3D_SURFACE_BIND_UAVIEW | \ - SVGA3D_SURFACE_TRANSFER_TO_BUFFER | \ - SVGA3D_SURFACE_BIND_RAW_VIEWS | \ - SVGA3D_SURFACE_BUFFER_STRUCTURED | \ - SVGA3D_SURFACE_DRAWINDIRECT_ARGS | \ - SVGA3D_SURFACE_RESOURCE_CLAMP \ - ) +#define SVGA3D_SURFACE_HB_DISALLOWED_MASK \ + (SVGA3D_SURFACE_MOB_PITCH | SVGA3D_SURFACE_SCREENTARGET | \ + SVGA3D_SURFACE_ALIGN16 | SVGA3D_SURFACE_BIND_CONSTANT_BUFFER | \ + SVGA3D_SURFACE_BIND_STREAM_OUTPUT | SVGA3D_SURFACE_STAGING_UPLOAD | \ + SVGA3D_SURFACE_STAGING_DOWNLOAD | \ + SVGA3D_SURFACE_HINT_INDIRECT_UPDATE | \ + SVGA3D_SURFACE_TRANSFER_FROM_BUFFER | SVGA3D_SURFACE_MULTISAMPLE | \ + SVGA3D_SURFACE_BIND_UAVIEW | SVGA3D_SURFACE_TRANSFER_TO_BUFFER | \ + SVGA3D_SURFACE_BIND_LOGICOPS | SVGA3D_SURFACE_BIND_RAW_VIEWS | \ + SVGA3D_SURFACE_BUFFER_STRUCTURED | SVGA3D_SURFACE_DRAWINDIRECT_ARGS | \ + SVGA3D_SURFACE_RESOURCE_CLAMP | SVGA3D_SURFACE_STAGING_COPY | \ + SVGA3D_SURFACE_RESTRICT_UPDATE | SVGA3D_SURFACE_BIND_TENSOR | \ + SVGA3D_SURFACE_LO_STAGING) + +#define SVGA3D_SURFACE_HB_PRESENT_DISALLOWED_MASK \ + (SVGA3D_SURFACE_1D | SVGA3D_SURFACE_MULTISAMPLE | \ + SVGA3D_SURFACE_STAGING_COPY) + +#define SVGA3D_SURFACE_2D_DISALLOWED_MASK \ + (SVGA3D_SURFACE_CUBEMAP | SVGA3D_SURFACE_AUTOGENMIPMAPS | \ + SVGA3D_SURFACE_VOLUME | SVGA3D_SURFACE_1D | \ + SVGA3D_SURFACE_BIND_VERTEX_BUFFER | \ + SVGA3D_SURFACE_BIND_INDEX_BUFFER | \ + SVGA3D_SURFACE_BIND_CONSTANT_BUFFER | \ + SVGA3D_SURFACE_BIND_DEPTH_STENCIL | \ + SVGA3D_SURFACE_BIND_STREAM_OUTPUT | \ + SVGA3D_SURFACE_TRANSFER_FROM_BUFFER | SVGA3D_SURFACE_MULTISAMPLE | \ + SVGA3D_SURFACE_BIND_UAVIEW | SVGA3D_SURFACE_TRANSFER_TO_BUFFER | \ + SVGA3D_SURFACE_BIND_RAW_VIEWS | SVGA3D_SURFACE_BUFFER_STRUCTURED | \ + SVGA3D_SURFACE_DRAWINDIRECT_ARGS | SVGA3D_SURFACE_RESOURCE_CLAMP | \ + SVGA3D_SURFACE_BIND_TENSOR) + +#define SVGA3D_SURFACE_BASICOPS_DISALLOWED_MASK \ + (SVGA3D_SURFACE_CUBEMAP | SVGA3D_SURFACE_AUTOGENMIPMAPS | \ + SVGA3D_SURFACE_VOLUME | SVGA3D_SURFACE_1D | \ + SVGA3D_SURFACE_MULTISAMPLE) + +#define SVGA3D_SURFACE_SCREENTARGET_DISALLOWED_MASK \ + (SVGA3D_SURFACE_CUBEMAP | SVGA3D_SURFACE_AUTOGENMIPMAPS | \ + SVGA3D_SURFACE_VOLUME | SVGA3D_SURFACE_1D | \ + SVGA3D_SURFACE_BIND_VERTEX_BUFFER | \ + SVGA3D_SURFACE_BIND_INDEX_BUFFER | \ + SVGA3D_SURFACE_BIND_CONSTANT_BUFFER | \ + SVGA3D_SURFACE_BIND_DEPTH_STENCIL | \ + 
SVGA3D_SURFACE_BIND_STREAM_OUTPUT | SVGA3D_SURFACE_INACTIVE | \ + SVGA3D_SURFACE_STAGING_UPLOAD | SVGA3D_SURFACE_STAGING_DOWNLOAD | \ + SVGA3D_SURFACE_HINT_INDIRECT_UPDATE | \ + SVGA3D_SURFACE_TRANSFER_FROM_BUFFER | SVGA3D_SURFACE_MULTISAMPLE | \ + SVGA3D_SURFACE_TRANSFER_TO_BUFFER | SVGA3D_SURFACE_BIND_RAW_VIEWS | \ + SVGA3D_SURFACE_BUFFER_STRUCTURED | SVGA3D_SURFACE_DRAWINDIRECT_ARGS | \ + SVGA3D_SURFACE_RESOURCE_CLAMP | SVGA3D_SURFACE_STAGING_COPY | \ + SVGA3D_SURFACE_BIND_TENSOR | SVGA3D_SURFACE_LO_STAGING) + +#define SVGA3D_SURFACE_BUFFER_DISALLOWED_MASK \ + (SVGA3D_SURFACE_CUBEMAP | SVGA3D_SURFACE_AUTOGENMIPMAPS | \ + SVGA3D_SURFACE_VOLUME | SVGA3D_SURFACE_1D | SVGA3D_SURFACE_DEAD2 | \ + SVGA3D_SURFACE_ARRAY | SVGA3D_SURFACE_MULTISAMPLE | \ + SVGA3D_SURFACE_MOB_PITCH | SVGA3D_SURFACE_RESOURCE_CLAMP) + +#define SVGA3D_SURFACE_MULTISAMPLE_DISALLOWED_MASK \ + (SVGA3D_SURFACE_CUBEMAP | SVGA3D_SURFACE_AUTOGENMIPMAPS | \ + SVGA3D_SURFACE_VOLUME | SVGA3D_SURFACE_1D | \ + SVGA3D_SURFACE_SCREENTARGET | SVGA3D_SURFACE_MOB_PITCH | \ + SVGA3D_SURFACE_TRANSFER_FROM_BUFFER | SVGA3D_SURFACE_BIND_UAVIEW | \ + SVGA3D_SURFACE_TRANSFER_TO_BUFFER | SVGA3D_SURFACE_BIND_LOGICOPS | \ + SVGA3D_SURFACE_BIND_RAW_VIEWS | SVGA3D_SURFACE_BUFFER_STRUCTURED | \ + SVGA3D_SURFACE_DRAWINDIRECT_ARGS | SVGA3D_SURFACE_STAGING_COPY) + +#define SVGA3D_SURFACE_DX_ONLY_MASK \ + (SVGA3D_SURFACE_BIND_STREAM_OUTPUT | SVGA3D_SURFACE_STAGING_UPLOAD | \ + SVGA3D_SURFACE_STAGING_DOWNLOAD | \ + SVGA3D_SURFACE_TRANSFER_FROM_BUFFER | \ + SVGA3D_SURFACE_TRANSFER_TO_BUFFER) + +#define SVGA3D_SURFACE_ANY_STAGING_MASK \ + (SVGA3D_SURFACE_STAGING_UPLOAD | SVGA3D_SURFACE_STAGING_DOWNLOAD | \ + SVGA3D_SURFACE_STAGING_COPY | SVGA3D_SURFACE_LO_STAGING) + +#define SVGA3D_SURFACE_ANY_NONHINT_STAGING_MASK \ + (SVGA3D_SURFACE_ANY_STAGING_MASK & ~(SVGA3D_SURFACE_LO_STAGING)) + +#define SVGA3D_SURFACE_BIND_MASK \ + (SVGA3D_SURFACE_BIND_VERTEX_BUFFER | \ + SVGA3D_SURFACE_BIND_INDEX_BUFFER | \ + SVGA3D_SURFACE_BIND_CONSTANT_BUFFER | \ + SVGA3D_SURFACE_BIND_SHADER_RESOURCE | \ + SVGA3D_SURFACE_BIND_RENDER_TARGET | \ + SVGA3D_SURFACE_BIND_DEPTH_STENCIL | \ + SVGA3D_SURFACE_BIND_STREAM_OUTPUT | SVGA3D_SURFACE_BIND_UAVIEW | \ + SVGA3D_SURFACE_BIND_LOGICOPS | SVGA3D_SURFACE_BIND_RAW_VIEWS | \ + SVGA3D_SURFACE_BIND_TENSOR) + +#define SVGA3D_SURFACE_STAGING_DISALLOWED_MASK \ + (SVGA3D_SURFACE_BIND_MASK | SVGA3D_SURFACE_AUTOGENMIPMAPS | \ + SVGA3D_SURFACE_SCREENTARGET | SVGA3D_SURFACE_HINT_RENDERTARGET | \ + SVGA3D_SURFACE_HINT_INDIRECT_UPDATE | SVGA3D_SURFACE_MULTISAMPLE | \ + SVGA3D_SURFACE_DRAWINDIRECT_ARGS | SVGA3D_SURFACE_RESOURCE_CLAMP | \ + SVGA3D_SURFACE_BIND_TENSOR) + +#define SVGA3D_SURFACE_STAGING_COPY_DISALLOWED_MASK \ + (SVGA3D_SURFACE_STAGING_DISALLOWED_MASK | \ + SVGA3D_SURFACE_TRANSFER_TO_BUFFER | \ + SVGA3D_SURFACE_TRANSFER_FROM_BUFFER) + +#define SVGA3D_SURFACE_LOGICOPS_DISALLOWED_MASK \ + (SVGA3D_SURFACE_CUBEMAP | SVGA3D_SURFACE_DEAD2 | \ + SVGA3D_SURFACE_AUTOGENMIPMAPS | SVGA3D_SURFACE_VOLUME | \ + SVGA3D_SURFACE_1D | SVGA3D_SURFACE_BIND_VERTEX_BUFFER | \ + SVGA3D_SURFACE_BIND_INDEX_BUFFER | \ + SVGA3D_SURFACE_BIND_CONSTANT_BUFFER | \ + SVGA3D_SURFACE_BIND_DEPTH_STENCIL | \ + SVGA3D_SURFACE_BIND_STREAM_OUTPUT | \ + SVGA3D_SURFACE_TRANSFER_FROM_BUFFER | SVGA3D_SURFACE_MULTISAMPLE | \ + SVGA3D_SURFACE_BIND_UAVIEW | SVGA3D_SURFACE_TRANSFER_TO_BUFFER | \ + SVGA3D_SURFACE_BIND_RAW_VIEWS | SVGA3D_SURFACE_BUFFER_STRUCTURED | \ + SVGA3D_SURFACE_DRAWINDIRECT_ARGS | SVGA3D_SURFACE_RESOURCE_CLAMP | \ + SVGA3D_SURFACE_STAGING_COPY) + +#define 
SVGA3D_SURFACE_SM5_MASK \ + (SVGA3D_SURFACE_DRAWINDIRECT_ARGS | SVGA3D_SURFACE_BUFFER_STRUCTURED | \ + SVGA3D_SURFACE_BIND_RAW_VIEWS | SVGA3D_SURFACE_BIND_UAVIEW | \ + SVGA3D_SURFACE_RESOURCE_CLAMP) #define SVGA3D_BUFFER_STRUCTURED_STRIDE_MAX 2048 - -/* - * These are really the D3DFORMAT_OP defines from the wdk. We need - * them so that we can query the host for what the supported surface - * operations are (when we're using the D3D backend, in particular), - * and so we can send those operations to the guest. - */ typedef enum { - SVGA3DFORMAT_OP_TEXTURE = 0x00000001, - SVGA3DFORMAT_OP_VOLUMETEXTURE = 0x00000002, - SVGA3DFORMAT_OP_CUBETEXTURE = 0x00000004, - SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET = 0x00000008, - SVGA3DFORMAT_OP_SAME_FORMAT_RENDERTARGET = 0x00000010, - SVGA3DFORMAT_OP_ZSTENCIL = 0x00000040, - SVGA3DFORMAT_OP_ZSTENCIL_WITH_ARBITRARY_COLOR_DEPTH = 0x00000080, + SVGA3DFORMAT_OP_TEXTURE = 0x00000001, + SVGA3DFORMAT_OP_VOLUMETEXTURE = 0x00000002, + SVGA3DFORMAT_OP_CUBETEXTURE = 0x00000004, + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET = 0x00000008, + SVGA3DFORMAT_OP_SAME_FORMAT_RENDERTARGET = 0x00000010, + SVGA3DFORMAT_OP_ZSTENCIL = 0x00000040, + SVGA3DFORMAT_OP_ZSTENCIL_WITH_ARBITRARY_COLOR_DEPTH = 0x00000080, -/* - * This format can be used as a render target if the current display mode - * is the same depth if the alpha channel is ignored. e.g. if the device - * can render to A8R8G8B8 when the display mode is X8R8G8B8, then the - * format op list entry for A8R8G8B8 should have this cap. - */ - SVGA3DFORMAT_OP_SAME_FORMAT_UP_TO_ALPHA_RENDERTARGET = 0x00000100, + SVGA3DFORMAT_OP_SAME_FORMAT_UP_TO_ALPHA_RENDERTARGET = 0x00000100, -/* - * This format contains DirectDraw support (including Flip). This flag - * should not to be set on alpha formats. - */ - SVGA3DFORMAT_OP_DISPLAYMODE = 0x00000400, + SVGA3DFORMAT_OP_DISPLAYMODE = 0x00000400, -/* - * The rasterizer can support some level of Direct3D support in this format - * and implies that the driver can create a Context in this mode (for some - * render target format). When this flag is set, the SVGA3DFORMAT_OP_DISPLAYMODE - * flag must also be set. - */ - SVGA3DFORMAT_OP_3DACCELERATION = 0x00000800, + SVGA3DFORMAT_OP_3DACCELERATION = 0x00000800, -/* - * This is set for a private format when the driver has put the bpp in - * the structure. - */ - SVGA3DFORMAT_OP_PIXELSIZE = 0x00001000, + SVGA3DFORMAT_OP_PIXELSIZE = 0x00001000, -/* - * Indicates that this format can be converted to any RGB format for which - * SVGA3DFORMAT_OP_MEMBEROFGROUP_ARGB is specified. - */ - SVGA3DFORMAT_OP_CONVERT_TO_ARGB = 0x00002000, + SVGA3DFORMAT_OP_CONVERT_TO_ARGB = 0x00002000, -/* - * Indicates that this format can be used to create offscreen plain surfaces. - */ - SVGA3DFORMAT_OP_OFFSCREENPLAIN = 0x00004000, + SVGA3DFORMAT_OP_OFFSCREENPLAIN = 0x00004000, -/* - * Indicated that this format can be read as an SRGB texture (meaning that the - * sampler will linearize the looked up data). - */ - SVGA3DFORMAT_OP_SRGBREAD = 0x00008000, + SVGA3DFORMAT_OP_SRGBREAD = 0x00008000, -/* - * Indicates that this format can be used in the bumpmap instructions. - */ - SVGA3DFORMAT_OP_BUMPMAP = 0x00010000, + SVGA3DFORMAT_OP_BUMPMAP = 0x00010000, -/* - * Indicates that this format can be sampled by the displacement map sampler. - */ - SVGA3DFORMAT_OP_DMAP = 0x00020000, + SVGA3DFORMAT_OP_DMAP = 0x00020000, -/* - * Indicates that this format cannot be used with texture filtering. 
- */ - SVGA3DFORMAT_OP_NOFILTER = 0x00040000, + SVGA3DFORMAT_OP_NOFILTER = 0x00040000, -/* - * Indicates that format conversions are supported to this RGB format if - * SVGA3DFORMAT_OP_CONVERT_TO_ARGB is specified in the source format. - */ - SVGA3DFORMAT_OP_MEMBEROFGROUP_ARGB = 0x00080000, + SVGA3DFORMAT_OP_MEMBEROFGROUP_ARGB = 0x00080000, -/* - * Indicated that this format can be written as an SRGB target - * (meaning that the pixel pipe will DE-linearize data on output to format) - */ - SVGA3DFORMAT_OP_SRGBWRITE = 0x00100000, + SVGA3DFORMAT_OP_SRGBWRITE = 0x00100000, -/* - * Indicates that this format cannot be used with alpha blending. - */ - SVGA3DFORMAT_OP_NOALPHABLEND = 0x00200000, + SVGA3DFORMAT_OP_NOALPHABLEND = 0x00200000, -/* - * Indicates that the device can auto-generated sublevels for resources - * of this format. - */ - SVGA3DFORMAT_OP_AUTOGENMIPMAP = 0x00400000, + SVGA3DFORMAT_OP_AUTOGENMIPMAP = 0x00400000, -/* - * Indicates that this format can be used by vertex texture sampler. - */ - SVGA3DFORMAT_OP_VERTEXTEXTURE = 0x00800000, + SVGA3DFORMAT_OP_VERTEXTEXTURE = 0x00800000, -/* - * Indicates that this format supports neither texture coordinate - * wrap modes, nor mipmapping. - */ - SVGA3DFORMAT_OP_NOTEXCOORDWRAPNORMIP = 0x01000000 + SVGA3DFORMAT_OP_NOTEXCOORDWRAPNORMIP = 0x01000000 } SVGA3dFormatOp; -#define SVGA3D_FORMAT_POSITIVE \ - (SVGA3DFORMAT_OP_TEXTURE | \ - SVGA3DFORMAT_OP_VOLUMETEXTURE | \ - SVGA3DFORMAT_OP_CUBETEXTURE | \ - SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET | \ - SVGA3DFORMAT_OP_SAME_FORMAT_RENDERTARGET | \ - SVGA3DFORMAT_OP_ZSTENCIL | \ - SVGA3DFORMAT_OP_ZSTENCIL_WITH_ARBITRARY_COLOR_DEPTH | \ - SVGA3DFORMAT_OP_SAME_FORMAT_UP_TO_ALPHA_RENDERTARGET | \ - SVGA3DFORMAT_OP_DISPLAYMODE | \ - SVGA3DFORMAT_OP_3DACCELERATION | \ - SVGA3DFORMAT_OP_PIXELSIZE | \ - SVGA3DFORMAT_OP_CONVERT_TO_ARGB | \ - SVGA3DFORMAT_OP_OFFSCREENPLAIN | \ - SVGA3DFORMAT_OP_SRGBREAD | \ - SVGA3DFORMAT_OP_BUMPMAP | \ - SVGA3DFORMAT_OP_DMAP | \ - SVGA3DFORMAT_OP_MEMBEROFGROUP_ARGB | \ - SVGA3DFORMAT_OP_SRGBWRITE | \ - SVGA3DFORMAT_OP_AUTOGENMIPMAP | \ - SVGA3DFORMAT_OP_VERTEXTEXTURE) - -#define SVGA3D_FORMAT_NEGATIVE \ - (SVGA3DFORMAT_OP_NOFILTER | \ - SVGA3DFORMAT_OP_NOALPHABLEND | \ - SVGA3DFORMAT_OP_NOTEXCOORDWRAPNORMIP) +#define SVGA3D_FORMAT_POSITIVE \ + (SVGA3DFORMAT_OP_TEXTURE | SVGA3DFORMAT_OP_VOLUMETEXTURE | \ + SVGA3DFORMAT_OP_CUBETEXTURE | \ + SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET | \ + SVGA3DFORMAT_OP_SAME_FORMAT_RENDERTARGET | SVGA3DFORMAT_OP_ZSTENCIL | \ + SVGA3DFORMAT_OP_ZSTENCIL_WITH_ARBITRARY_COLOR_DEPTH | \ + SVGA3DFORMAT_OP_SAME_FORMAT_UP_TO_ALPHA_RENDERTARGET | \ + SVGA3DFORMAT_OP_DISPLAYMODE | SVGA3DFORMAT_OP_3DACCELERATION | \ + SVGA3DFORMAT_OP_PIXELSIZE | SVGA3DFORMAT_OP_CONVERT_TO_ARGB | \ + SVGA3DFORMAT_OP_OFFSCREENPLAIN | SVGA3DFORMAT_OP_SRGBREAD | \ + SVGA3DFORMAT_OP_BUMPMAP | SVGA3DFORMAT_OP_DMAP | \ + SVGA3DFORMAT_OP_MEMBEROFGROUP_ARGB | SVGA3DFORMAT_OP_SRGBWRITE | \ + SVGA3DFORMAT_OP_AUTOGENMIPMAP | SVGA3DFORMAT_OP_VERTEXTEXTURE) + +#define SVGA3D_FORMAT_NEGATIVE \ + (SVGA3DFORMAT_OP_NOFILTER | SVGA3DFORMAT_OP_NOALPHABLEND | \ + SVGA3DFORMAT_OP_NOTEXCOORDWRAPNORMIP) -/* - * This structure is a conversion of SVGA3DFORMAT_OP_* - * Entries must be located at the same position. 
- */ typedef union { - uint32 value; - struct { - uint32 texture : 1; - uint32 volumeTexture : 1; - uint32 cubeTexture : 1; - uint32 offscreenRenderTarget : 1; - uint32 sameFormatRenderTarget : 1; - uint32 unknown1 : 1; - uint32 zStencil : 1; - uint32 zStencilArbitraryDepth : 1; - uint32 sameFormatUpToAlpha : 1; - uint32 unknown2 : 1; - uint32 displayMode : 1; - uint32 acceleration3d : 1; - uint32 pixelSize : 1; - uint32 convertToARGB : 1; - uint32 offscreenPlain : 1; - uint32 sRGBRead : 1; - uint32 bumpMap : 1; - uint32 dmap : 1; - uint32 noFilter : 1; - uint32 memberOfGroupARGB : 1; - uint32 sRGBWrite : 1; - uint32 noAlphaBlend : 1; - uint32 autoGenMipMap : 1; - uint32 vertexTexture : 1; - uint32 noTexCoordWrapNorMip : 1; - }; + uint32 value; + struct { + uint32 texture : 1; + uint32 volumeTexture : 1; + uint32 cubeTexture : 1; + uint32 offscreenRenderTarget : 1; + uint32 sameFormatRenderTarget : 1; + uint32 unknown1 : 1; + uint32 zStencil : 1; + uint32 zStencilArbitraryDepth : 1; + uint32 sameFormatUpToAlpha : 1; + uint32 unknown2 : 1; + uint32 displayMode : 1; + uint32 acceleration3d : 1; + uint32 pixelSize : 1; + uint32 convertToARGB : 1; + uint32 offscreenPlain : 1; + uint32 sRGBRead : 1; + uint32 bumpMap : 1; + uint32 dmap : 1; + uint32 noFilter : 1; + uint32 memberOfGroupARGB : 1; + uint32 sRGBWrite : 1; + uint32 noAlphaBlend : 1; + uint32 autoGenMipMap : 1; + uint32 vertexTexture : 1; + uint32 noTexCoordWrapNorMip : 1; + }; } SVGA3dSurfaceFormatCaps; -/* - * SVGA_3D_CMD_SETRENDERSTATE Types. All value types - * must fit in a uint32. - */ - typedef enum { - SVGA3D_RS_INVALID = 0, - SVGA3D_RS_MIN = 1, - SVGA3D_RS_ZENABLE = 1, /* SVGA3dBool */ - SVGA3D_RS_ZWRITEENABLE = 2, /* SVGA3dBool */ - SVGA3D_RS_ALPHATESTENABLE = 3, /* SVGA3dBool */ - SVGA3D_RS_DITHERENABLE = 4, /* SVGA3dBool */ - SVGA3D_RS_BLENDENABLE = 5, /* SVGA3dBool */ - SVGA3D_RS_FOGENABLE = 6, /* SVGA3dBool */ - SVGA3D_RS_SPECULARENABLE = 7, /* SVGA3dBool */ - SVGA3D_RS_STENCILENABLE = 8, /* SVGA3dBool */ - SVGA3D_RS_LIGHTINGENABLE = 9, /* SVGA3dBool */ - SVGA3D_RS_NORMALIZENORMALS = 10, /* SVGA3dBool */ - SVGA3D_RS_POINTSPRITEENABLE = 11, /* SVGA3dBool */ - SVGA3D_RS_POINTSCALEENABLE = 12, /* SVGA3dBool */ - SVGA3D_RS_STENCILREF = 13, /* uint32 */ - SVGA3D_RS_STENCILMASK = 14, /* uint32 */ - SVGA3D_RS_STENCILWRITEMASK = 15, /* uint32 */ - SVGA3D_RS_FOGSTART = 16, /* float */ - SVGA3D_RS_FOGEND = 17, /* float */ - SVGA3D_RS_FOGDENSITY = 18, /* float */ - SVGA3D_RS_POINTSIZE = 19, /* float */ - SVGA3D_RS_POINTSIZEMIN = 20, /* float */ - SVGA3D_RS_POINTSIZEMAX = 21, /* float */ - SVGA3D_RS_POINTSCALE_A = 22, /* float */ - SVGA3D_RS_POINTSCALE_B = 23, /* float */ - SVGA3D_RS_POINTSCALE_C = 24, /* float */ - SVGA3D_RS_FOGCOLOR = 25, /* SVGA3dColor */ - SVGA3D_RS_AMBIENT = 26, /* SVGA3dColor */ - SVGA3D_RS_CLIPPLANEENABLE = 27, /* SVGA3dClipPlanes */ - SVGA3D_RS_FOGMODE = 28, /* SVGA3dFogMode */ - SVGA3D_RS_FILLMODE = 29, /* SVGA3dFillMode */ - SVGA3D_RS_SHADEMODE = 30, /* SVGA3dShadeMode */ - SVGA3D_RS_LINEPATTERN = 31, /* SVGA3dLinePattern */ - SVGA3D_RS_SRCBLEND = 32, /* SVGA3dBlendOp */ - SVGA3D_RS_DSTBLEND = 33, /* SVGA3dBlendOp */ - SVGA3D_RS_BLENDEQUATION = 34, /* SVGA3dBlendEquation */ - SVGA3D_RS_CULLMODE = 35, /* SVGA3dFace */ - SVGA3D_RS_ZFUNC = 36, /* SVGA3dCmpFunc */ - SVGA3D_RS_ALPHAFUNC = 37, /* SVGA3dCmpFunc */ - SVGA3D_RS_STENCILFUNC = 38, /* SVGA3dCmpFunc */ - SVGA3D_RS_STENCILFAIL = 39, /* SVGA3dStencilOp */ - SVGA3D_RS_STENCILZFAIL = 40, /* SVGA3dStencilOp */ - SVGA3D_RS_STENCILPASS = 41, /* 
SVGA3dStencilOp */ - SVGA3D_RS_ALPHAREF = 42, /* float (0.0 .. 1.0) */ - SVGA3D_RS_FRONTWINDING = 43, /* SVGA3dFrontWinding */ - SVGA3D_RS_COORDINATETYPE = 44, /* SVGA3dCoordinateType */ - SVGA3D_RS_ZBIAS = 45, /* float */ - SVGA3D_RS_RANGEFOGENABLE = 46, /* SVGA3dBool */ - SVGA3D_RS_COLORWRITEENABLE = 47, /* SVGA3dColorMask */ - SVGA3D_RS_VERTEXMATERIALENABLE = 48, /* SVGA3dBool */ - SVGA3D_RS_DIFFUSEMATERIALSOURCE = 49, /* SVGA3dVertexMaterial */ - SVGA3D_RS_SPECULARMATERIALSOURCE = 50, /* SVGA3dVertexMaterial */ - SVGA3D_RS_AMBIENTMATERIALSOURCE = 51, /* SVGA3dVertexMaterial */ - SVGA3D_RS_EMISSIVEMATERIALSOURCE = 52, /* SVGA3dVertexMaterial */ - SVGA3D_RS_TEXTUREFACTOR = 53, /* SVGA3dColor */ - SVGA3D_RS_LOCALVIEWER = 54, /* SVGA3dBool */ - SVGA3D_RS_SCISSORTESTENABLE = 55, /* SVGA3dBool */ - SVGA3D_RS_BLENDCOLOR = 56, /* SVGA3dColor */ - SVGA3D_RS_STENCILENABLE2SIDED = 57, /* SVGA3dBool */ - SVGA3D_RS_CCWSTENCILFUNC = 58, /* SVGA3dCmpFunc */ - SVGA3D_RS_CCWSTENCILFAIL = 59, /* SVGA3dStencilOp */ - SVGA3D_RS_CCWSTENCILZFAIL = 60, /* SVGA3dStencilOp */ - SVGA3D_RS_CCWSTENCILPASS = 61, /* SVGA3dStencilOp */ - SVGA3D_RS_VERTEXBLEND = 62, /* SVGA3dVertexBlendFlags */ - SVGA3D_RS_SLOPESCALEDEPTHBIAS = 63, /* float */ - SVGA3D_RS_DEPTHBIAS = 64, /* float */ - - - /* - * Output Gamma Level - * - * Output gamma effects the gamma curve of colors that are output from the - * rendering pipeline. A value of 1.0 specifies a linear color space. If the - * value is <= 0.0, gamma correction is ignored and linear color space is - * used. - */ - - SVGA3D_RS_OUTPUTGAMMA = 65, /* float */ - SVGA3D_RS_ZVISIBLE = 66, /* SVGA3dBool */ - SVGA3D_RS_LASTPIXEL = 67, /* SVGA3dBool */ - SVGA3D_RS_CLIPPING = 68, /* SVGA3dBool */ - SVGA3D_RS_WRAP0 = 69, /* SVGA3dWrapFlags */ - SVGA3D_RS_WRAP1 = 70, /* SVGA3dWrapFlags */ - SVGA3D_RS_WRAP2 = 71, /* SVGA3dWrapFlags */ - SVGA3D_RS_WRAP3 = 72, /* SVGA3dWrapFlags */ - SVGA3D_RS_WRAP4 = 73, /* SVGA3dWrapFlags */ - SVGA3D_RS_WRAP5 = 74, /* SVGA3dWrapFlags */ - SVGA3D_RS_WRAP6 = 75, /* SVGA3dWrapFlags */ - SVGA3D_RS_WRAP7 = 76, /* SVGA3dWrapFlags */ - SVGA3D_RS_WRAP8 = 77, /* SVGA3dWrapFlags */ - SVGA3D_RS_WRAP9 = 78, /* SVGA3dWrapFlags */ - SVGA3D_RS_WRAP10 = 79, /* SVGA3dWrapFlags */ - SVGA3D_RS_WRAP11 = 80, /* SVGA3dWrapFlags */ - SVGA3D_RS_WRAP12 = 81, /* SVGA3dWrapFlags */ - SVGA3D_RS_WRAP13 = 82, /* SVGA3dWrapFlags */ - SVGA3D_RS_WRAP14 = 83, /* SVGA3dWrapFlags */ - SVGA3D_RS_WRAP15 = 84, /* SVGA3dWrapFlags */ - SVGA3D_RS_MULTISAMPLEANTIALIAS = 85, /* SVGA3dBool */ - SVGA3D_RS_MULTISAMPLEMASK = 86, /* uint32 */ - SVGA3D_RS_INDEXEDVERTEXBLENDENABLE = 87, /* SVGA3dBool */ - SVGA3D_RS_TWEENFACTOR = 88, /* float */ - SVGA3D_RS_ANTIALIASEDLINEENABLE = 89, /* SVGA3dBool */ - SVGA3D_RS_COLORWRITEENABLE1 = 90, /* SVGA3dColorMask */ - SVGA3D_RS_COLORWRITEENABLE2 = 91, /* SVGA3dColorMask */ - SVGA3D_RS_COLORWRITEENABLE3 = 92, /* SVGA3dColorMask */ - SVGA3D_RS_SEPARATEALPHABLENDENABLE = 93, /* SVGA3dBool */ - SVGA3D_RS_SRCBLENDALPHA = 94, /* SVGA3dBlendOp */ - SVGA3D_RS_DSTBLENDALPHA = 95, /* SVGA3dBlendOp */ - SVGA3D_RS_BLENDEQUATIONALPHA = 96, /* SVGA3dBlendEquation */ - SVGA3D_RS_TRANSPARENCYANTIALIAS = 97, /* SVGA3dTransparencyAntialiasType */ - SVGA3D_RS_LINEWIDTH = 98, /* float */ - SVGA3D_RS_MAX + SVGA3D_RS_INVALID = 0, + SVGA3D_RS_MIN = 1, + SVGA3D_RS_ZENABLE = 1, + SVGA3D_RS_ZWRITEENABLE = 2, + SVGA3D_RS_ALPHATESTENABLE = 3, + SVGA3D_RS_DITHERENABLE = 4, + SVGA3D_RS_BLENDENABLE = 5, + SVGA3D_RS_FOGENABLE = 6, + SVGA3D_RS_SPECULARENABLE = 7, + SVGA3D_RS_STENCILENABLE = 8, 
+ SVGA3D_RS_LIGHTINGENABLE = 9, + SVGA3D_RS_NORMALIZENORMALS = 10, + SVGA3D_RS_POINTSPRITEENABLE = 11, + SVGA3D_RS_POINTSCALEENABLE = 12, + SVGA3D_RS_STENCILREF = 13, + SVGA3D_RS_STENCILMASK = 14, + SVGA3D_RS_STENCILWRITEMASK = 15, + SVGA3D_RS_FOGSTART = 16, + SVGA3D_RS_FOGEND = 17, + SVGA3D_RS_FOGDENSITY = 18, + SVGA3D_RS_POINTSIZE = 19, + SVGA3D_RS_POINTSIZEMIN = 20, + SVGA3D_RS_POINTSIZEMAX = 21, + SVGA3D_RS_POINTSCALE_A = 22, + SVGA3D_RS_POINTSCALE_B = 23, + SVGA3D_RS_POINTSCALE_C = 24, + SVGA3D_RS_FOGCOLOR = 25, + SVGA3D_RS_AMBIENT = 26, + SVGA3D_RS_CLIPPLANEENABLE = 27, + SVGA3D_RS_FOGMODE = 28, + SVGA3D_RS_FILLMODE = 29, + SVGA3D_RS_SHADEMODE = 30, + SVGA3D_RS_LINEPATTERN = 31, + SVGA3D_RS_SRCBLEND = 32, + SVGA3D_RS_DSTBLEND = 33, + SVGA3D_RS_BLENDEQUATION = 34, + SVGA3D_RS_CULLMODE = 35, + SVGA3D_RS_ZFUNC = 36, + SVGA3D_RS_ALPHAFUNC = 37, + SVGA3D_RS_STENCILFUNC = 38, + SVGA3D_RS_STENCILFAIL = 39, + SVGA3D_RS_STENCILZFAIL = 40, + SVGA3D_RS_STENCILPASS = 41, + SVGA3D_RS_ALPHAREF = 42, + SVGA3D_RS_FRONTWINDING = 43, + SVGA3D_RS_COORDINATETYPE = 44, + SVGA3D_RS_ZBIAS = 45, + SVGA3D_RS_RANGEFOGENABLE = 46, + SVGA3D_RS_COLORWRITEENABLE = 47, + SVGA3D_RS_VERTEXMATERIALENABLE = 48, + SVGA3D_RS_DIFFUSEMATERIALSOURCE = 49, + SVGA3D_RS_SPECULARMATERIALSOURCE = 50, + SVGA3D_RS_AMBIENTMATERIALSOURCE = 51, + SVGA3D_RS_EMISSIVEMATERIALSOURCE = 52, + SVGA3D_RS_TEXTUREFACTOR = 53, + SVGA3D_RS_LOCALVIEWER = 54, + SVGA3D_RS_SCISSORTESTENABLE = 55, + SVGA3D_RS_BLENDCOLOR = 56, + SVGA3D_RS_STENCILENABLE2SIDED = 57, + SVGA3D_RS_CCWSTENCILFUNC = 58, + SVGA3D_RS_CCWSTENCILFAIL = 59, + SVGA3D_RS_CCWSTENCILZFAIL = 60, + SVGA3D_RS_CCWSTENCILPASS = 61, + SVGA3D_RS_VERTEXBLEND = 62, + SVGA3D_RS_SLOPESCALEDEPTHBIAS = 63, + SVGA3D_RS_DEPTHBIAS = 64, + + SVGA3D_RS_OUTPUTGAMMA = 65, + SVGA3D_RS_ZVISIBLE = 66, + SVGA3D_RS_LASTPIXEL = 67, + SVGA3D_RS_CLIPPING = 68, + SVGA3D_RS_WRAP0 = 69, + SVGA3D_RS_WRAP1 = 70, + SVGA3D_RS_WRAP2 = 71, + SVGA3D_RS_WRAP3 = 72, + SVGA3D_RS_WRAP4 = 73, + SVGA3D_RS_WRAP5 = 74, + SVGA3D_RS_WRAP6 = 75, + SVGA3D_RS_WRAP7 = 76, + SVGA3D_RS_WRAP8 = 77, + SVGA3D_RS_WRAP9 = 78, + SVGA3D_RS_WRAP10 = 79, + SVGA3D_RS_WRAP11 = 80, + SVGA3D_RS_WRAP12 = 81, + SVGA3D_RS_WRAP13 = 82, + SVGA3D_RS_WRAP14 = 83, + SVGA3D_RS_WRAP15 = 84, + SVGA3D_RS_MULTISAMPLEANTIALIAS = 85, + SVGA3D_RS_MULTISAMPLEMASK = 86, + SVGA3D_RS_INDEXEDVERTEXBLENDENABLE = 87, + SVGA3D_RS_TWEENFACTOR = 88, + SVGA3D_RS_ANTIALIASEDLINEENABLE = 89, + SVGA3D_RS_COLORWRITEENABLE1 = 90, + SVGA3D_RS_COLORWRITEENABLE2 = 91, + SVGA3D_RS_COLORWRITEENABLE3 = 92, + SVGA3D_RS_SEPARATEALPHABLENDENABLE = 93, + SVGA3D_RS_SRCBLENDALPHA = 94, + SVGA3D_RS_DSTBLENDALPHA = 95, + SVGA3D_RS_BLENDEQUATIONALPHA = 96, + SVGA3D_RS_TRANSPARENCYANTIALIAS = 97, + SVGA3D_RS_LINEWIDTH = 98, + SVGA3D_RS_MAX } SVGA3dRenderStateName; typedef enum { - SVGA3D_TRANSPARENCYANTIALIAS_NORMAL = 0, - SVGA3D_TRANSPARENCYANTIALIAS_ALPHATOCOVERAGE = 1, - SVGA3D_TRANSPARENCYANTIALIAS_SUPERSAMPLE = 2, - SVGA3D_TRANSPARENCYANTIALIAS_MAX + SVGA3D_TRANSPARENCYANTIALIAS_NORMAL = 0, + SVGA3D_TRANSPARENCYANTIALIAS_ALPHATOCOVERAGE = 1, + SVGA3D_TRANSPARENCYANTIALIAS_SUPERSAMPLE = 2, + SVGA3D_TRANSPARENCYANTIALIAS_MAX } SVGA3dTransparencyAntialiasType; typedef enum { - SVGA3D_VERTEXMATERIAL_NONE = 0, /* Use the value in the current material */ - SVGA3D_VERTEXMATERIAL_DIFFUSE = 1, /* Use the value in the diffuse component */ - SVGA3D_VERTEXMATERIAL_SPECULAR = 2, /* Use the value in the specular component */ - SVGA3D_VERTEXMATERIAL_MAX = 3, + SVGA3D_VERTEXMATERIAL_NONE = 0, + 
SVGA3D_VERTEXMATERIAL_DIFFUSE = 1, + SVGA3D_VERTEXMATERIAL_SPECULAR = 2, + SVGA3D_VERTEXMATERIAL_MAX = 3, } SVGA3dVertexMaterial; typedef enum { - SVGA3D_FILLMODE_INVALID = 0, - SVGA3D_FILLMODE_MIN = 1, - SVGA3D_FILLMODE_POINT = 1, - SVGA3D_FILLMODE_LINE = 2, - SVGA3D_FILLMODE_FILL = 3, - SVGA3D_FILLMODE_MAX + SVGA3D_FILLMODE_INVALID = 0, + SVGA3D_FILLMODE_MIN = 1, + SVGA3D_FILLMODE_POINT = 1, + SVGA3D_FILLMODE_LINE = 2, + SVGA3D_FILLMODE_FILL = 3, + SVGA3D_FILLMODE_MAX } SVGA3dFillModeType; - -typedef -#include "vmware_pack_begin.h" -union { - struct { - uint16 mode; /* SVGA3dFillModeType */ - uint16 face; /* SVGA3dFace */ - }; - uint32 uintValue; -} -#include "vmware_pack_end.h" -SVGA3dFillMode; +#pragma pack(push, 1) +typedef union { + struct { + uint16 mode; + uint16 face; + }; + uint32 uintValue; +} SVGA3dFillMode; +#pragma pack(pop) typedef enum { - SVGA3D_SHADEMODE_INVALID = 0, - SVGA3D_SHADEMODE_FLAT = 1, - SVGA3D_SHADEMODE_SMOOTH = 2, - SVGA3D_SHADEMODE_PHONG = 3, /* Not supported */ - SVGA3D_SHADEMODE_MAX + SVGA3D_SHADEMODE_INVALID = 0, + SVGA3D_SHADEMODE_FLAT = 1, + SVGA3D_SHADEMODE_SMOOTH = 2, + SVGA3D_SHADEMODE_PHONG = 3, + SVGA3D_SHADEMODE_MAX } SVGA3dShadeMode; -typedef -#include "vmware_pack_begin.h" -union { - struct { - uint16 repeat; - uint16 pattern; - }; - uint32 uintValue; -} -#include "vmware_pack_end.h" -SVGA3dLinePattern; +#pragma pack(push, 1) +typedef union { + struct { + uint16 repeat; + uint16 pattern; + }; + uint32 uintValue; +} SVGA3dLinePattern; +#pragma pack(pop) typedef enum { - SVGA3D_BLENDOP_INVALID = 0, - SVGA3D_BLENDOP_MIN = 1, - SVGA3D_BLENDOP_ZERO = 1, - SVGA3D_BLENDOP_ONE = 2, - SVGA3D_BLENDOP_SRCCOLOR = 3, - SVGA3D_BLENDOP_INVSRCCOLOR = 4, - SVGA3D_BLENDOP_SRCALPHA = 5, - SVGA3D_BLENDOP_INVSRCALPHA = 6, - SVGA3D_BLENDOP_DESTALPHA = 7, - SVGA3D_BLENDOP_INVDESTALPHA = 8, - SVGA3D_BLENDOP_DESTCOLOR = 9, - SVGA3D_BLENDOP_INVDESTCOLOR = 10, - SVGA3D_BLENDOP_SRCALPHASAT = 11, - SVGA3D_BLENDOP_BLENDFACTOR = 12, - SVGA3D_BLENDOP_INVBLENDFACTOR = 13, - SVGA3D_BLENDOP_SRC1COLOR = 14, - SVGA3D_BLENDOP_INVSRC1COLOR = 15, - SVGA3D_BLENDOP_SRC1ALPHA = 16, - SVGA3D_BLENDOP_INVSRC1ALPHA = 17, - SVGA3D_BLENDOP_BLENDFACTORALPHA = 18, - SVGA3D_BLENDOP_INVBLENDFACTORALPHA = 19, - SVGA3D_BLENDOP_MAX + SVGA3D_BLENDOP_INVALID = 0, + SVGA3D_BLENDOP_MIN = 1, + SVGA3D_BLENDOP_ZERO = 1, + SVGA3D_BLENDOP_ONE = 2, + SVGA3D_BLENDOP_SRCCOLOR = 3, + SVGA3D_BLENDOP_INVSRCCOLOR = 4, + SVGA3D_BLENDOP_SRCALPHA = 5, + SVGA3D_BLENDOP_INVSRCALPHA = 6, + SVGA3D_BLENDOP_DESTALPHA = 7, + SVGA3D_BLENDOP_INVDESTALPHA = 8, + SVGA3D_BLENDOP_DESTCOLOR = 9, + SVGA3D_BLENDOP_INVDESTCOLOR = 10, + SVGA3D_BLENDOP_SRCALPHASAT = 11, + SVGA3D_BLENDOP_BLENDFACTOR = 12, + SVGA3D_BLENDOP_INVBLENDFACTOR = 13, + SVGA3D_BLENDOP_SRC1COLOR = 14, + SVGA3D_BLENDOP_INVSRC1COLOR = 15, + SVGA3D_BLENDOP_SRC1ALPHA = 16, + SVGA3D_BLENDOP_INVSRC1ALPHA = 17, + SVGA3D_BLENDOP_BLENDFACTORALPHA = 18, + SVGA3D_BLENDOP_INVBLENDFACTORALPHA = 19, + SVGA3D_BLENDOP_MAX } SVGA3dBlendOp; typedef enum { - SVGA3D_BLENDEQ_INVALID = 0, - SVGA3D_BLENDEQ_MIN = 1, - SVGA3D_BLENDEQ_ADD = 1, - SVGA3D_BLENDEQ_SUBTRACT = 2, - SVGA3D_BLENDEQ_REVSUBTRACT = 3, - SVGA3D_BLENDEQ_MINIMUM = 4, - SVGA3D_BLENDEQ_MAXIMUM = 5, - SVGA3D_BLENDEQ_MAX + SVGA3D_BLENDEQ_INVALID = 0, + SVGA3D_BLENDEQ_MIN = 1, + SVGA3D_BLENDEQ_ADD = 1, + SVGA3D_BLENDEQ_SUBTRACT = 2, + SVGA3D_BLENDEQ_REVSUBTRACT = 3, + SVGA3D_BLENDEQ_MINIMUM = 4, + SVGA3D_BLENDEQ_MAXIMUM = 5, + SVGA3D_BLENDEQ_MAX } SVGA3dBlendEquation; typedef enum { - SVGA3D_DX11_LOGICOP_MIN = 0, - 
SVGA3D_DX11_LOGICOP_CLEAR = 0, - SVGA3D_DX11_LOGICOP_SET = 1, - SVGA3D_DX11_LOGICOP_COPY = 2, - SVGA3D_DX11_LOGICOP_COPY_INVERTED = 3, - SVGA3D_DX11_LOGICOP_NOOP = 4, - SVGA3D_DX11_LOGICOP_INVERT = 5, - SVGA3D_DX11_LOGICOP_AND = 6, - SVGA3D_DX11_LOGICOP_NAND = 7, - SVGA3D_DX11_LOGICOP_OR = 8, - SVGA3D_DX11_LOGICOP_NOR = 9, - SVGA3D_DX11_LOGICOP_XOR = 10, - SVGA3D_DX11_LOGICOP_EQUIV = 11, - SVGA3D_DX11_LOGICOP_AND_REVERSE = 12, - SVGA3D_DX11_LOGICOP_AND_INVERTED = 13, - SVGA3D_DX11_LOGICOP_OR_REVERSE = 14, - SVGA3D_DX11_LOGICOP_OR_INVERTED = 15, - SVGA3D_DX11_LOGICOP_MAX + SVGA3D_DX11_LOGICOP_MIN = 0, + SVGA3D_DX11_LOGICOP_CLEAR = 0, + SVGA3D_DX11_LOGICOP_SET = 1, + SVGA3D_DX11_LOGICOP_COPY = 2, + SVGA3D_DX11_LOGICOP_COPY_INVERTED = 3, + SVGA3D_DX11_LOGICOP_NOOP = 4, + SVGA3D_DX11_LOGICOP_INVERT = 5, + SVGA3D_DX11_LOGICOP_AND = 6, + SVGA3D_DX11_LOGICOP_NAND = 7, + SVGA3D_DX11_LOGICOP_OR = 8, + SVGA3D_DX11_LOGICOP_NOR = 9, + SVGA3D_DX11_LOGICOP_XOR = 10, + SVGA3D_DX11_LOGICOP_EQUIV = 11, + SVGA3D_DX11_LOGICOP_AND_REVERSE = 12, + SVGA3D_DX11_LOGICOP_AND_INVERTED = 13, + SVGA3D_DX11_LOGICOP_OR_REVERSE = 14, + SVGA3D_DX11_LOGICOP_OR_INVERTED = 15, + SVGA3D_DX11_LOGICOP_MAX } SVGA3dDX11LogicOp; typedef enum { - SVGA3D_FRONTWINDING_INVALID = 0, - SVGA3D_FRONTWINDING_CW = 1, - SVGA3D_FRONTWINDING_CCW = 2, - SVGA3D_FRONTWINDING_MAX + SVGA3D_FRONTWINDING_INVALID = 0, + SVGA3D_FRONTWINDING_CW = 1, + SVGA3D_FRONTWINDING_MIN = 1, + SVGA3D_FRONTWINDING_CCW = 2, + SVGA3D_FRONTWINDING_MAX } SVGA3dFrontWinding; typedef enum { - SVGA3D_FACE_INVALID = 0, - SVGA3D_FACE_NONE = 1, - SVGA3D_FACE_MIN = 1, - SVGA3D_FACE_FRONT = 2, - SVGA3D_FACE_BACK = 3, - SVGA3D_FACE_FRONT_BACK = 4, - SVGA3D_FACE_MAX + SVGA3D_FACE_INVALID = 0, + SVGA3D_FACE_NONE = 1, + SVGA3D_FACE_MIN = 1, + SVGA3D_FACE_FRONT = 2, + SVGA3D_FACE_BACK = 3, + SVGA3D_FACE_FRONT_BACK = 4, + SVGA3D_FACE_MAX } SVGA3dFace; -/* - * The order and the values should not be changed - */ - typedef enum { - SVGA3D_CMP_INVALID = 0, - SVGA3D_CMP_NEVER = 1, - SVGA3D_CMP_LESS = 2, - SVGA3D_CMP_EQUAL = 3, - SVGA3D_CMP_LESSEQUAL = 4, - SVGA3D_CMP_GREATER = 5, - SVGA3D_CMP_NOTEQUAL = 6, - SVGA3D_CMP_GREATEREQUAL = 7, - SVGA3D_CMP_ALWAYS = 8, - SVGA3D_CMP_MAX + SVGA3D_CMP_INVALID = 0, + SVGA3D_CMP_NEVER = 1, + SVGA3D_CMP_LESS = 2, + SVGA3D_CMP_EQUAL = 3, + SVGA3D_CMP_LESSEQUAL = 4, + SVGA3D_CMP_GREATER = 5, + SVGA3D_CMP_NOTEQUAL = 6, + SVGA3D_CMP_GREATEREQUAL = 7, + SVGA3D_CMP_ALWAYS = 8, + SVGA3D_CMP_MAX } SVGA3dCmpFunc; -/* - * SVGA3D_FOGFUNC_* specifies the fog equation, or PER_VERTEX which allows - * the fog factor to be specified in the alpha component of the specular - * (a.k.a. secondary) vertex color. - */ typedef enum { - SVGA3D_FOGFUNC_INVALID = 0, - SVGA3D_FOGFUNC_EXP = 1, - SVGA3D_FOGFUNC_EXP2 = 2, - SVGA3D_FOGFUNC_LINEAR = 3, - SVGA3D_FOGFUNC_PER_VERTEX = 4 + SVGA3D_FOGFUNC_INVALID = 0, + SVGA3D_FOGFUNC_EXP = 1, + SVGA3D_FOGFUNC_EXP2 = 2, + SVGA3D_FOGFUNC_LINEAR = 3, + SVGA3D_FOGFUNC_PER_VERTEX = 4 } SVGA3dFogFunction; -/* - * SVGA3D_FOGTYPE_* specifies if fog factors are computed on a per-vertex - * or per-pixel basis. - */ typedef enum { - SVGA3D_FOGTYPE_INVALID = 0, - SVGA3D_FOGTYPE_VERTEX = 1, - SVGA3D_FOGTYPE_PIXEL = 2, - SVGA3D_FOGTYPE_MAX = 3 + SVGA3D_FOGTYPE_INVALID = 0, + SVGA3D_FOGTYPE_VERTEX = 1, + SVGA3D_FOGTYPE_PIXEL = 2, + SVGA3D_FOGTYPE_MAX = 3 } SVGA3dFogType; -/* - * SVGA3D_FOGBASE_* selects depth or range-based fog. 
Depth-based fog is - * computed using the eye Z value of each pixel (or vertex), whereas range- - * based fog is computed using the actual distance (range) to the eye. - */ typedef enum { - SVGA3D_FOGBASE_INVALID = 0, - SVGA3D_FOGBASE_DEPTHBASED = 1, - SVGA3D_FOGBASE_RANGEBASED = 2, - SVGA3D_FOGBASE_MAX = 3 + SVGA3D_FOGBASE_INVALID = 0, + SVGA3D_FOGBASE_DEPTHBASED = 1, + SVGA3D_FOGBASE_RANGEBASED = 2, + SVGA3D_FOGBASE_MAX = 3 } SVGA3dFogBase; typedef enum { - SVGA3D_STENCILOP_INVALID = 0, - SVGA3D_STENCILOP_MIN = 1, - SVGA3D_STENCILOP_KEEP = 1, - SVGA3D_STENCILOP_ZERO = 2, - SVGA3D_STENCILOP_REPLACE = 3, - SVGA3D_STENCILOP_INCRSAT = 4, - SVGA3D_STENCILOP_DECRSAT = 5, - SVGA3D_STENCILOP_INVERT = 6, - SVGA3D_STENCILOP_INCR = 7, - SVGA3D_STENCILOP_DECR = 8, - SVGA3D_STENCILOP_MAX + SVGA3D_STENCILOP_INVALID = 0, + SVGA3D_STENCILOP_MIN = 1, + SVGA3D_STENCILOP_KEEP = 1, + SVGA3D_STENCILOP_ZERO = 2, + SVGA3D_STENCILOP_REPLACE = 3, + SVGA3D_STENCILOP_INCRSAT = 4, + SVGA3D_STENCILOP_DECRSAT = 5, + SVGA3D_STENCILOP_INVERT = 6, + SVGA3D_STENCILOP_INCR = 7, + SVGA3D_STENCILOP_DECR = 8, + SVGA3D_STENCILOP_MAX } SVGA3dStencilOp; typedef enum { - SVGA3D_CLIPPLANE_0 = (1 << 0), - SVGA3D_CLIPPLANE_1 = (1 << 1), - SVGA3D_CLIPPLANE_2 = (1 << 2), - SVGA3D_CLIPPLANE_3 = (1 << 3), - SVGA3D_CLIPPLANE_4 = (1 << 4), - SVGA3D_CLIPPLANE_5 = (1 << 5), + SVGA3D_CLIPPLANE_0 = (1 << 0), + SVGA3D_CLIPPLANE_1 = (1 << 1), + SVGA3D_CLIPPLANE_2 = (1 << 2), + SVGA3D_CLIPPLANE_3 = (1 << 3), + SVGA3D_CLIPPLANE_4 = (1 << 4), + SVGA3D_CLIPPLANE_5 = (1 << 5), } SVGA3dClipPlanes; typedef enum { - SVGA3D_CLEAR_COLOR = 0x1, - SVGA3D_CLEAR_DEPTH = 0x2, - SVGA3D_CLEAR_STENCIL = 0x4, - - /* - * Hint only, must be used together with SVGA3D_CLEAR_COLOR. If - * SVGA3D_CLEAR_DEPTH or SVGA3D_CLEAR_STENCIL bit is set, this - * bit will be ignored. 
- */ - SVGA3D_CLEAR_COLORFILL = 0x8 + SVGA3D_CLEAR_COLOR = 0x1, + SVGA3D_CLEAR_DEPTH = 0x2, + SVGA3D_CLEAR_STENCIL = 0x4, + + SVGA3D_CLEAR_COLORFILL = 0x8 } SVGA3dClearFlag; typedef enum { - SVGA3D_RT_DEPTH = 0, - SVGA3D_RT_MIN = 0, - SVGA3D_RT_STENCIL = 1, - SVGA3D_RT_COLOR0 = 2, - SVGA3D_RT_COLOR1 = 3, - SVGA3D_RT_COLOR2 = 4, - SVGA3D_RT_COLOR3 = 5, - SVGA3D_RT_COLOR4 = 6, - SVGA3D_RT_COLOR5 = 7, - SVGA3D_RT_COLOR6 = 8, - SVGA3D_RT_COLOR7 = 9, - SVGA3D_RT_MAX, - SVGA3D_RT_INVALID = ((uint32)-1), + SVGA3D_RT_DEPTH = 0, + SVGA3D_RT_MIN = 0, + SVGA3D_RT_STENCIL = 1, + SVGA3D_RT_COLOR0 = 2, + SVGA3D_RT_COLOR1 = 3, + SVGA3D_RT_COLOR2 = 4, + SVGA3D_RT_COLOR3 = 5, + SVGA3D_RT_COLOR4 = 6, + SVGA3D_RT_COLOR5 = 7, + SVGA3D_RT_COLOR6 = 8, + SVGA3D_RT_COLOR7 = 9, + SVGA3D_RT_MAX, + SVGA3D_RT_INVALID = ((uint32)-1), } SVGA3dRenderTargetType; #define SVGA3D_MAX_RT_COLOR (SVGA3D_RT_COLOR7 - SVGA3D_RT_COLOR0 + 1) -typedef -#include "vmware_pack_begin.h" -union { - struct { - uint32 red : 1; - uint32 green : 1; - uint32 blue : 1; - uint32 alpha : 1; - }; - uint32 uintValue; -} -#include "vmware_pack_end.h" -SVGA3dColorMask; +#pragma pack(push, 1) +typedef union { + struct { + uint32 red : 1; + uint32 green : 1; + uint32 blue : 1; + uint32 alpha : 1; + }; + uint32 uintValue; +} SVGA3dColorMask; +#pragma pack(pop) typedef enum { - SVGA3D_VBLEND_DISABLE = 0, - SVGA3D_VBLEND_1WEIGHT = 1, - SVGA3D_VBLEND_2WEIGHT = 2, - SVGA3D_VBLEND_3WEIGHT = 3, - SVGA3D_VBLEND_MAX = 4, + SVGA3D_VBLEND_DISABLE = 0, + SVGA3D_VBLEND_1WEIGHT = 1, + SVGA3D_VBLEND_2WEIGHT = 2, + SVGA3D_VBLEND_3WEIGHT = 3, + SVGA3D_VBLEND_MAX = 4, } SVGA3dVertexBlendFlags; typedef enum { - SVGA3D_WRAPCOORD_0 = 1 << 0, - SVGA3D_WRAPCOORD_1 = 1 << 1, - SVGA3D_WRAPCOORD_2 = 1 << 2, - SVGA3D_WRAPCOORD_3 = 1 << 3, - SVGA3D_WRAPCOORD_ALL = 0xF, + SVGA3D_WRAPCOORD_0 = 1 << 0, + SVGA3D_WRAPCOORD_1 = 1 << 1, + SVGA3D_WRAPCOORD_2 = 1 << 2, + SVGA3D_WRAPCOORD_3 = 1 << 3, + SVGA3D_WRAPCOORD_ALL = 0xF, } SVGA3dWrapFlags; -/* - * SVGA_3D_CMD_TEXTURESTATE Types. All value types - * must fit in a uint32. - */ - typedef enum { - SVGA3D_TS_INVALID = 0, - SVGA3D_TS_MIN = 1, - SVGA3D_TS_BIND_TEXTURE = 1, /* SVGA3dSurfaceId */ - SVGA3D_TS_COLOROP = 2, /* SVGA3dTextureCombiner */ - SVGA3D_TS_COLORARG1 = 3, /* SVGA3dTextureArgData */ - SVGA3D_TS_COLORARG2 = 4, /* SVGA3dTextureArgData */ - SVGA3D_TS_ALPHAOP = 5, /* SVGA3dTextureCombiner */ - SVGA3D_TS_ALPHAARG1 = 6, /* SVGA3dTextureArgData */ - SVGA3D_TS_ALPHAARG2 = 7, /* SVGA3dTextureArgData */ - SVGA3D_TS_ADDRESSU = 8, /* SVGA3dTextureAddress */ - SVGA3D_TS_ADDRESSV = 9, /* SVGA3dTextureAddress */ - SVGA3D_TS_MIPFILTER = 10, /* SVGA3dTextureFilter */ - SVGA3D_TS_MAGFILTER = 11, /* SVGA3dTextureFilter */ - SVGA3D_TS_MINFILTER = 12, /* SVGA3dTextureFilter */ - SVGA3D_TS_BORDERCOLOR = 13, /* SVGA3dColor */ - SVGA3D_TS_TEXCOORDINDEX = 14, /* uint32 */ - SVGA3D_TS_TEXTURETRANSFORMFLAGS = 15, /* SVGA3dTexTransformFlags */ - SVGA3D_TS_TEXCOORDGEN = 16, /* SVGA3dTextureCoordGen */ - SVGA3D_TS_BUMPENVMAT00 = 17, /* float */ - SVGA3D_TS_BUMPENVMAT01 = 18, /* float */ - SVGA3D_TS_BUMPENVMAT10 = 19, /* float */ - SVGA3D_TS_BUMPENVMAT11 = 20, /* float */ - SVGA3D_TS_TEXTURE_MIPMAP_LEVEL = 21, /* uint32 */ - SVGA3D_TS_TEXTURE_LOD_BIAS = 22, /* float */ - SVGA3D_TS_TEXTURE_ANISOTROPIC_LEVEL = 23, /* uint32 */ - SVGA3D_TS_ADDRESSW = 24, /* SVGA3dTextureAddress */ - - - /* - * Sampler Gamma Level - * - * Sampler gamma effects the color of samples taken from the sampler. A - * value of 1.0 will produce linear samples. 
If the value is <= 0.0 the - * gamma value is ignored and a linear space is used. - */ - - SVGA3D_TS_GAMMA = 25, /* float */ - SVGA3D_TS_BUMPENVLSCALE = 26, /* float */ - SVGA3D_TS_BUMPENVLOFFSET = 27, /* float */ - SVGA3D_TS_COLORARG0 = 28, /* SVGA3dTextureArgData */ - SVGA3D_TS_ALPHAARG0 = 29, /* SVGA3dTextureArgData */ - SVGA3D_TS_PREGB_MAX = 30, /* Max value before GBObjects */ - SVGA3D_TS_CONSTANT = 30, /* SVGA3dColor */ - SVGA3D_TS_COLOR_KEY_ENABLE = 31, /* SVGA3dBool */ - SVGA3D_TS_COLOR_KEY = 32, /* SVGA3dColor */ - SVGA3D_TS_MAX + SVGA3D_TS_INVALID = 0, + SVGA3D_TS_MIN = 1, + SVGA3D_TS_BIND_TEXTURE = 1, + SVGA3D_TS_COLOROP = 2, + SVGA3D_TS_COLORARG1 = 3, + SVGA3D_TS_COLORARG2 = 4, + SVGA3D_TS_ALPHAOP = 5, + SVGA3D_TS_ALPHAARG1 = 6, + SVGA3D_TS_ALPHAARG2 = 7, + SVGA3D_TS_ADDRESSU = 8, + SVGA3D_TS_ADDRESSV = 9, + SVGA3D_TS_MIPFILTER = 10, + SVGA3D_TS_MAGFILTER = 11, + SVGA3D_TS_MINFILTER = 12, + SVGA3D_TS_BORDERCOLOR = 13, + SVGA3D_TS_TEXCOORDINDEX = 14, + SVGA3D_TS_TEXTURETRANSFORMFLAGS = 15, + SVGA3D_TS_TEXCOORDGEN = 16, + SVGA3D_TS_BUMPENVMAT00 = 17, + SVGA3D_TS_BUMPENVMAT01 = 18, + SVGA3D_TS_BUMPENVMAT10 = 19, + SVGA3D_TS_BUMPENVMAT11 = 20, + SVGA3D_TS_TEXTURE_MIPMAP_LEVEL = 21, + SVGA3D_TS_TEXTURE_LOD_BIAS = 22, + SVGA3D_TS_TEXTURE_ANISOTROPIC_LEVEL = 23, + SVGA3D_TS_ADDRESSW = 24, + + SVGA3D_TS_GAMMA = 25, + SVGA3D_TS_BUMPENVLSCALE = 26, + SVGA3D_TS_BUMPENVLOFFSET = 27, + SVGA3D_TS_COLORARG0 = 28, + SVGA3D_TS_ALPHAARG0 = 29, + SVGA3D_TS_PREGB_MAX = 30, + SVGA3D_TS_CONSTANT = 30, + SVGA3D_TS_COLOR_KEY_ENABLE = 31, + SVGA3D_TS_COLOR_KEY = 32, + SVGA3D_TS_MAX } SVGA3dTextureStateName; typedef enum { - SVGA3D_TC_INVALID = 0, - SVGA3D_TC_DISABLE = 1, - SVGA3D_TC_SELECTARG1 = 2, - SVGA3D_TC_SELECTARG2 = 3, - SVGA3D_TC_MODULATE = 4, - SVGA3D_TC_ADD = 5, - SVGA3D_TC_ADDSIGNED = 6, - SVGA3D_TC_SUBTRACT = 7, - SVGA3D_TC_BLENDTEXTUREALPHA = 8, - SVGA3D_TC_BLENDDIFFUSEALPHA = 9, - SVGA3D_TC_BLENDCURRENTALPHA = 10, - SVGA3D_TC_BLENDFACTORALPHA = 11, - SVGA3D_TC_MODULATE2X = 12, - SVGA3D_TC_MODULATE4X = 13, - SVGA3D_TC_DSDT = 14, - SVGA3D_TC_DOTPRODUCT3 = 15, - SVGA3D_TC_BLENDTEXTUREALPHAPM = 16, - SVGA3D_TC_ADDSIGNED2X = 17, - SVGA3D_TC_ADDSMOOTH = 18, - SVGA3D_TC_PREMODULATE = 19, - SVGA3D_TC_MODULATEALPHA_ADDCOLOR = 20, - SVGA3D_TC_MODULATECOLOR_ADDALPHA = 21, - SVGA3D_TC_MODULATEINVALPHA_ADDCOLOR = 22, - SVGA3D_TC_MODULATEINVCOLOR_ADDALPHA = 23, - SVGA3D_TC_BUMPENVMAPLUMINANCE = 24, - SVGA3D_TC_MULTIPLYADD = 25, - SVGA3D_TC_LERP = 26, - SVGA3D_TC_MAX + SVGA3D_TC_INVALID = 0, + SVGA3D_TC_DISABLE = 1, + SVGA3D_TC_SELECTARG1 = 2, + SVGA3D_TC_SELECTARG2 = 3, + SVGA3D_TC_MODULATE = 4, + SVGA3D_TC_ADD = 5, + SVGA3D_TC_ADDSIGNED = 6, + SVGA3D_TC_SUBTRACT = 7, + SVGA3D_TC_BLENDTEXTUREALPHA = 8, + SVGA3D_TC_BLENDDIFFUSEALPHA = 9, + SVGA3D_TC_BLENDCURRENTALPHA = 10, + SVGA3D_TC_BLENDFACTORALPHA = 11, + SVGA3D_TC_MODULATE2X = 12, + SVGA3D_TC_MODULATE4X = 13, + SVGA3D_TC_DSDT = 14, + SVGA3D_TC_DOTPRODUCT3 = 15, + SVGA3D_TC_BLENDTEXTUREALPHAPM = 16, + SVGA3D_TC_ADDSIGNED2X = 17, + SVGA3D_TC_ADDSMOOTH = 18, + SVGA3D_TC_PREMODULATE = 19, + SVGA3D_TC_MODULATEALPHA_ADDCOLOR = 20, + SVGA3D_TC_MODULATECOLOR_ADDALPHA = 21, + SVGA3D_TC_MODULATEINVALPHA_ADDCOLOR = 22, + SVGA3D_TC_MODULATEINVCOLOR_ADDALPHA = 23, + SVGA3D_TC_BUMPENVMAPLUMINANCE = 24, + SVGA3D_TC_MULTIPLYADD = 25, + SVGA3D_TC_LERP = 26, + SVGA3D_TC_MAX } SVGA3dTextureCombiner; -#define SVGA3D_TC_CAP_BIT(svga3d_tc_op) (svga3d_tc_op ? (1 << (svga3d_tc_op - 1)) : 0) +#define SVGA3D_TC_CAP_BIT(svga3d_tc_op) \ + (svga3d_tc_op ? 
(1 << (svga3d_tc_op - 1)) : 0) typedef enum { - SVGA3D_TEX_ADDRESS_INVALID = 0, - SVGA3D_TEX_ADDRESS_MIN = 1, - SVGA3D_TEX_ADDRESS_WRAP = 1, - SVGA3D_TEX_ADDRESS_MIRROR = 2, - SVGA3D_TEX_ADDRESS_CLAMP = 3, - SVGA3D_TEX_ADDRESS_BORDER = 4, - SVGA3D_TEX_ADDRESS_MIRRORONCE = 5, - SVGA3D_TEX_ADDRESS_EDGE = 6, - SVGA3D_TEX_ADDRESS_MAX + SVGA3D_TEX_ADDRESS_INVALID = 0, + SVGA3D_TEX_ADDRESS_MIN = 1, + SVGA3D_TEX_ADDRESS_WRAP = 1, + SVGA3D_TEX_ADDRESS_MIRROR = 2, + SVGA3D_TEX_ADDRESS_CLAMP = 3, + SVGA3D_TEX_ADDRESS_BORDER = 4, + SVGA3D_TEX_ADDRESS_MIRRORONCE = 5, + SVGA3D_TEX_ADDRESS_EDGE = 6, + SVGA3D_TEX_ADDRESS_MAX } SVGA3dTextureAddress; -/* - * SVGA3D_TEX_FILTER_NONE as the minification filter means mipmapping is - * disabled, and the rasterizer should use the magnification filter instead. - */ typedef enum { - SVGA3D_TEX_FILTER_NONE = 0, - SVGA3D_TEX_FILTER_MIN = 0, - SVGA3D_TEX_FILTER_NEAREST = 1, - SVGA3D_TEX_FILTER_LINEAR = 2, - SVGA3D_TEX_FILTER_ANISOTROPIC = 3, - SVGA3D_TEX_FILTER_FLATCUBIC = 4, /* Deprecated, not implemented */ - SVGA3D_TEX_FILTER_GAUSSIANCUBIC = 5, /* Deprecated, not implemented */ - SVGA3D_TEX_FILTER_PYRAMIDALQUAD = 6, /* Not currently implemented */ - SVGA3D_TEX_FILTER_GAUSSIANQUAD = 7, /* Not currently implemented */ - SVGA3D_TEX_FILTER_MAX + SVGA3D_TEX_FILTER_NONE = 0, + SVGA3D_TEX_FILTER_MIN = 0, + SVGA3D_TEX_FILTER_NEAREST = 1, + SVGA3D_TEX_FILTER_LINEAR = 2, + SVGA3D_TEX_FILTER_ANISOTROPIC = 3, + SVGA3D_TEX_FILTER_FLATCUBIC = 4, + SVGA3D_TEX_FILTER_GAUSSIANCUBIC = 5, + SVGA3D_TEX_FILTER_PYRAMIDALQUAD = 6, + SVGA3D_TEX_FILTER_GAUSSIANQUAD = 7, + SVGA3D_TEX_FILTER_MAX } SVGA3dTextureFilter; typedef enum { - SVGA3D_TEX_TRANSFORM_OFF = 0, - SVGA3D_TEX_TRANSFORM_S = (1 << 0), - SVGA3D_TEX_TRANSFORM_T = (1 << 1), - SVGA3D_TEX_TRANSFORM_R = (1 << 2), - SVGA3D_TEX_TRANSFORM_Q = (1 << 3), - SVGA3D_TEX_PROJECTED = (1 << 15), + SVGA3D_TEX_TRANSFORM_OFF = 0, + SVGA3D_TEX_TRANSFORM_S = (1 << 0), + SVGA3D_TEX_TRANSFORM_T = (1 << 1), + SVGA3D_TEX_TRANSFORM_R = (1 << 2), + SVGA3D_TEX_TRANSFORM_Q = (1 << 3), + SVGA3D_TEX_PROJECTED = (1 << 15), } SVGA3dTexTransformFlags; typedef enum { - SVGA3D_TEXCOORD_GEN_OFF = 0, - SVGA3D_TEXCOORD_GEN_EYE_POSITION = 1, - SVGA3D_TEXCOORD_GEN_EYE_NORMAL = 2, - SVGA3D_TEXCOORD_GEN_REFLECTIONVECTOR = 3, - SVGA3D_TEXCOORD_GEN_SPHERE = 4, - SVGA3D_TEXCOORD_GEN_MAX + SVGA3D_TEXCOORD_GEN_OFF = 0, + SVGA3D_TEXCOORD_GEN_EYE_POSITION = 1, + SVGA3D_TEXCOORD_GEN_EYE_NORMAL = 2, + SVGA3D_TEXCOORD_GEN_REFLECTIONVECTOR = 3, + SVGA3D_TEXCOORD_GEN_SPHERE = 4, + SVGA3D_TEXCOORD_GEN_MAX } SVGA3dTextureCoordGen; -/* - * Texture argument constants for texture combiner - */ typedef enum { - SVGA3D_TA_INVALID = 0, - SVGA3D_TA_TFACTOR = 1, - SVGA3D_TA_PREVIOUS = 2, - SVGA3D_TA_DIFFUSE = 3, - SVGA3D_TA_TEXTURE = 4, - SVGA3D_TA_SPECULAR = 5, - SVGA3D_TA_CONSTANT = 6, - SVGA3D_TA_MAX + SVGA3D_TA_INVALID = 0, + SVGA3D_TA_TFACTOR = 1, + SVGA3D_TA_PREVIOUS = 2, + SVGA3D_TA_DIFFUSE = 3, + SVGA3D_TA_TEXTURE = 4, + SVGA3D_TA_SPECULAR = 5, + SVGA3D_TA_CONSTANT = 6, + SVGA3D_TA_MAX } SVGA3dTextureArgData; #define SVGA3D_TM_MASK_LEN 4 -/* Modifiers for texture argument constants defined above. 
*/ typedef enum { - SVGA3D_TM_NONE = 0, - SVGA3D_TM_ALPHA = (1 << SVGA3D_TM_MASK_LEN), - SVGA3D_TM_ONE_MINUS = (2 << SVGA3D_TM_MASK_LEN), + SVGA3D_TM_NONE = 0, + SVGA3D_TM_ALPHA = (1 << SVGA3D_TM_MASK_LEN), + SVGA3D_TM_ONE_MINUS = (2 << SVGA3D_TM_MASK_LEN), } SVGA3dTextureArgModifier; -/* - * Vertex declarations - * - * Notes: - * - * SVGA3D_DECLUSAGE_POSITIONT is for pre-transformed vertices. If you - * draw with any POSITIONT vertex arrays, the programmable vertex - * pipeline will be implicitly disabled. Drawing will take place as if - * no vertex shader was bound. - */ - typedef enum { - SVGA3D_DECLUSAGE_POSITION = 0, - SVGA3D_DECLUSAGE_BLENDWEIGHT, - SVGA3D_DECLUSAGE_BLENDINDICES, - SVGA3D_DECLUSAGE_NORMAL, - SVGA3D_DECLUSAGE_PSIZE, - SVGA3D_DECLUSAGE_TEXCOORD, - SVGA3D_DECLUSAGE_TANGENT, - SVGA3D_DECLUSAGE_BINORMAL, - SVGA3D_DECLUSAGE_TESSFACTOR, - SVGA3D_DECLUSAGE_POSITIONT, - SVGA3D_DECLUSAGE_COLOR, - SVGA3D_DECLUSAGE_FOG, - SVGA3D_DECLUSAGE_DEPTH, - SVGA3D_DECLUSAGE_SAMPLE, - SVGA3D_DECLUSAGE_MAX + SVGA3D_DECLUSAGE_POSITION = 0, + SVGA3D_DECLUSAGE_BLENDWEIGHT, + SVGA3D_DECLUSAGE_BLENDINDICES, + SVGA3D_DECLUSAGE_NORMAL, + SVGA3D_DECLUSAGE_PSIZE, + SVGA3D_DECLUSAGE_TEXCOORD, + SVGA3D_DECLUSAGE_TANGENT, + SVGA3D_DECLUSAGE_BINORMAL, + SVGA3D_DECLUSAGE_TESSFACTOR, + SVGA3D_DECLUSAGE_POSITIONT, + SVGA3D_DECLUSAGE_COLOR, + SVGA3D_DECLUSAGE_FOG, + SVGA3D_DECLUSAGE_DEPTH, + SVGA3D_DECLUSAGE_SAMPLE, + SVGA3D_DECLUSAGE_MAX } SVGA3dDeclUsage; typedef enum { - SVGA3D_DECLMETHOD_DEFAULT = 0, - SVGA3D_DECLMETHOD_PARTIALU, - SVGA3D_DECLMETHOD_PARTIALV, - SVGA3D_DECLMETHOD_CROSSUV, /* Normal */ - SVGA3D_DECLMETHOD_UV, - SVGA3D_DECLMETHOD_LOOKUP, /* Lookup a displacement map */ - SVGA3D_DECLMETHOD_LOOKUPPRESAMPLED, /* Lookup a pre-sampled displacement */ - /* map */ + SVGA3D_DECLMETHOD_DEFAULT = 0, + SVGA3D_DECLMETHOD_PARTIALU, + SVGA3D_DECLMETHOD_PARTIALV, + SVGA3D_DECLMETHOD_CROSSUV, + SVGA3D_DECLMETHOD_UV, + SVGA3D_DECLMETHOD_LOOKUP, + SVGA3D_DECLMETHOD_LOOKUPPRESAMPLED, } SVGA3dDeclMethod; typedef enum { - SVGA3D_DECLTYPE_FLOAT1 = 0, - SVGA3D_DECLTYPE_FLOAT2 = 1, - SVGA3D_DECLTYPE_FLOAT3 = 2, - SVGA3D_DECLTYPE_FLOAT4 = 3, - SVGA3D_DECLTYPE_D3DCOLOR = 4, - SVGA3D_DECLTYPE_UBYTE4 = 5, - SVGA3D_DECLTYPE_SHORT2 = 6, - SVGA3D_DECLTYPE_SHORT4 = 7, - SVGA3D_DECLTYPE_UBYTE4N = 8, - SVGA3D_DECLTYPE_SHORT2N = 9, - SVGA3D_DECLTYPE_SHORT4N = 10, - SVGA3D_DECLTYPE_USHORT2N = 11, - SVGA3D_DECLTYPE_USHORT4N = 12, - SVGA3D_DECLTYPE_UDEC3 = 13, - SVGA3D_DECLTYPE_DEC3N = 14, - SVGA3D_DECLTYPE_FLOAT16_2 = 15, - SVGA3D_DECLTYPE_FLOAT16_4 = 16, - SVGA3D_DECLTYPE_MAX, + SVGA3D_DECLTYPE_FLOAT1 = 0, + SVGA3D_DECLTYPE_FLOAT2 = 1, + SVGA3D_DECLTYPE_FLOAT3 = 2, + SVGA3D_DECLTYPE_FLOAT4 = 3, + SVGA3D_DECLTYPE_D3DCOLOR = 4, + SVGA3D_DECLTYPE_UBYTE4 = 5, + SVGA3D_DECLTYPE_SHORT2 = 6, + SVGA3D_DECLTYPE_SHORT4 = 7, + SVGA3D_DECLTYPE_UBYTE4N = 8, + SVGA3D_DECLTYPE_SHORT2N = 9, + SVGA3D_DECLTYPE_SHORT4N = 10, + SVGA3D_DECLTYPE_USHORT2N = 11, + SVGA3D_DECLTYPE_USHORT4N = 12, + SVGA3D_DECLTYPE_UDEC3 = 13, + SVGA3D_DECLTYPE_DEC3N = 14, + SVGA3D_DECLTYPE_FLOAT16_2 = 15, + SVGA3D_DECLTYPE_FLOAT16_4 = 16, + SVGA3D_DECLTYPE_MAX, } SVGA3dDeclType; -/* - * This structure is used for the divisor for geometry instancing; - * it's a direct translation of the Direct3D equivalent. - */ typedef union { - struct { - /* - * For index data, this number represents the number of instances to draw. 
- * For instance data, this number represents the number of - * instances/vertex in this stream - */ - uint32 count : 30; - - /* - * This is 1 if this is supposed to be the data that is repeated for - * every instance. - */ - uint32 indexedData : 1; - - /* - * This is 1 if this is supposed to be the per-instance data. - */ - uint32 instanceData : 1; - }; - - uint32 value; + struct { + uint32 count : 30; + + uint32 indexedData : 1; + + uint32 instanceData : 1; + }; + + uint32 value; } SVGA3dVertexDivisor; typedef enum { - /* - * SVGA3D_PRIMITIVE_INVALID is a valid primitive type. - * - * List MIN second so debuggers will think INVALID is - * the correct name. - */ - SVGA3D_PRIMITIVE_INVALID = 0, - SVGA3D_PRIMITIVE_MIN = 0, - SVGA3D_PRIMITIVE_TRIANGLELIST = 1, - SVGA3D_PRIMITIVE_POINTLIST = 2, - SVGA3D_PRIMITIVE_LINELIST = 3, - SVGA3D_PRIMITIVE_LINESTRIP = 4, - SVGA3D_PRIMITIVE_TRIANGLESTRIP = 5, - SVGA3D_PRIMITIVE_TRIANGLEFAN = 6, - SVGA3D_PRIMITIVE_LINELIST_ADJ = 7, - SVGA3D_PRIMITIVE_PREDX_MAX = 7, - SVGA3D_PRIMITIVE_LINESTRIP_ADJ = 8, - SVGA3D_PRIMITIVE_TRIANGLELIST_ADJ = 9, - SVGA3D_PRIMITIVE_TRIANGLESTRIP_ADJ = 10, - SVGA3D_PRIMITIVE_DX10_MAX = 11, - SVGA3D_PRIMITIVE_1_CONTROL_POINT_PATCH = 11, - SVGA3D_PRIMITIVE_2_CONTROL_POINT_PATCH = 12, - SVGA3D_PRIMITIVE_3_CONTROL_POINT_PATCH = 13, - SVGA3D_PRIMITIVE_4_CONTROL_POINT_PATCH = 14, - SVGA3D_PRIMITIVE_5_CONTROL_POINT_PATCH = 15, - SVGA3D_PRIMITIVE_6_CONTROL_POINT_PATCH = 16, - SVGA3D_PRIMITIVE_7_CONTROL_POINT_PATCH = 17, - SVGA3D_PRIMITIVE_8_CONTROL_POINT_PATCH = 18, - SVGA3D_PRIMITIVE_9_CONTROL_POINT_PATCH = 19, - SVGA3D_PRIMITIVE_10_CONTROL_POINT_PATCH = 20, - SVGA3D_PRIMITIVE_11_CONTROL_POINT_PATCH = 21, - SVGA3D_PRIMITIVE_12_CONTROL_POINT_PATCH = 22, - SVGA3D_PRIMITIVE_13_CONTROL_POINT_PATCH = 23, - SVGA3D_PRIMITIVE_14_CONTROL_POINT_PATCH = 24, - SVGA3D_PRIMITIVE_15_CONTROL_POINT_PATCH = 25, - SVGA3D_PRIMITIVE_16_CONTROL_POINT_PATCH = 26, - SVGA3D_PRIMITIVE_17_CONTROL_POINT_PATCH = 27, - SVGA3D_PRIMITIVE_18_CONTROL_POINT_PATCH = 28, - SVGA3D_PRIMITIVE_19_CONTROL_POINT_PATCH = 29, - SVGA3D_PRIMITIVE_20_CONTROL_POINT_PATCH = 30, - SVGA3D_PRIMITIVE_21_CONTROL_POINT_PATCH = 31, - SVGA3D_PRIMITIVE_22_CONTROL_POINT_PATCH = 32, - SVGA3D_PRIMITIVE_23_CONTROL_POINT_PATCH = 33, - SVGA3D_PRIMITIVE_24_CONTROL_POINT_PATCH = 34, - SVGA3D_PRIMITIVE_25_CONTROL_POINT_PATCH = 35, - SVGA3D_PRIMITIVE_26_CONTROL_POINT_PATCH = 36, - SVGA3D_PRIMITIVE_27_CONTROL_POINT_PATCH = 37, - SVGA3D_PRIMITIVE_28_CONTROL_POINT_PATCH = 38, - SVGA3D_PRIMITIVE_29_CONTROL_POINT_PATCH = 39, - SVGA3D_PRIMITIVE_30_CONTROL_POINT_PATCH = 40, - SVGA3D_PRIMITIVE_31_CONTROL_POINT_PATCH = 41, - SVGA3D_PRIMITIVE_32_CONTROL_POINT_PATCH = 42, - SVGA3D_PRIMITIVE_MAX = 43 + + SVGA3D_PRIMITIVE_INVALID = 0, + SVGA3D_PRIMITIVE_MIN = 0, + SVGA3D_PRIMITIVE_TRIANGLELIST = 1, + SVGA3D_PRIMITIVE_POINTLIST = 2, + SVGA3D_PRIMITIVE_LINELIST = 3, + SVGA3D_PRIMITIVE_LINESTRIP = 4, + SVGA3D_PRIMITIVE_TRIANGLESTRIP = 5, + SVGA3D_PRIMITIVE_TRIANGLEFAN = 6, + SVGA3D_PRIMITIVE_LINELIST_ADJ = 7, + SVGA3D_PRIMITIVE_PREDX_MAX = 7, + SVGA3D_PRIMITIVE_LINESTRIP_ADJ = 8, + SVGA3D_PRIMITIVE_TRIANGLELIST_ADJ = 9, + SVGA3D_PRIMITIVE_TRIANGLESTRIP_ADJ = 10, + SVGA3D_PRIMITIVE_DX10_MAX = 11, + SVGA3D_PRIMITIVE_1_CONTROL_POINT_PATCH = 11, + SVGA3D_PRIMITIVE_2_CONTROL_POINT_PATCH = 12, + SVGA3D_PRIMITIVE_3_CONTROL_POINT_PATCH = 13, + SVGA3D_PRIMITIVE_4_CONTROL_POINT_PATCH = 14, + SVGA3D_PRIMITIVE_5_CONTROL_POINT_PATCH = 15, + SVGA3D_PRIMITIVE_6_CONTROL_POINT_PATCH = 16, + SVGA3D_PRIMITIVE_7_CONTROL_POINT_PATCH = 
17, + SVGA3D_PRIMITIVE_8_CONTROL_POINT_PATCH = 18, + SVGA3D_PRIMITIVE_9_CONTROL_POINT_PATCH = 19, + SVGA3D_PRIMITIVE_10_CONTROL_POINT_PATCH = 20, + SVGA3D_PRIMITIVE_11_CONTROL_POINT_PATCH = 21, + SVGA3D_PRIMITIVE_12_CONTROL_POINT_PATCH = 22, + SVGA3D_PRIMITIVE_13_CONTROL_POINT_PATCH = 23, + SVGA3D_PRIMITIVE_14_CONTROL_POINT_PATCH = 24, + SVGA3D_PRIMITIVE_15_CONTROL_POINT_PATCH = 25, + SVGA3D_PRIMITIVE_16_CONTROL_POINT_PATCH = 26, + SVGA3D_PRIMITIVE_17_CONTROL_POINT_PATCH = 27, + SVGA3D_PRIMITIVE_18_CONTROL_POINT_PATCH = 28, + SVGA3D_PRIMITIVE_19_CONTROL_POINT_PATCH = 29, + SVGA3D_PRIMITIVE_20_CONTROL_POINT_PATCH = 30, + SVGA3D_PRIMITIVE_21_CONTROL_POINT_PATCH = 31, + SVGA3D_PRIMITIVE_22_CONTROL_POINT_PATCH = 32, + SVGA3D_PRIMITIVE_23_CONTROL_POINT_PATCH = 33, + SVGA3D_PRIMITIVE_24_CONTROL_POINT_PATCH = 34, + SVGA3D_PRIMITIVE_25_CONTROL_POINT_PATCH = 35, + SVGA3D_PRIMITIVE_26_CONTROL_POINT_PATCH = 36, + SVGA3D_PRIMITIVE_27_CONTROL_POINT_PATCH = 37, + SVGA3D_PRIMITIVE_28_CONTROL_POINT_PATCH = 38, + SVGA3D_PRIMITIVE_29_CONTROL_POINT_PATCH = 39, + SVGA3D_PRIMITIVE_30_CONTROL_POINT_PATCH = 40, + SVGA3D_PRIMITIVE_31_CONTROL_POINT_PATCH = 41, + SVGA3D_PRIMITIVE_32_CONTROL_POINT_PATCH = 42, + SVGA3D_PRIMITIVE_MAX = 43 } SVGA3dPrimitiveType; typedef enum { - SVGA3D_COORDINATE_INVALID = 0, - SVGA3D_COORDINATE_LEFTHANDED = 1, - SVGA3D_COORDINATE_RIGHTHANDED = 2, - SVGA3D_COORDINATE_MAX + SVGA3D_COORDINATE_INVALID = 0, + SVGA3D_COORDINATE_LEFTHANDED = 1, + SVGA3D_COORDINATE_RIGHTHANDED = 2, + SVGA3D_COORDINATE_MAX } SVGA3dCoordinateType; typedef enum { - SVGA3D_TRANSFORM_INVALID = 0, - SVGA3D_TRANSFORM_WORLD = 1, - SVGA3D_TRANSFORM_MIN = 1, - SVGA3D_TRANSFORM_VIEW = 2, - SVGA3D_TRANSFORM_PROJECTION = 3, - SVGA3D_TRANSFORM_TEXTURE0 = 4, - SVGA3D_TRANSFORM_TEXTURE1 = 5, - SVGA3D_TRANSFORM_TEXTURE2 = 6, - SVGA3D_TRANSFORM_TEXTURE3 = 7, - SVGA3D_TRANSFORM_TEXTURE4 = 8, - SVGA3D_TRANSFORM_TEXTURE5 = 9, - SVGA3D_TRANSFORM_TEXTURE6 = 10, - SVGA3D_TRANSFORM_TEXTURE7 = 11, - SVGA3D_TRANSFORM_WORLD1 = 12, - SVGA3D_TRANSFORM_WORLD2 = 13, - SVGA3D_TRANSFORM_WORLD3 = 14, - SVGA3D_TRANSFORM_MAX + SVGA3D_TRANSFORM_INVALID = 0, + SVGA3D_TRANSFORM_WORLD = 1, + SVGA3D_TRANSFORM_MIN = 1, + SVGA3D_TRANSFORM_VIEW = 2, + SVGA3D_TRANSFORM_PROJECTION = 3, + SVGA3D_TRANSFORM_TEXTURE0 = 4, + SVGA3D_TRANSFORM_TEXTURE1 = 5, + SVGA3D_TRANSFORM_TEXTURE2 = 6, + SVGA3D_TRANSFORM_TEXTURE3 = 7, + SVGA3D_TRANSFORM_TEXTURE4 = 8, + SVGA3D_TRANSFORM_TEXTURE5 = 9, + SVGA3D_TRANSFORM_TEXTURE6 = 10, + SVGA3D_TRANSFORM_TEXTURE7 = 11, + SVGA3D_TRANSFORM_WORLD1 = 12, + SVGA3D_TRANSFORM_WORLD2 = 13, + SVGA3D_TRANSFORM_WORLD3 = 14, + SVGA3D_TRANSFORM_MAX } SVGA3dTransformType; typedef enum { - SVGA3D_LIGHTTYPE_INVALID = 0, - SVGA3D_LIGHTTYPE_MIN = 1, - SVGA3D_LIGHTTYPE_POINT = 1, - SVGA3D_LIGHTTYPE_SPOT1 = 2, /* 1-cone, in degrees */ - SVGA3D_LIGHTTYPE_SPOT2 = 3, /* 2-cone, in radians */ - SVGA3D_LIGHTTYPE_DIRECTIONAL = 4, - SVGA3D_LIGHTTYPE_MAX + SVGA3D_LIGHTTYPE_INVALID = 0, + SVGA3D_LIGHTTYPE_MIN = 1, + SVGA3D_LIGHTTYPE_POINT = 1, + SVGA3D_LIGHTTYPE_SPOT1 = 2, + SVGA3D_LIGHTTYPE_SPOT2 = 3, + SVGA3D_LIGHTTYPE_DIRECTIONAL = 4, + SVGA3D_LIGHTTYPE_MAX } SVGA3dLightType; typedef enum { - SVGA3D_CUBEFACE_POSX = 0, - SVGA3D_CUBEFACE_NEGX = 1, - SVGA3D_CUBEFACE_POSY = 2, - SVGA3D_CUBEFACE_NEGY = 3, - SVGA3D_CUBEFACE_POSZ = 4, - SVGA3D_CUBEFACE_NEGZ = 5, + SVGA3D_CUBEFACE_POSX = 0, + SVGA3D_CUBEFACE_NEGX = 1, + SVGA3D_CUBEFACE_POSY = 2, + SVGA3D_CUBEFACE_NEGY = 3, + SVGA3D_CUBEFACE_POSZ = 4, + SVGA3D_CUBEFACE_NEGZ = 5, } SVGA3dCubeFace; typedef enum { 
- SVGA3D_SHADERTYPE_INVALID = 0, - SVGA3D_SHADERTYPE_MIN = 1, - SVGA3D_SHADERTYPE_VS = 1, - SVGA3D_SHADERTYPE_PS = 2, - SVGA3D_SHADERTYPE_PREDX_MAX = 3, - SVGA3D_SHADERTYPE_GS = 3, - SVGA3D_SHADERTYPE_DX10_MAX = 4, - SVGA3D_SHADERTYPE_HS = 4, - SVGA3D_SHADERTYPE_DS = 5, - SVGA3D_SHADERTYPE_CS = 6, - SVGA3D_SHADERTYPE_MAX = 7 + SVGA3D_SHADERTYPE_INVALID = 0, + SVGA3D_SHADERTYPE_MIN = 1, + SVGA3D_SHADERTYPE_VS = 1, + SVGA3D_SHADERTYPE_PS = 2, + SVGA3D_SHADERTYPE_PREDX_MAX = 3, + SVGA3D_SHADERTYPE_GS = 3, + SVGA3D_SHADERTYPE_DX10_MAX = 4, + SVGA3D_SHADERTYPE_HS = 4, + SVGA3D_SHADERTYPE_DS = 5, + SVGA3D_SHADERTYPE_CS = 6, + SVGA3D_SHADERTYPE_MAX = 7 } SVGA3dShaderType; -#define SVGA3D_NUM_SHADERTYPE_PREDX \ - (SVGA3D_SHADERTYPE_PREDX_MAX - SVGA3D_SHADERTYPE_MIN) +#define SVGA3D_NUM_SHADERTYPE_PREDX \ + (SVGA3D_SHADERTYPE_PREDX_MAX - SVGA3D_SHADERTYPE_MIN) -#define SVGA3D_NUM_SHADERTYPE_DX10 \ - (SVGA3D_SHADERTYPE_DX10_MAX - SVGA3D_SHADERTYPE_MIN) +#define SVGA3D_NUM_SHADERTYPE_DX10 \ + (SVGA3D_SHADERTYPE_DX10_MAX - SVGA3D_SHADERTYPE_MIN) -#define SVGA3D_NUM_SHADERTYPE \ - (SVGA3D_SHADERTYPE_MAX - SVGA3D_SHADERTYPE_MIN) +#define SVGA3D_NUM_SHADERTYPE (SVGA3D_SHADERTYPE_MAX - SVGA3D_SHADERTYPE_MIN) typedef enum { - SVGA3D_CONST_TYPE_MIN = 0, - SVGA3D_CONST_TYPE_FLOAT = 0, - SVGA3D_CONST_TYPE_INT = 1, - SVGA3D_CONST_TYPE_BOOL = 2, - SVGA3D_CONST_TYPE_MAX = 3, + SVGA3D_CONST_TYPE_MIN = 0, + SVGA3D_CONST_TYPE_FLOAT = 0, + SVGA3D_CONST_TYPE_INT = 1, + SVGA3D_CONST_TYPE_BOOL = 2, + SVGA3D_CONST_TYPE_MAX = 3, } SVGA3dShaderConstType; -/* - * Register limits for shader consts. - */ -#define SVGA3D_CONSTREG_MAX 256 -#define SVGA3D_CONSTINTREG_MAX 16 -#define SVGA3D_CONSTBOOLREG_MAX 16 +#define SVGA3D_CONSTREG_MAX 256 +#define SVGA3D_CONSTINTREG_MAX 16 +#define SVGA3D_CONSTBOOLREG_MAX 16 typedef enum { - SVGA3D_STRETCH_BLT_POINT = 0, - SVGA3D_STRETCH_BLT_LINEAR = 1, - SVGA3D_STRETCH_BLT_MAX + SVGA3D_STRETCH_BLT_POINT = 0, + SVGA3D_STRETCH_BLT_LINEAR = 1, + SVGA3D_STRETCH_BLT_MAX } SVGA3dStretchBltMode; typedef enum { - SVGA3D_QUERYTYPE_INVALID = ((uint8)-1), - SVGA3D_QUERYTYPE_MIN = 0, - SVGA3D_QUERYTYPE_OCCLUSION = 0, - SVGA3D_QUERYTYPE_TIMESTAMP = 1, - SVGA3D_QUERYTYPE_TIMESTAMPDISJOINT = 2, - SVGA3D_QUERYTYPE_PIPELINESTATS = 3, - SVGA3D_QUERYTYPE_OCCLUSIONPREDICATE = 4, - SVGA3D_QUERYTYPE_STREAMOUTPUTSTATS = 5, - SVGA3D_QUERYTYPE_STREAMOVERFLOWPREDICATE = 6, - SVGA3D_QUERYTYPE_OCCLUSION64 = 7, - SVGA3D_QUERYTYPE_DX10_MAX = 8, - SVGA3D_QUERYTYPE_SOSTATS_STREAM0 = 8, - SVGA3D_QUERYTYPE_SOSTATS_STREAM1 = 9, - SVGA3D_QUERYTYPE_SOSTATS_STREAM2 = 10, - SVGA3D_QUERYTYPE_SOSTATS_STREAM3 = 11, - SVGA3D_QUERYTYPE_SOP_STREAM0 = 12, - SVGA3D_QUERYTYPE_SOP_STREAM1 = 13, - SVGA3D_QUERYTYPE_SOP_STREAM2 = 14, - SVGA3D_QUERYTYPE_SOP_STREAM3 = 15, - SVGA3D_QUERYTYPE_MAX + SVGA3D_QUERYTYPE_INVALID = ((uint8)-1), + SVGA3D_QUERYTYPE_MIN = 0, + SVGA3D_QUERYTYPE_OCCLUSION = 0, + SVGA3D_QUERYTYPE_TIMESTAMP = 1, + SVGA3D_QUERYTYPE_TIMESTAMPDISJOINT = 2, + SVGA3D_QUERYTYPE_PIPELINESTATS = 3, + SVGA3D_QUERYTYPE_OCCLUSIONPREDICATE = 4, + SVGA3D_QUERYTYPE_STREAMOUTPUTSTATS = 5, + SVGA3D_QUERYTYPE_STREAMOVERFLOWPREDICATE = 6, + SVGA3D_QUERYTYPE_OCCLUSION64 = 7, + SVGA3D_QUERYTYPE_DX10_MAX = 8, + SVGA3D_QUERYTYPE_SOSTATS_STREAM0 = 8, + SVGA3D_QUERYTYPE_SOSTATS_STREAM1 = 9, + SVGA3D_QUERYTYPE_SOSTATS_STREAM2 = 10, + SVGA3D_QUERYTYPE_SOSTATS_STREAM3 = 11, + SVGA3D_QUERYTYPE_SOP_STREAM0 = 12, + SVGA3D_QUERYTYPE_SOP_STREAM1 = 13, + SVGA3D_QUERYTYPE_SOP_STREAM2 = 14, + SVGA3D_QUERYTYPE_SOP_STREAM3 = 15, + SVGA3D_QUERYTYPE_MAX } 
SVGA3dQueryType; typedef uint8 SVGA3dQueryTypeUint8; -#define SVGA3D_NUM_QUERYTYPE (SVGA3D_QUERYTYPE_MAX - SVGA3D_QUERYTYPE_MIN) +#define SVGA3D_NUM_QUERYTYPE (SVGA3D_QUERYTYPE_MAX - SVGA3D_QUERYTYPE_MIN) -/* - * This is the maximum number of queries per context that can be active - * simultaneously between a beginQuery and endQuery. - */ #define SVGA3D_MAX_QUERY 64 -/* - * Query result buffer formats - */ -typedef -#include "vmware_pack_begin.h" -struct { - uint32 samplesRendered; -} -#include "vmware_pack_end.h" -SVGADXOcclusionQueryResult; - -typedef -#include "vmware_pack_begin.h" -struct { - uint32 passed; -} -#include "vmware_pack_end.h" -SVGADXEventQueryResult; - -typedef -#include "vmware_pack_begin.h" -struct { - uint64 timestamp; -} -#include "vmware_pack_end.h" -SVGADXTimestampQueryResult; - -typedef -#include "vmware_pack_begin.h" -struct { - uint64 realFrequency; - uint32 disjoint; -} -#include "vmware_pack_end.h" -SVGADXTimestampDisjointQueryResult; - -typedef -#include "vmware_pack_begin.h" -struct { - uint64 inputAssemblyVertices; - uint64 inputAssemblyPrimitives; - uint64 vertexShaderInvocations; - uint64 geometryShaderInvocations; - uint64 geometryShaderPrimitives; - uint64 clipperInvocations; - uint64 clipperPrimitives; - uint64 pixelShaderInvocations; - uint64 hullShaderInvocations; - uint64 domainShaderInvocations; - uint64 computeShaderInvocations; -} -#include "vmware_pack_end.h" -SVGADXPipelineStatisticsQueryResult; - -typedef -#include "vmware_pack_begin.h" -struct { - uint32 anySamplesRendered; -} -#include "vmware_pack_end.h" -SVGADXOcclusionPredicateQueryResult; - -typedef -#include "vmware_pack_begin.h" -struct { - uint64 numPrimitivesWritten; - uint64 numPrimitivesRequired; -} -#include "vmware_pack_end.h" -SVGADXStreamOutStatisticsQueryResult; - -typedef -#include "vmware_pack_begin.h" -struct { - uint32 overflowed; -} -#include "vmware_pack_end.h" -SVGADXStreamOutPredicateQueryResult; - -typedef -#include "vmware_pack_begin.h" -struct { - uint64 samplesRendered; -} -#include "vmware_pack_end.h" -SVGADXOcclusion64QueryResult; - -/* - * SVGADXQueryResultUnion is not intended for use in the protocol, but is - * very helpful when working with queries generically. 
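By way of illustration of the "generic" use mentioned above, a guest-side helper might pick the right member of SVGADXQueryResultUnion based on the query type it issued. The function name and the assumption that the caller has already copied the raw result bytes out of the query's backing memory are hypothetical, not part of this interface:

/* Hypothetical sketch: interpret raw query-result bytes via the union. */
static uint64 example_occlusion_samples(const void *result_bytes,
					SVGA3dQueryType type)
{
	const SVGADXQueryResultUnion *res = result_bytes;

	switch (type) {
	case SVGA3D_QUERYTYPE_OCCLUSION:
		return res->occ.samplesRendered;	/* 32-bit sample count */
	case SVGA3D_QUERYTYPE_OCCLUSION64:
		return res->occ64.samplesRendered;	/* 64-bit sample count */
	case SVGA3D_QUERYTYPE_OCCLUSIONPREDICATE:
		return res->occPred.anySamplesRendered ? 1 : 0;
	default:
		return 0;	/* other query types carry different payloads */
	}
}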
- */ -typedef -#include "vmware_pack_begin.h" -union SVGADXQueryResultUnion { - SVGADXOcclusionQueryResult occ; - SVGADXEventQueryResult event; - SVGADXTimestampQueryResult ts; - SVGADXTimestampDisjointQueryResult tsDisjoint; - SVGADXPipelineStatisticsQueryResult pipelineStats; - SVGADXOcclusionPredicateQueryResult occPred; - SVGADXStreamOutStatisticsQueryResult soStats; - SVGADXStreamOutPredicateQueryResult soPred; - SVGADXOcclusion64QueryResult occ64; -} -#include "vmware_pack_end.h" -SVGADXQueryResultUnion; +#pragma pack(push, 1) +typedef struct { + uint32 samplesRendered; +} SVGADXOcclusionQueryResult; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint32 passed; +} SVGADXEventQueryResult; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint64 timestamp; +} SVGADXTimestampQueryResult; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint64 realFrequency; + uint32 disjoint; +} SVGADXTimestampDisjointQueryResult; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint64 inputAssemblyVertices; + uint64 inputAssemblyPrimitives; + uint64 vertexShaderInvocations; + uint64 geometryShaderInvocations; + uint64 geometryShaderPrimitives; + uint64 clipperInvocations; + uint64 clipperPrimitives; + uint64 pixelShaderInvocations; + uint64 hullShaderInvocations; + uint64 domainShaderInvocations; + uint64 computeShaderInvocations; +} SVGADXPipelineStatisticsQueryResult; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint32 anySamplesRendered; +} SVGADXOcclusionPredicateQueryResult; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint64 numPrimitivesWritten; + uint64 numPrimitivesRequired; +} SVGADXStreamOutStatisticsQueryResult; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint32 overflowed; +} SVGADXStreamOutPredicateQueryResult; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint64 samplesRendered; +} SVGADXOcclusion64QueryResult; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef union SVGADXQueryResultUnion { + SVGADXOcclusionQueryResult occ; + SVGADXEventQueryResult event; + SVGADXTimestampQueryResult ts; + SVGADXTimestampDisjointQueryResult tsDisjoint; + SVGADXPipelineStatisticsQueryResult pipelineStats; + SVGADXOcclusionPredicateQueryResult occPred; + SVGADXStreamOutStatisticsQueryResult soStats; + SVGADXStreamOutPredicateQueryResult soPred; + SVGADXOcclusion64QueryResult occ64; +} SVGADXQueryResultUnion; +#pragma pack(pop) typedef enum { - SVGA3D_QUERYSTATE_PENDING = 0, /* Query is not finished yet */ - SVGA3D_QUERYSTATE_SUCCEEDED = 1, /* Completed successfully */ - SVGA3D_QUERYSTATE_FAILED = 2, /* Completed unsuccessfully */ - SVGA3D_QUERYSTATE_NEW = 3, /* Never submitted (guest only) */ + SVGA3D_QUERYSTATE_PENDING = 0, + SVGA3D_QUERYSTATE_SUCCEEDED = 1, + SVGA3D_QUERYSTATE_FAILED = 2, + SVGA3D_QUERYSTATE_NEW = 3, } SVGA3dQueryState; typedef enum { - SVGA3D_WRITE_HOST_VRAM = 1, - SVGA3D_READ_HOST_VRAM = 2, + SVGA3D_WRITE_HOST_VRAM = 1, + SVGA3D_READ_HOST_VRAM = 2, } SVGA3dTransferType; -#define SVGA3D_LOGICOP_INVALID 0 -#define SVGA3D_LOGICOP_MIN 1 -#define SVGA3D_LOGICOP_COPY 1 -#define SVGA3D_LOGICOP_NOT 2 -#define SVGA3D_LOGICOP_AND 3 -#define SVGA3D_LOGICOP_OR 4 -#define SVGA3D_LOGICOP_XOR 5 -#define SVGA3D_LOGICOP_NXOR 6 -#define SVGA3D_LOGICOP_ROP3 7 -#define SVGA3D_LOGICOP_MAX 8 +#define SVGA3D_LOGICOP_INVALID 0 +#define SVGA3D_LOGICOP_COPY 1 + +#define SVGA3D_LOGICOP_MIN 1 +#define SVGA3D_LOGICOP_NOT 2 +#define SVGA3D_LOGICOP_AND 3 +#define 
SVGA3D_LOGICOP_OR 4 +#define SVGA3D_LOGICOP_XOR 5 +#define SVGA3D_LOGICOP_NXOR 6 +#define SVGA3D_LOGICOP_ROP3 7 + +#define SVGA3D_LOGICOP_MAX 8 typedef uint16 SVGA3dLogicOp; -#define SVGA3D_LOGICOP_ROP3_INVALID ((uint16) -1) -#define SVGA3D_LOGICOP_ROP3_MIN 0 -#define SVGA3D_LOGICOP_ROP3_MAX 256 +#define SVGA3D_LOGICOP_ROP3_INVALID ((uint16)-1) +#define SVGA3D_LOGICOP_ROP3_MIN 0 +#define SVGA3D_LOGICOP_ROP3_MAX 256 typedef uint16 SVGA3dLogicOpRop3; -typedef -#include "vmware_pack_begin.h" -struct { - union { - struct { - uint16 function; // SVGA3dFogFunction - uint8 type; // SVGA3dFogType - uint8 base; // SVGA3dFogBase - }; - uint32 uintValue; - }; -} -#include "vmware_pack_end.h" -SVGA3dFogMode; - -/* - * Uniquely identify one image (a 1D/2D/3D array) from a surface. This - * is a surface ID as well as face/mipmap indices. - */ -typedef -#include "vmware_pack_begin.h" -struct SVGA3dSurfaceImageId { - uint32 sid; - uint32 face; - uint32 mipmap; -} -#include "vmware_pack_end.h" -SVGA3dSurfaceImageId; - -typedef -#include "vmware_pack_begin.h" -struct SVGA3dSubSurfaceId { - uint32 sid; - uint32 subResourceId; -} -#include "vmware_pack_end.h" -SVGA3dSubSurfaceId; - -typedef -#include "vmware_pack_begin.h" -struct { - uint32 width; - uint32 height; - uint32 depth; -} -#include "vmware_pack_end.h" -SVGA3dSize; +#pragma pack(push, 1) +typedef struct { + union { + struct { + uint16 function; + uint8 type; + uint8 base; + }; + uint32 uintValue; + }; +} SVGA3dFogMode; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dSurfaceImageId { + uint32 sid; + uint32 face; + uint32 mipmap; +} SVGA3dSurfaceImageId; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGA3dSubSurfaceId { + uint32 sid; + uint32 subResourceId; +} SVGA3dSubSurfaceId; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint32 width; + uint32 height; + uint32 depth; +} SVGA3dSize; +#pragma pack(pop) -/* - * Guest-backed objects definitions. - */ typedef enum { - SVGA_OTABLE_MOB = 0, - SVGA_OTABLE_MIN = 0, - SVGA_OTABLE_SURFACE = 1, - SVGA_OTABLE_CONTEXT = 2, - SVGA_OTABLE_SHADER = 3, - SVGA_OTABLE_SCREENTARGET = 4, - - SVGA_OTABLE_DX9_MAX = 5, - - SVGA_OTABLE_DXCONTEXT = 5, - SVGA_OTABLE_DX_MAX = 6, - - SVGA_OTABLE_RESERVED1 = 6, - SVGA_OTABLE_RESERVED2 = 7, - - /* - * Additions to this table need to be tied to HW-version features and - * checkpointed accordingly. 
- */ - SVGA_OTABLE_DEVEL_MAX = 8, - SVGA_OTABLE_MAX = 8 + SVGA_OTABLE_MOB = 0, + SVGA_OTABLE_MIN = 0, + SVGA_OTABLE_SURFACE = 1, + SVGA_OTABLE_CONTEXT = 2, + SVGA_OTABLE_SHADER = 3, + SVGA_OTABLE_SCREENTARGET = 4, + + SVGA_OTABLE_DX9_MAX = 5, + + SVGA_OTABLE_DXCONTEXT = 5, + SVGA_OTABLE_DX_MAX = 6, + + SVGA_OTABLE_DEVEL_MAX = 6, + SVGA_OTABLE_MAX = 6, + + SVGA_OTABLE_RESERVED1 = 6, + SVGA_OTABLE_RESERVED2 = 7, + SVGA_OTABLE_BUG_1952836_MAX = 8, } SVGAOTableType; typedef enum { - SVGA_COTABLE_MIN = 0, - SVGA_COTABLE_RTVIEW = 0, - SVGA_COTABLE_DSVIEW = 1, - SVGA_COTABLE_SRVIEW = 2, - SVGA_COTABLE_ELEMENTLAYOUT = 3, - SVGA_COTABLE_BLENDSTATE = 4, - SVGA_COTABLE_DEPTHSTENCIL = 5, - SVGA_COTABLE_RASTERIZERSTATE = 6, - SVGA_COTABLE_SAMPLER = 7, - SVGA_COTABLE_STREAMOUTPUT = 8, - SVGA_COTABLE_DXQUERY = 9, - SVGA_COTABLE_DXSHADER = 10, - SVGA_COTABLE_DX10_MAX = 11, - SVGA_COTABLE_UAVIEW = 11, - SVGA_COTABLE_MAX = 12, + SVGA_COTABLE_MIN = 0, + SVGA_COTABLE_RTVIEW = 0, + SVGA_COTABLE_DSVIEW = 1, + SVGA_COTABLE_SRVIEW = 2, + SVGA_COTABLE_ELEMENTLAYOUT = 3, + SVGA_COTABLE_BLENDSTATE = 4, + SVGA_COTABLE_DEPTHSTENCIL = 5, + SVGA_COTABLE_RASTERIZERSTATE = 6, + SVGA_COTABLE_SAMPLER = 7, + SVGA_COTABLE_STREAMOUTPUT = 8, + SVGA_COTABLE_DXQUERY = 9, + SVGA_COTABLE_DXSHADER = 10, + SVGA_COTABLE_DX10_MAX = 11, + SVGA_COTABLE_UAVIEW = 11, + SVGA_COTABLE_MAX = 12, } SVGACOTableType; -/* - * The largest size (number of entries) allowed in a COTable. - */ #define SVGA_COTABLE_MAX_IDS (MAX_UINT16 - 2) typedef enum SVGAMobFormat { - SVGA3D_MOBFMT_INVALID = SVGA3D_INVALID_ID, - SVGA3D_MOBFMT_PTDEPTH_0 = 0, - SVGA3D_MOBFMT_MIN = 0, - SVGA3D_MOBFMT_PTDEPTH_1 = 1, - SVGA3D_MOBFMT_PTDEPTH_2 = 2, - SVGA3D_MOBFMT_RANGE = 3, - SVGA3D_MOBFMT_PTDEPTH64_0 = 4, - SVGA3D_MOBFMT_PTDEPTH64_1 = 5, - SVGA3D_MOBFMT_PTDEPTH64_2 = 6, - SVGA3D_MOBFMT_PREDX_MAX = 7, - SVGA3D_MOBFMT_EMPTY = 7, - SVGA3D_MOBFMT_MAX, - - /* - * This isn't actually used by the guest, but is a mob-format used - * internally by the SVGA device (and is therefore not binary compatible). - */ - SVGA3D_MOBFMT_HB, + SVGA3D_MOBFMT_INVALID = SVGA3D_INVALID_ID, + SVGA3D_MOBFMT_PT_0 = 0, + SVGA3D_MOBFMT_MIN = 0, + SVGA3D_MOBFMT_PT_1 = 1, + SVGA3D_MOBFMT_PT_2 = 2, + SVGA3D_MOBFMT_RANGE = 3, + SVGA3D_MOBFMT_PT64_0 = 4, + SVGA3D_MOBFMT_PT64_1 = 5, + SVGA3D_MOBFMT_PT64_2 = 6, + SVGA3D_MOBFMT_PREDX_MAX = 7, + SVGA3D_MOBFMT_EMPTY = 7, + + SVGA3D_MOBFMT_MAX, + + SVGA3D_MOBFMT_HB, } SVGAMobFormat; #define SVGA3D_MOB_EMPTY_BASE 1 -/* - * Multisample pattern types. - */ - typedef enum SVGA3dMSPattern { - SVGA3D_MS_PATTERN_NONE = 0, - SVGA3D_MS_PATTERN_MIN = 0, - SVGA3D_MS_PATTERN_STANDARD = 1, - SVGA3D_MS_PATTERN_CENTER = 2, - SVGA3D_MS_PATTERN_MAX = 3, + SVGA3D_MS_PATTERN_NONE = 0, + SVGA3D_MS_PATTERN_MIN = 0, + SVGA3D_MS_PATTERN_STANDARD = 1, + SVGA3D_MS_PATTERN_CENTER = 2, + SVGA3D_MS_PATTERN_MAX = 3, } SVGA3dMSPattern; -/* - * Precision settings for each sample. 
- */ - typedef enum SVGA3dMSQualityLevel { - SVGA3D_MS_QUALITY_NONE = 0, - SVGA3D_MS_QUALITY_MIN = 0, - SVGA3D_MS_QUALITY_FULL = 1, - SVGA3D_MS_QUALITY_MAX = 2, + SVGA3D_MS_QUALITY_NONE = 0, + SVGA3D_MS_QUALITY_MIN = 0, + SVGA3D_MS_QUALITY_FULL = 1, + SVGA3D_MS_QUALITY_RESOLVED = 2, + SVGA3D_MS_QUALITY_MAX = 3, } SVGA3dMSQualityLevel; -/* - * Screen Target Update Flags - */ - typedef enum SVGA3dFrameUpdateType { - SVGA3D_FRAME_END = 0, - SVGA3D_FRAME_PARTIAL = 1, - SVGA3D_FRAME_UNKNOWN = 2, - SVGA3D_FRAME_MAX = 3, + SVGA3D_FRAME_END = 0, + SVGA3D_FRAME_MIN = 0, + SVGA3D_FRAME_PARTIAL = 1, + SVGA3D_FRAME_UNKNOWN = 2, + SVGA3D_FRAME_MAX = 3, } SVGA3dFrameUpdateType; -#endif /* _SVGA3D_TYPES_H_ */ +#endif diff --git a/drivers/gpu/drm/vmwgfx/device_include/svga_escape.h b/drivers/gpu/drm/vmwgfx/device_include/svga_escape.h index acb41e28e46f..bf242c21f352 100644 --- a/drivers/gpu/drm/vmwgfx/device_include/svga_escape.h +++ b/drivers/gpu/drm/vmwgfx/device_include/svga_escape.h @@ -1,6 +1,6 @@ -/* SPDX-License-Identifier: GPL-2.0 OR MIT */ /********************************************************** - * Copyright 2007-2015 VMware, Inc. + * Copyright 2007,2020 VMware, Inc. + * SPDX-License-Identifier: GPL-2.0 OR MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation @@ -30,61 +30,27 @@ * Definitions for our own (vendor-specific) SVGA Escape commands. */ -#ifndef _SVGA_ESCAPE_H_ -#define _SVGA_ESCAPE_H_ -/* - * Namespace IDs for the escape command - */ +#ifndef _SVGA_ESCAPE_H_ +#define _SVGA_ESCAPE_H_ #define SVGA_ESCAPE_NSID_VMWARE 0x00000000 -#define SVGA_ESCAPE_NSID_DEVEL 0xFFFFFFFF - +#define SVGA_ESCAPE_NSID_DEVEL 0xFFFFFFFF -/* - * Within SVGA_ESCAPE_NSID_VMWARE, we multiplex commands according to - * the first DWORD of escape data (after the nsID and size). As a - * guideline we're using the high word and low word as a major and - * minor command number, respectively. - * - * Major command number allocation: - * - * 0000: Reserved - * 0001: SVGA_ESCAPE_VMWARE_LOG (svga_binary_logger.h) - * 0002: SVGA_ESCAPE_VMWARE_VIDEO (svga_overlay.h) - * 0003: SVGA_ESCAPE_VMWARE_HINT (svga_escape.h) - */ - -#define SVGA_ESCAPE_VMWARE_MAJOR_MASK 0xFFFF0000 - - -/* - * SVGA Hint commands. - * - * These escapes let the SVGA driver provide optional information to - * he host about the state of the guest or guest applications. The - * host can use these hints to make user interface or performance - * decisions. - * - * Notes: - * - * - SVGA_ESCAPE_VMWARE_HINT_FULLSCREEN is deprecated for guests - * that use the SVGA Screen Object extension. Instead of sending - * this escape, use the SVGA_SCREEN_FULLSCREEN_HINT flag on your - * Screen Object. 
- */ +#define SVGA_ESCAPE_VMWARE_MAJOR_MASK 0xFFFF0000 -#define SVGA_ESCAPE_VMWARE_HINT 0x00030000 -#define SVGA_ESCAPE_VMWARE_HINT_FULLSCREEN 0x00030001 /* Deprecated */ +#define SVGA_ESCAPE_VMWARE_HINT 0x00030000 +#define SVGA_ESCAPE_VMWARE_HINT_FULLSCREEN 0x00030001 -typedef -struct { - uint32 command; - uint32 fullscreen; - struct { - int32 x, y; - } monitorPosition; +#pragma pack(push, 1) +typedef struct { + uint32 command; + uint32 fullscreen; + struct { + int32 x, y; + } monitorPosition; } SVGAEscapeHintFullscreen; +#pragma pack(pop) -#endif /* _SVGA_ESCAPE_H_ */ +#endif diff --git a/drivers/gpu/drm/vmwgfx/device_include/svga_overlay.h b/drivers/gpu/drm/vmwgfx/device_include/svga_overlay.h index e5385146e7fc..aec17c3c6c29 100644 --- a/drivers/gpu/drm/vmwgfx/device_include/svga_overlay.h +++ b/drivers/gpu/drm/vmwgfx/device_include/svga_overlay.h @@ -1,6 +1,6 @@ -/* SPDX-License-Identifier: GPL-2.0 OR MIT */ /********************************************************** - * Copyright 2007-2015 VMware, Inc. + * Copyright 2007-2021 VMware, Inc. + * SPDX-License-Identifier: GPL-2.0 OR MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation @@ -30,171 +30,88 @@ * Definitions for video-overlay support. */ + + #ifndef _SVGA_OVERLAY_H_ #define _SVGA_OVERLAY_H_ #include "svga_reg.h" -/* - * Video formats we support - */ +#if defined __cplusplus +extern "C" { +#endif -#define VMWARE_FOURCC_YV12 0x32315659 /* 'Y' 'V' '1' '2' */ -#define VMWARE_FOURCC_YUY2 0x32595559 /* 'Y' 'U' 'Y' '2' */ -#define VMWARE_FOURCC_UYVY 0x59565955 /* 'U' 'Y' 'V' 'Y' */ +#define VMWARE_FOURCC_YV12 0x32315659 +#define VMWARE_FOURCC_YUY2 0x32595559 +#define VMWARE_FOURCC_UYVY 0x59565955 typedef enum { - SVGA_OVERLAY_FORMAT_INVALID = 0, - SVGA_OVERLAY_FORMAT_YV12 = VMWARE_FOURCC_YV12, - SVGA_OVERLAY_FORMAT_YUY2 = VMWARE_FOURCC_YUY2, - SVGA_OVERLAY_FORMAT_UYVY = VMWARE_FOURCC_UYVY, + SVGA_OVERLAY_FORMAT_INVALID = 0, + SVGA_OVERLAY_FORMAT_YV12 = VMWARE_FOURCC_YV12, + SVGA_OVERLAY_FORMAT_YUY2 = VMWARE_FOURCC_YUY2, + SVGA_OVERLAY_FORMAT_UYVY = VMWARE_FOURCC_UYVY, } SVGAOverlayFormat; -#define SVGA_VIDEO_COLORKEY_MASK 0x00ffffff +#define SVGA_VIDEO_COLORKEY_MASK 0x00ffffff -#define SVGA_ESCAPE_VMWARE_VIDEO 0x00020000 +#define SVGA_ESCAPE_VMWARE_VIDEO 0x00020000 -#define SVGA_ESCAPE_VMWARE_VIDEO_SET_REGS 0x00020001 - /* FIFO escape layout: - * Type, Stream Id, (Register Id, Value) pairs */ +#define SVGA_ESCAPE_VMWARE_VIDEO_SET_REGS 0x00020001 -#define SVGA_ESCAPE_VMWARE_VIDEO_FLUSH 0x00020002 - /* FIFO escape layout: - * Type, Stream Id */ +#define SVGA_ESCAPE_VMWARE_VIDEO_FLUSH 0x00020002 -typedef -struct SVGAEscapeVideoSetRegs { - struct { - uint32 cmdType; - uint32 streamId; - } header; +typedef struct SVGAEscapeVideoSetRegs { + struct { + uint32 cmdType; + uint32 streamId; + } header; - /* May include zero or more items. */ - struct { - uint32 registerId; - uint32 value; - } items[1]; + struct { + uint32 registerId; + uint32 value; + } items[1]; } SVGAEscapeVideoSetRegs; -typedef -struct SVGAEscapeVideoFlush { - uint32 cmdType; - uint32 streamId; +typedef struct SVGAEscapeVideoFlush { + uint32 cmdType; + uint32 streamId; } SVGAEscapeVideoFlush; - -/* - * Struct definitions for the video overlay commands built on - * SVGAFifoCmdEscape. 
- */ -typedef -struct { - uint32 command; - uint32 overlay; +#pragma pack(push, 1) +typedef struct { + uint32 command; + uint32 overlay; } SVGAFifoEscapeCmdVideoBase; +#pragma pack(pop) -typedef -struct { - SVGAFifoEscapeCmdVideoBase videoCmd; +#pragma pack(push, 1) +typedef struct { + SVGAFifoEscapeCmdVideoBase videoCmd; } SVGAFifoEscapeCmdVideoFlush; - -typedef -struct { - SVGAFifoEscapeCmdVideoBase videoCmd; - struct { - uint32 regId; - uint32 value; - } items[1]; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + SVGAFifoEscapeCmdVideoBase videoCmd; + struct { + uint32 regId; + uint32 value; + } items[1]; } SVGAFifoEscapeCmdVideoSetRegs; - -typedef -struct { - SVGAFifoEscapeCmdVideoBase videoCmd; - struct { - uint32 regId; - uint32 value; - } items[SVGA_VIDEO_NUM_REGS]; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + SVGAFifoEscapeCmdVideoBase videoCmd; + struct { + uint32 regId; + uint32 value; + } items[SVGA_VIDEO_NUM_REGS]; } SVGAFifoEscapeCmdVideoSetAllRegs; +#pragma pack(pop) - -/* - *---------------------------------------------------------------------- - * - * VMwareVideoGetAttributes -- - * - * Computes the size, pitches and offsets for YUV frames. - * - * Results: - * TRUE on success; otherwise FALSE on failure. - * - * Side effects: - * Pitches and offsets for the given YUV frame are put in 'pitches' - * and 'offsets' respectively. They are both optional though. - * - *---------------------------------------------------------------------- - */ - -static inline bool -VMwareVideoGetAttributes(const SVGAOverlayFormat format, /* IN */ - uint32 *width, /* IN / OUT */ - uint32 *height, /* IN / OUT */ - uint32 *size, /* OUT */ - uint32 *pitches, /* OUT (optional) */ - uint32 *offsets) /* OUT (optional) */ -{ - int tmp; - - *width = (*width + 1) & ~1; - - if (offsets) { - offsets[0] = 0; - } - - switch (format) { - case VMWARE_FOURCC_YV12: - *height = (*height + 1) & ~1; - *size = (*width) * (*height); - - if (pitches) { - pitches[0] = *width; - } - - if (offsets) { - offsets[1] = *size; - } - - tmp = *width >> 1; - - if (pitches) { - pitches[1] = pitches[2] = tmp; - } - - tmp *= (*height >> 1); - *size += tmp; - - if (offsets) { - offsets[2] = *size; - } - - *size += tmp; - break; - - case VMWARE_FOURCC_YUY2: - case VMWARE_FOURCC_UYVY: - *size = *width * 2; - - if (pitches) { - pitches[0] = *size; - } - - *size *= *height; - break; - - default: - return false; - } - - return true; +#if defined __cplusplus } +#endif -#endif /* _SVGA_OVERLAY_H_ */ +#endif diff --git a/drivers/gpu/drm/vmwgfx/device_include/svga_reg.h b/drivers/gpu/drm/vmwgfx/device_include/svga_reg.h index 193a57f6aae5..b3602557de2e 100644 --- a/drivers/gpu/drm/vmwgfx/device_include/svga_reg.h +++ b/drivers/gpu/drm/vmwgfx/device_include/svga_reg.h @@ -1,6 +1,6 @@ -/* SPDX-License-Identifier: GPL-2.0 OR MIT */ /********************************************************** * Copyright 1998-2021 VMware, Inc. + * SPDX-License-Identifier: GPL-2.0 OR MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation @@ -30,2277 +30,870 @@ * Virtual hardware definitions for the VMware SVGA II device. */ -#ifndef _SVGA_REG_H_ -#define _SVGA_REG_H_ -#include <linux/pci_ids.h> -#define INCLUDE_ALLOW_MODULE -#define INCLUDE_ALLOW_USERLEVEL -#define INCLUDE_ALLOW_VMCORE -#include "includeCheck.h" +#ifndef _SVGA_REG_H_ +#define _SVGA_REG_H_ -#include "svga_types.h" +#include "vm_basic_types.h" -/* - * SVGA_REG_ENABLE bit definitions. 
- */ typedef enum { - SVGA_REG_ENABLE_DISABLE = 0, - SVGA_REG_ENABLE_ENABLE = (1 << 0), - SVGA_REG_ENABLE_HIDE = (1 << 1), + SVGA_REG_ENABLE_DISABLE = 0, + SVGA_REG_ENABLE_ENABLE = (1 << 0), + SVGA_REG_ENABLE_HIDE = (1 << 1), } SvgaRegEnable; typedef uint32 SVGAMobId; -/* - * Arbitrary and meaningless limits. Please ignore these when writing - * new drivers. - */ -#define SVGA_MAX_WIDTH 2560 -#define SVGA_MAX_HEIGHT 1600 - +#define SVGA_MAX_WIDTH 2560 +#define SVGA_MAX_HEIGHT 1600 -#define SVGA_MAX_BITS_PER_PIXEL 32 -#define SVGA_MAX_DEPTH 24 -#define SVGA_MAX_DISPLAYS 10 -#define SVGA_MAX_SCREEN_SIZE 8192 +#define SVGA_MAX_BITS_PER_PIXEL 32 +#define SVGA_MAX_DEPTH 24 +#define SVGA_MAX_DISPLAYS 10 +#define SVGA_MAX_SCREEN_SIZE 8192 #define SVGA_SCREEN_ROOT_LIMIT (SVGA_MAX_SCREEN_SIZE * SVGA_MAX_DISPLAYS) +#define SVGA_CURSOR_ON_HIDE 0x0 +#define SVGA_CURSOR_ON_SHOW 0x1 -/* - * Legal values for the SVGA_REG_CURSOR_ON register in old-fashioned - * cursor bypass mode. - */ -#define SVGA_CURSOR_ON_HIDE 0x0 -#define SVGA_CURSOR_ON_SHOW 0x1 - -/* - * Remove the cursor from the framebuffer - * because we need to see what's under it - */ -#define SVGA_CURSOR_ON_REMOVE_FROM_FB 0x2 +#define SVGA_CURSOR_ON_REMOVE_FROM_FB 0x2 -/* Put the cursor back in the framebuffer so the user can see it */ -#define SVGA_CURSOR_ON_RESTORE_TO_FB 0x3 +#define SVGA_CURSOR_ON_RESTORE_TO_FB 0x3 -/* - * The maximum framebuffer size that can traced for guests unless the - * SVGA_CAP_GBOBJECTS is set in SVGA_REG_CAPABILITIES. In that case - * the full framebuffer can be traced independent of this limit. - */ -#define SVGA_FB_MAX_TRACEABLE_SIZE 0x1000000 +#define SVGA_FB_MAX_TRACEABLE_SIZE 0x1000000 -#define SVGA_MAX_PSEUDOCOLOR_DEPTH 8 -#define SVGA_MAX_PSEUDOCOLORS (1 << SVGA_MAX_PSEUDOCOLOR_DEPTH) -#define SVGA_NUM_PALETTE_REGS (3 * SVGA_MAX_PSEUDOCOLORS) +#define SVGA_MAX_PSEUDOCOLOR_DEPTH 8 +#define SVGA_MAX_PSEUDOCOLORS (1 << SVGA_MAX_PSEUDOCOLOR_DEPTH) +#define SVGA_NUM_PALETTE_REGS (3 * SVGA_MAX_PSEUDOCOLORS) -#define SVGA_MAGIC 0x900000UL -#define SVGA_MAKE_ID(ver) (SVGA_MAGIC << 8 | (ver)) +#define SVGA_MAGIC 0x900000UL +#define SVGA_MAKE_ID(ver) (SVGA_MAGIC << 8 | (ver)) -/* Version 3 has the control bar instead of the FIFO */ -#define SVGA_VERSION_3 3 -#define SVGA_ID_3 SVGA_MAKE_ID(SVGA_VERSION_3) +#define SVGA_VERSION_3 3 +#define SVGA_ID_3 SVGA_MAKE_ID(SVGA_VERSION_3) -/* Version 2 let the address of the frame buffer be unsigned on Win32 */ -#define SVGA_VERSION_2 2 -#define SVGA_ID_2 SVGA_MAKE_ID(SVGA_VERSION_2) +#define SVGA_VERSION_2 2 +#define SVGA_ID_2 SVGA_MAKE_ID(SVGA_VERSION_2) -/* Version 1 has new registers starting with SVGA_REG_CAPABILITIES so - PALETTE_BASE has moved */ -#define SVGA_VERSION_1 1 -#define SVGA_ID_1 SVGA_MAKE_ID(SVGA_VERSION_1) +#define SVGA_VERSION_1 1 +#define SVGA_ID_1 SVGA_MAKE_ID(SVGA_VERSION_1) -/* Version 0 is the initial version */ -#define SVGA_VERSION_0 0 -#define SVGA_ID_0 SVGA_MAKE_ID(SVGA_VERSION_0) +#define SVGA_VERSION_0 0 +#define SVGA_ID_0 SVGA_MAKE_ID(SVGA_VERSION_0) -/* - * "Invalid" value for all SVGA IDs. - * (Version ID, screen object ID, surface ID...) 
- */ -#define SVGA_ID_INVALID 0xFFFFFFFF +#define SVGA_ID_INVALID 0xFFFFFFFF -/* Port offsets, relative to BAR0 */ -#define SVGA_INDEX_PORT 0x0 -#define SVGA_VALUE_PORT 0x1 -#define SVGA_BIOS_PORT 0x2 -#define SVGA_IRQSTATUS_PORT 0x8 +#define SVGA_INDEX_PORT 0x0 +#define SVGA_VALUE_PORT 0x1 +#define SVGA_BIOS_PORT 0x2 +#define SVGA_IRQSTATUS_PORT 0x8 -/* - * Interrupt source flags for IRQSTATUS_PORT and IRQMASK. - * - * Interrupts are only supported when the - * SVGA_CAP_IRQMASK capability is present. - */ -#define SVGA_IRQFLAG_ANY_FENCE (1 << 0) /* Any fence was passed */ -#define SVGA_IRQFLAG_FIFO_PROGRESS (1 << 1) /* Made forward progress in the FIFO */ -#define SVGA_IRQFLAG_FENCE_GOAL (1 << 2) /* SVGA_FIFO_FENCE_GOAL reached */ -#define SVGA_IRQFLAG_COMMAND_BUFFER (1 << 3) /* Command buffer completed */ -#define SVGA_IRQFLAG_ERROR (1 << 4) /* Error while processing commands */ -#define SVGA_IRQFLAG_MAX (1 << 5) +#define SVGA_IRQFLAG_ANY_FENCE (1 << 0) +#define SVGA_IRQFLAG_FIFO_PROGRESS (1 << 1) +#define SVGA_IRQFLAG_FENCE_GOAL (1 << 2) +#define SVGA_IRQFLAG_COMMAND_BUFFER (1 << 3) +#define SVGA_IRQFLAG_ERROR (1 << 4) +#define SVGA_IRQFLAG_REG_FENCE_GOAL (1 << 5) +#define SVGA_IRQFLAG_MAX (1 << 6) -/* - * The byte-size is the size of the actual cursor data, - * possibly after expanding it to the current bit depth. - * - * 40K is sufficient memory for two 32-bit planes for a 64 x 64 cursor. - * - * The dimension limit is a bound on the maximum width or height. - */ -#define SVGA_MAX_CURSOR_CMD_BYTES (40 * 1024) +#define SVGA_MAX_CURSOR_CMD_BYTES (40 * 1024) #define SVGA_MAX_CURSOR_CMD_DIMENSION 1024 -/* - * Registers - */ - enum { - SVGA_REG_ID = 0, - SVGA_REG_ENABLE = 1, - SVGA_REG_WIDTH = 2, - SVGA_REG_HEIGHT = 3, - SVGA_REG_MAX_WIDTH = 4, - SVGA_REG_MAX_HEIGHT = 5, - SVGA_REG_DEPTH = 6, - SVGA_REG_BITS_PER_PIXEL = 7, /* Current bpp in the guest */ - SVGA_REG_PSEUDOCOLOR = 8, - SVGA_REG_RED_MASK = 9, - SVGA_REG_GREEN_MASK = 10, - SVGA_REG_BLUE_MASK = 11, - SVGA_REG_BYTES_PER_LINE = 12, - SVGA_REG_FB_START = 13, /* (Deprecated) */ - SVGA_REG_FB_OFFSET = 14, - SVGA_REG_VRAM_SIZE = 15, - SVGA_REG_FB_SIZE = 16, - - /* ID 0 implementation only had the above registers, then the palette */ - SVGA_REG_ID_0_TOP = 17, - - SVGA_REG_CAPABILITIES = 17, - SVGA_REG_MEM_START = 18, /* (Deprecated) */ - SVGA_REG_MEM_SIZE = 19, - SVGA_REG_CONFIG_DONE = 20, /* Set when memory area configured */ - SVGA_REG_SYNC = 21, /* See "FIFO Synchronization Registers" */ - SVGA_REG_BUSY = 22, /* See "FIFO Synchronization Registers" */ - SVGA_REG_GUEST_ID = 23, /* (Deprecated) */ - SVGA_REG_DEAD = 24, /* Drivers should never write this. 
*/ - SVGA_REG_CURSOR_X = 25, /* (Deprecated) */ - SVGA_REG_CURSOR_Y = 26, /* (Deprecated) */ - SVGA_REG_CURSOR_ON = 27, /* (Deprecated) */ - SVGA_REG_HOST_BITS_PER_PIXEL = 28, /* (Deprecated) */ - SVGA_REG_SCRATCH_SIZE = 29, /* Number of scratch registers */ - SVGA_REG_MEM_REGS = 30, /* Number of FIFO registers */ - SVGA_REG_NUM_DISPLAYS = 31, /* (Deprecated) */ - SVGA_REG_PITCHLOCK = 32, /* Fixed pitch for all modes */ - SVGA_REG_IRQMASK = 33, /* Interrupt mask */ - - /* Legacy multi-monitor support */ - SVGA_REG_NUM_GUEST_DISPLAYS = 34,/* Number of guest displays in X/Y direction */ - SVGA_REG_DISPLAY_ID = 35, /* Display ID for the following display attributes */ - SVGA_REG_DISPLAY_IS_PRIMARY = 36,/* Whether this is a primary display */ - SVGA_REG_DISPLAY_POSITION_X = 37,/* The display position x */ - SVGA_REG_DISPLAY_POSITION_Y = 38,/* The display position y */ - SVGA_REG_DISPLAY_WIDTH = 39, /* The display's width */ - SVGA_REG_DISPLAY_HEIGHT = 40, /* The display's height */ - - /* See "Guest memory regions" below. */ - SVGA_REG_GMR_ID = 41, - SVGA_REG_GMR_DESCRIPTOR = 42, - SVGA_REG_GMR_MAX_IDS = 43, - SVGA_REG_GMR_MAX_DESCRIPTOR_LENGTH = 44, - - SVGA_REG_TRACES = 45, /* Enable trace-based updates even when FIFO is on */ - SVGA_REG_GMRS_MAX_PAGES = 46, /* Maximum number of 4KB pages for all GMRs */ - SVGA_REG_MEMORY_SIZE = 47, /* Total dedicated device memory excluding FIFO */ - SVGA_REG_COMMAND_LOW = 48, /* Lower 32 bits and submits commands */ - SVGA_REG_COMMAND_HIGH = 49, /* Upper 32 bits of command buffer PA */ - - /* - * Max primary memory. - * See SVGA_CAP_NO_BB_RESTRICTION. - */ - SVGA_REG_MAX_PRIMARY_MEM = 50, - SVGA_REG_MAX_PRIMARY_BOUNDING_BOX_MEM = 50, - - /* - * Legacy version of SVGA_REG_GBOBJECT_MEM_SIZE_KB for drivers that - * don't know how to convert to a 64-bit byte value without overflowing. - * (See SVGA_REG_GBOBJECT_MEM_SIZE_KB). - */ - SVGA_REG_SUGGESTED_GBOBJECT_MEM_SIZE_KB = 51, - - SVGA_REG_DEV_CAP = 52, /* Write dev cap index, read value */ - SVGA_REG_CMD_PREPEND_LOW = 53, - SVGA_REG_CMD_PREPEND_HIGH = 54, - SVGA_REG_SCREENTARGET_MAX_WIDTH = 55, - SVGA_REG_SCREENTARGET_MAX_HEIGHT = 56, - SVGA_REG_MOB_MAX_SIZE = 57, - SVGA_REG_BLANK_SCREEN_TARGETS = 58, - SVGA_REG_CAP2 = 59, - SVGA_REG_DEVEL_CAP = 60, - - /* - * Allow the guest to hint to the device which driver is running. - * - * This should not generally change device behavior, but might be - * convenient to work-around specific bugs in guest drivers. - * - * Drivers should first write their id value into SVGA_REG_GUEST_DRIVER_ID, - * and then fill out all of the version registers that they have defined. - * - * After the driver has written all of the registers, they should - * then write the value SVGA_REG_GUEST_DRIVER_ID_SUBMIT to the - * SVGA_REG_GUEST_DRIVER_ID register, to signal that they have finished. - * - * The SVGA_REG_GUEST_DRIVER_ID values are defined below by the - * SVGARegGuestDriverId enum. - * - * The SVGA_REG_GUEST_DRIVER_VERSION fields are driver-specific, - * but ideally should encode a monotonically increasing number that allows - * the device to perform inequality checks against ranges of driver versions. 
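A minimal sketch of the driver-ID handshake described above might look as follows; vmw_write_reg() is a hypothetical stand-in for however the driver writes a single SVGA register, and the version numbers are placeholders:

/* Hypothetical helper: writes one SVGA register (index, value). */
static void vmw_write_reg(uint32 index, uint32 value);

static void example_report_guest_driver(void)
{
	/* Announce which driver is running... */
	vmw_write_reg(SVGA_REG_GUEST_DRIVER_ID, SVGA_REG_GUEST_DRIVER_ID_LINUX);

	/* ...fill in the driver-defined, monotonically increasing version... */
	vmw_write_reg(SVGA_REG_GUEST_DRIVER_VERSION1, 1);
	vmw_write_reg(SVGA_REG_GUEST_DRIVER_VERSION2, 0);
	vmw_write_reg(SVGA_REG_GUEST_DRIVER_VERSION3, 0);

	/* ...then signal that all version registers have been written. */
	vmw_write_reg(SVGA_REG_GUEST_DRIVER_ID, SVGA_REG_GUEST_DRIVER_ID_SUBMIT);
}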
- */ - SVGA_REG_GUEST_DRIVER_ID = 61, - SVGA_REG_GUEST_DRIVER_VERSION1 = 62, - SVGA_REG_GUEST_DRIVER_VERSION2 = 63, - SVGA_REG_GUEST_DRIVER_VERSION3 = 64, - SVGA_REG_CURSOR_MOBID = 65, - SVGA_REG_CURSOR_MAX_BYTE_SIZE = 66, - SVGA_REG_CURSOR_MAX_DIMENSION = 67, - - SVGA_REG_FIFO_CAPS = 68, - SVGA_REG_FENCE = 69, - - SVGA_REG_RESERVED1 = 70, - SVGA_REG_RESERVED2 = 71, - SVGA_REG_RESERVED3 = 72, - SVGA_REG_RESERVED4 = 73, - SVGA_REG_RESERVED5 = 74, - SVGA_REG_SCREENDMA = 75, - - /* - * The maximum amount of guest-backed objects that the device can have - * resident at a time. Guest-drivers should keep their working set size - * below this limit for best performance. - * - * Note that this value is in kilobytes, and not bytes, because the actual - * number of bytes might be larger than can fit in a 32-bit register. - * - * PLEASE USE A 64-BIT VALUE WHEN CONVERTING THIS INTO BYTES. - * (See SVGA_REG_SUGGESTED_GBOBJECT_MEM_SIZE_KB). - */ - SVGA_REG_GBOBJECT_MEM_SIZE_KB = 76, - - /* - + * These registers are for the addresses of the memory BARs for SVGA3 - */ - SVGA_REG_REGS_START_HIGH32 = 77, - SVGA_REG_REGS_START_LOW32 = 78, - SVGA_REG_FB_START_HIGH32 = 79, - SVGA_REG_FB_START_LOW32 = 80, - - /* - * A hint register that recommends which quality level the guest should - * currently use to define multisample surfaces. - * - * If the register is SVGA_REG_MSHINT_DISABLED, - * the guest is only allowed to use SVGA3D_MS_QUALITY_FULL. - * - * Otherwise, this is a live value that can change while the VM is - * powered on with the hint suggestion for which quality level the guest - * should be using. Guests are free to ignore the hint and use either - * RESOLVE or FULL quality. - */ - SVGA_REG_MSHINT = 81, - - SVGA_REG_IRQ_STATUS = 82, - SVGA_REG_DIRTY_TRACKING = 83, - - SVGA_REG_TOP = 84, /* Must be 1 more than the last register */ - - SVGA_PALETTE_BASE = 1024, /* Base of SVGA color map */ - /* Next 768 (== 256*3) registers exist for colormap */ - SVGA_SCRATCH_BASE = SVGA_PALETTE_BASE + SVGA_NUM_PALETTE_REGS - /* Base of scratch registers */ - /* Next reg[SVGA_REG_SCRATCH_SIZE] registers exist for scratch usage: - First 4 are reserved for VESA BIOS Extension; any remaining are for - the use of the current SVGA driver. 
*/ -}; + SVGA_REG_ID = 0, + SVGA_REG_ENABLE = 1, + SVGA_REG_WIDTH = 2, + SVGA_REG_HEIGHT = 3, + SVGA_REG_MAX_WIDTH = 4, + SVGA_REG_MAX_HEIGHT = 5, + SVGA_REG_DEPTH = 6, + SVGA_REG_BITS_PER_PIXEL = 7, + SVGA_REG_PSEUDOCOLOR = 8, + SVGA_REG_RED_MASK = 9, + SVGA_REG_GREEN_MASK = 10, + SVGA_REG_BLUE_MASK = 11, + SVGA_REG_BYTES_PER_LINE = 12, + SVGA_REG_FB_START = 13, + SVGA_REG_FB_OFFSET = 14, + SVGA_REG_VRAM_SIZE = 15, + SVGA_REG_FB_SIZE = 16, + + SVGA_REG_ID_0_TOP = 17, + + SVGA_REG_CAPABILITIES = 17, + SVGA_REG_MEM_START = 18, + SVGA_REG_MEM_SIZE = 19, + SVGA_REG_CONFIG_DONE = 20, + SVGA_REG_SYNC = 21, + SVGA_REG_BUSY = 22, + SVGA_REG_GUEST_ID = 23, + SVGA_REG_DEAD = 24, + SVGA_REG_CURSOR_X = 25, + SVGA_REG_CURSOR_Y = 26, + SVGA_REG_CURSOR_ON = 27, + SVGA_REG_HOST_BITS_PER_PIXEL = 28, + SVGA_REG_SCRATCH_SIZE = 29, + SVGA_REG_MEM_REGS = 30, + SVGA_REG_NUM_DISPLAYS = 31, + SVGA_REG_PITCHLOCK = 32, + SVGA_REG_IRQMASK = 33, + + SVGA_REG_NUM_GUEST_DISPLAYS = 34, + SVGA_REG_DISPLAY_ID = 35, + SVGA_REG_DISPLAY_IS_PRIMARY = 36, + SVGA_REG_DISPLAY_POSITION_X = 37, + SVGA_REG_DISPLAY_POSITION_Y = 38, + SVGA_REG_DISPLAY_WIDTH = 39, + SVGA_REG_DISPLAY_HEIGHT = 40, + + SVGA_REG_GMR_ID = 41, + SVGA_REG_GMR_DESCRIPTOR = 42, + SVGA_REG_GMR_MAX_IDS = 43, + SVGA_REG_GMR_MAX_DESCRIPTOR_LENGTH = 44, + + SVGA_REG_TRACES = 45, + SVGA_REG_GMRS_MAX_PAGES = 46, + SVGA_REG_MEMORY_SIZE = 47, + SVGA_REG_COMMAND_LOW = 48, + SVGA_REG_COMMAND_HIGH = 49, + + SVGA_REG_MAX_PRIMARY_MEM = 50, + + SVGA_REG_SUGGESTED_GBOBJECT_MEM_SIZE_KB = 51, + + SVGA_REG_DEV_CAP = 52, + SVGA_REG_CMD_PREPEND_LOW = 53, + SVGA_REG_CMD_PREPEND_HIGH = 54, + SVGA_REG_SCREENTARGET_MAX_WIDTH = 55, + SVGA_REG_SCREENTARGET_MAX_HEIGHT = 56, + SVGA_REG_MOB_MAX_SIZE = 57, + SVGA_REG_BLANK_SCREEN_TARGETS = 58, + SVGA_REG_CAP2 = 59, + SVGA_REG_DEVEL_CAP = 60, + + SVGA_REG_GUEST_DRIVER_ID = 61, + SVGA_REG_GUEST_DRIVER_VERSION1 = 62, + SVGA_REG_GUEST_DRIVER_VERSION2 = 63, + SVGA_REG_GUEST_DRIVER_VERSION3 = 64, + + SVGA_REG_CURSOR_MOBID = 65, + SVGA_REG_CURSOR_MAX_BYTE_SIZE = 66, + SVGA_REG_CURSOR_MAX_DIMENSION = 67, + + SVGA_REG_FIFO_CAPS = 68, + SVGA_REG_FENCE = 69, + + SVGA_REG_CURSOR4_ON = 70, + SVGA_REG_CURSOR4_X = 71, + SVGA_REG_CURSOR4_Y = 72, + SVGA_REG_CURSOR4_SCREEN_ID = 73, + SVGA_REG_CURSOR4_SUBMIT = 74, + + SVGA_REG_SCREENDMA = 75, + + SVGA_REG_GBOBJECT_MEM_SIZE_KB = 76, + + SVGA_REG_REGS_START_HIGH32 = 77, + SVGA_REG_REGS_START_LOW32 = 78, + SVGA_REG_FB_START_HIGH32 = 79, + SVGA_REG_FB_START_LOW32 = 80, + + SVGA_REG_MSHINT = 81, + + SVGA_REG_IRQ_STATUS = 82, + + SVGA_REG_DIRTY_TRACKING = 83, + SVGA_REG_FENCE_GOAL = 84, + + SVGA_REG_TOP = 85, + + SVGA_PALETTE_BASE = 1024, + + SVGA_SCRATCH_BASE = SVGA_PALETTE_BASE + SVGA_NUM_PALETTE_REGS +}; -/* - * Values for SVGA_REG_GUEST_DRIVER_ID. 
- */ typedef enum SVGARegGuestDriverId { - SVGA_REG_GUEST_DRIVER_ID_UNKNOWN = 0, - SVGA_REG_GUEST_DRIVER_ID_WDDM = 1, - SVGA_REG_GUEST_DRIVER_ID_LINUX = 2, - SVGA_REG_GUEST_DRIVER_ID_MAX, + SVGA_REG_GUEST_DRIVER_ID_UNKNOWN = 0, + SVGA_REG_GUEST_DRIVER_ID_WDDM = 1, + SVGA_REG_GUEST_DRIVER_ID_LINUX = 2, + SVGA_REG_GUEST_DRIVER_ID_MAX, - SVGA_REG_GUEST_DRIVER_ID_SUBMIT = MAX_UINT32, + SVGA_REG_GUEST_DRIVER_ID_SUBMIT = MAX_UINT32, } SVGARegGuestDriverId; typedef enum SVGARegMSHint { - SVGA_REG_MSHINT_DISABLED = 0, - SVGA_REG_MSHINT_FULL = 1, - SVGA_REG_MSHINT_RESOLVED = 2, + SVGA_REG_MSHINT_DISABLED = 0, + SVGA_REG_MSHINT_FULL = 1, + SVGA_REG_MSHINT_RESOLVED = 2, } SVGARegMSHint; typedef enum SVGARegDirtyTracking { - SVGA_REG_DIRTY_TRACKING_PER_IMAGE = 0, - SVGA_REG_DIRTY_TRACKING_PER_SURFACE = 1, + SVGA_REG_DIRTY_TRACKING_PER_IMAGE = 0, + SVGA_REG_DIRTY_TRACKING_PER_SURFACE = 1, } SVGARegDirtyTracking; - -/* - * Guest memory regions (GMRs): - * - * This is a new memory mapping feature available in SVGA devices - * which have the SVGA_CAP_GMR bit set. Previously, there were two - * fixed memory regions available with which to share data between the - * device and the driver: the FIFO ('MEM') and the framebuffer. GMRs - * are our name for an extensible way of providing arbitrary DMA - * buffers for use between the driver and the SVGA device. They are a - * new alternative to framebuffer memory, usable for both 2D and 3D - * graphics operations. - * - * Since GMR mapping must be done synchronously with guest CPU - * execution, we use a new pair of SVGA registers: - * - * SVGA_REG_GMR_ID -- - * - * Read/write. - * This register holds the 32-bit ID (a small positive integer) - * of a GMR to create, delete, or redefine. Writing this register - * has no side-effects. - * - * SVGA_REG_GMR_DESCRIPTOR -- - * - * Write-only. - * Writing this register will create, delete, or redefine the GMR - * specified by the above ID register. If this register is zero, - * the GMR is deleted. Any pointers into this GMR (including those - * currently being processed by FIFO commands) will be - * synchronously invalidated. - * - * If this register is nonzero, it must be the physical page - * number (PPN) of a data structure which describes the physical - * layout of the memory region this GMR should describe. The - * descriptor structure will be read synchronously by the SVGA - * device when this register is written. The descriptor need not - * remain allocated for the lifetime of the GMR. - * - * The guest driver should write SVGA_REG_GMR_ID first, then - * SVGA_REG_GMR_DESCRIPTOR. - * - * SVGA_REG_GMR_MAX_IDS -- - * - * Read-only. - * The SVGA device may choose to support a maximum number of - * user-defined GMR IDs. This register holds the number of supported - * IDs. (The maximum supported ID plus 1) - * - * SVGA_REG_GMR_MAX_DESCRIPTOR_LENGTH -- - * - * Read-only. - * The SVGA device may choose to put a limit on the total number - * of SVGAGuestMemDescriptor structures it will read when defining - * a single GMR. - * - * The descriptor structure is an array of SVGAGuestMemDescriptor - * structures. Each structure may do one of three things: - * - * - Terminate the GMR descriptor list. - * (ppn==0, numPages==0) - * - * - Add a PPN or range of PPNs to the GMR's virtual address space. - * (ppn != 0, numPages != 0) - * - * - Provide the PPN of the next SVGAGuestMemDescriptor, in order to - * support multi-page GMR descriptor tables without forcing the - * driver to allocate physically contiguous memory. 
- * (ppn != 0, numPages == 0) - * - * Note that each physical page of SVGAGuestMemDescriptor structures - * can describe at least 2MB of guest memory. If the driver needs to - * use more than one page of descriptor structures, it must use one of - * its SVGAGuestMemDescriptors to point to an additional page. The - * device will never automatically cross a page boundary. - * - * Once the driver has described a GMR, it is immediately available - * for use via any FIFO command that uses an SVGAGuestPtr structure. - * These pointers include a GMR identifier plus an offset into that - * GMR. - * - * The driver must check the SVGA_CAP_GMR bit before using the GMR - * registers. - */ - -/* - * Special GMR IDs, allowing SVGAGuestPtrs to point to framebuffer - * memory as well. In the future, these IDs could even be used to - * allow legacy memory regions to be redefined by the guest as GMRs. - * - * Using the guest framebuffer (GFB) at BAR1 for general purpose DMA - * is being phased out. Please try to use user-defined GMRs whenever - * possible. - */ -#define SVGA_GMR_NULL ((uint32) -1) -#define SVGA_GMR_FRAMEBUFFER ((uint32) -2) /* Guest Framebuffer (GFB) */ - -typedef -#include "vmware_pack_begin.h" -struct SVGAGuestMemDescriptor { - uint32 ppn; - uint32 numPages; -} -#include "vmware_pack_end.h" -SVGAGuestMemDescriptor; - -typedef -#include "vmware_pack_begin.h" -struct SVGAGuestPtr { - uint32 gmrId; - uint32 offset; -} -#include "vmware_pack_end.h" -SVGAGuestPtr; - -/* - * Register based command buffers -- - * - * Provide an SVGA device interface that allows the guest to submit - * command buffers to the SVGA device through an SVGA device register. - * The metadata for each command buffer is contained in the - * SVGACBHeader structure along with the return status codes. - * - * The SVGA device supports command buffers if - * SVGA_CAP_COMMAND_BUFFERS is set in the device caps register. The - * fifo must be enabled for command buffers to be submitted. - * - * Command buffers are submitted when the guest writing the 64 byte - * aligned physical address into the SVGA_REG_COMMAND_LOW and - * SVGA_REG_COMMAND_HIGH. SVGA_REG_COMMAND_HIGH contains the upper 32 - * bits of the physical address. SVGA_REG_COMMAND_LOW contains the - * lower 32 bits of the physical address, since the command buffer - * headers are required to be 64 byte aligned the lower 6 bits are - * used for the SVGACBContext value. Writing to SVGA_REG_COMMAND_LOW - * submits the command buffer to the device and queues it for - * execution. The SVGA device supports at least - * SVGA_CB_MAX_QUEUED_PER_CONTEXT command buffers that can be queued - * per context and if that limit is reached the device will write the - * status SVGA_CB_STATUS_QUEUE_FULL to the status value of the command - * buffer header synchronously and not raise any IRQs. - * - * It is invalid to submit a command buffer without a valid physical - * address and results are undefined. - * - * The device guarantees that command buffers of size SVGA_CB_MAX_SIZE - * will be supported. If a larger command buffer is submitted results - * are unspecified and the device will either complete the command - * buffer or return an error. - * - * The device guarantees that any individual command in a command - * buffer can be up to SVGA_CB_MAX_COMMAND_SIZE in size which is - * enough to fit a 64x64 color-cursor definition. If the command is - * too large the device is allowed to process the command or return an - * error. 
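As a sketch of the submission path described above, a guest could queue a command buffer by writing its 64-byte-aligned header address to the command registers, with the SVGACBContext folded into the low bits. vmw_write_reg() is again a hypothetical single-register write helper:

/* Hypothetical helper: writes one SVGA register (index, value). */
static void vmw_write_reg(uint32 index, uint32 value);

static void example_submit_command_buffer(uint64 header_pa, SVGACBContext ctx)
{
	uint32 low = (uint32)header_pa;

	/*
	 * The SVGACBHeader is 64-byte aligned, so the low 6 bits of the
	 * address are free to carry the SVGACBContext value.
	 */
	low |= (uint32)ctx & SVGA_CB_CONTEXT_MASK;

	vmw_write_reg(SVGA_REG_COMMAND_HIGH, (uint32)(header_pa >> 32));
	/* Writing COMMAND_LOW is what actually queues the buffer. */
	vmw_write_reg(SVGA_REG_COMMAND_LOW, low);
}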
- * - * The device context is a special SVGACBContext that allows for - * synchronous register like accesses with the flexibility of - * commands. There is a different command set defined by - * SVGADeviceContextCmdId. The commands in each command buffer is not - * allowed to straddle physical pages. - * - * The offset field which is available starting with the - * SVGA_CAP_CMD_BUFFERS_2 cap bit can be set by the guest to bias the - * start of command processing into the buffer. If an error is - * encountered the errorOffset will still be relative to the specific - * PA, not biased by the offset. When the command buffer is finished - * the guest should not read the offset field as there is no guarantee - * what it will set to. - * - * When the SVGA_CAP_HP_CMD_QUEUE cap bit is set a new command queue - * SVGA_CB_CONTEXT_1 is available. Commands submitted to this queue - * will be executed as quickly as possible by the SVGA device - * potentially before already queued commands on SVGA_CB_CONTEXT_0. - * The SVGA device guarantees that any command buffers submitted to - * SVGA_CB_CONTEXT_0 will be executed after any _already_ submitted - * command buffers to SVGA_CB_CONTEXT_1. - */ - -#define SVGA_CB_MAX_SIZE (512 * 1024) /* 512 KB */ +#define SVGA_GMR_NULL ((uint32)-1) +#define SVGA_GMR_FRAMEBUFFER ((uint32)-2) + +#pragma pack(push, 1) +typedef struct SVGAGuestMemDescriptor { + uint32 ppn; + uint32 numPages; +} SVGAGuestMemDescriptor; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct SVGAGuestPtr { + uint32 gmrId; + uint32 offset; +} SVGAGuestPtr; +#pragma pack(pop) + +#define SVGA_CB_MAX_SIZE_DEFAULT (KBYTES_2_BYTES(512)) +#define SVGA_CB_MAX_SIZE_4MB (MBYTES_2_BYTES(4)) +#define SVGA_CB_MAX_SIZE SVGA_CB_MAX_SIZE_4MB #define SVGA_CB_MAX_QUEUED_PER_CONTEXT 32 -#define SVGA_CB_MAX_COMMAND_SIZE (32 * 1024) /* 32 KB */ +#define SVGA_CB_MAX_COMMAND_SIZE (32 * 1024) #define SVGA_CB_CONTEXT_MASK 0x3f typedef enum { - SVGA_CB_CONTEXT_DEVICE = 0x3f, - SVGA_CB_CONTEXT_0 = 0x0, - SVGA_CB_CONTEXT_1 = 0x1, /* Supported with SVGA_CAP_HP_CMD_QUEUE */ - SVGA_CB_CONTEXT_MAX = 0x2, + SVGA_CB_CONTEXT_DEVICE = 0x3f, + SVGA_CB_CONTEXT_0 = 0x0, + SVGA_CB_CONTEXT_1 = 0x1, + SVGA_CB_CONTEXT_MAX = 0x2, } SVGACBContext; - typedef enum { - /* - * The guest is supposed to write SVGA_CB_STATUS_NONE to the status - * field before submitting the command buffer header, the host will - * change the value when it is done with the command buffer. - */ - SVGA_CB_STATUS_NONE = 0, - - /* - * Written by the host when a command buffer completes successfully. - * The device raises an IRQ with SVGA_IRQFLAG_COMMAND_BUFFER unless - * the SVGA_CB_FLAG_NO_IRQ flag is set. - */ - SVGA_CB_STATUS_COMPLETED = 1, - - /* - * Written by the host synchronously with the command buffer - * submission to indicate the command buffer was not submitted. No - * IRQ is raised. - */ - SVGA_CB_STATUS_QUEUE_FULL = 2, - - /* - * Written by the host when an error was detected parsing a command - * in the command buffer, errorOffset is written to contain the - * offset to the first byte of the failing command. The device - * raises the IRQ with both SVGA_IRQFLAG_ERROR and - * SVGA_IRQFLAG_COMMAND_BUFFER. Some of the commands may have been - * processed. - */ - SVGA_CB_STATUS_COMMAND_ERROR = 3, - - /* - * Written by the host if there is an error parsing the command - * buffer header. The device raises the IRQ with both - * SVGA_IRQFLAG_ERROR and SVGA_IRQFLAG_COMMAND_BUFFER. The device - * did not processes any of the command buffer. 
- */ - SVGA_CB_STATUS_CB_HEADER_ERROR = 4, - - /* - * Written by the host if the guest requested the host to preempt - * the command buffer. The device will not raise any IRQs and the - * command buffer was not processed. - */ - SVGA_CB_STATUS_PREEMPTED = 5, - - /* - * Written by the host synchronously with the command buffer - * submission to indicate the the command buffer was not submitted - * due to an error. No IRQ is raised. - */ - SVGA_CB_STATUS_SUBMISSION_ERROR = 6, - - /* - * Written by the host when the host finished a - * SVGA_DC_CMD_ASYNC_STOP_QUEUE request for this command buffer - * queue. The offset of the first byte not processed is stored in - * the errorOffset field of the command buffer header. All guest - * visible side effects of commands till that point are guaranteed - * to be finished before this is written. The - * SVGA_IRQFLAG_COMMAND_BUFFER IRQ is raised as long as the - * SVGA_CB_FLAG_NO_IRQ is not set. - */ - SVGA_CB_STATUS_PARTIAL_COMPLETE = 7, + + SVGA_CB_STATUS_NONE = 0, + + SVGA_CB_STATUS_COMPLETED = 1, + + SVGA_CB_STATUS_QUEUE_FULL = 2, + + SVGA_CB_STATUS_COMMAND_ERROR = 3, + + SVGA_CB_STATUS_CB_HEADER_ERROR = 4, + + SVGA_CB_STATUS_PREEMPTED = 5, + + SVGA_CB_STATUS_SUBMISSION_ERROR = 6, + + SVGA_CB_STATUS_PARTIAL_COMPLETE = 7, } SVGACBStatus; typedef enum { - SVGA_CB_FLAG_NONE = 0, - SVGA_CB_FLAG_NO_IRQ = 1 << 0, - SVGA_CB_FLAG_DX_CONTEXT = 1 << 1, - SVGA_CB_FLAG_MOB = 1 << 2, + SVGA_CB_FLAG_NONE = 0, + SVGA_CB_FLAG_NO_IRQ = 1 << 0, + SVGA_CB_FLAG_DX_CONTEXT = 1 << 1, + SVGA_CB_FLAG_MOB = 1 << 2, } SVGACBFlags; -typedef -#include "vmware_pack_begin.h" -struct { - volatile SVGACBStatus status; /* Modified by device. */ - volatile uint32 errorOffset; /* Modified by device. */ - uint64 id; - SVGACBFlags flags; - uint32 length; - union { - PA pa; - struct { - SVGAMobId mobid; - uint32 mobOffset; - } mob; - } ptr; - uint32 offset; /* Valid if CMD_BUFFERS_2 cap set, must be zero otherwise, - * modified by device. - */ - uint32 dxContext; /* Valid if DX_CONTEXT flag set, must be zero otherwise */ - uint32 mustBeZero[6]; -} -#include "vmware_pack_end.h" -SVGACBHeader; +#pragma pack(push, 1) +typedef struct { + volatile SVGACBStatus status; + volatile uint32 errorOffset; + uint64 id; + SVGACBFlags flags; + uint32 length; + union { + PA pa; + struct { + SVGAMobId mobid; + uint32 mobOffset; + } mob; + } ptr; + uint32 offset; + uint32 dxContext; + uint32 mustBeZero[6]; +} SVGACBHeader; +#pragma pack(pop) typedef enum { - SVGA_DC_CMD_NOP = 0, - SVGA_DC_CMD_START_STOP_CONTEXT = 1, - SVGA_DC_CMD_PREEMPT = 2, - SVGA_DC_CMD_START_QUEUE = 3, /* Requires SVGA_CAP_HP_CMD_QUEUE */ - SVGA_DC_CMD_ASYNC_STOP_QUEUE = 4, /* Requires SVGA_CAP_HP_CMD_QUEUE */ - SVGA_DC_CMD_EMPTY_CONTEXT_QUEUE = 5, /* Requires SVGA_CAP_HP_CMD_QUEUE */ - SVGA_DC_CMD_MAX = 6, + SVGA_DC_CMD_NOP = 0, + SVGA_DC_CMD_START_STOP_CONTEXT = 1, + SVGA_DC_CMD_PREEMPT = 2, + SVGA_DC_CMD_START_QUEUE = 3, + SVGA_DC_CMD_ASYNC_STOP_QUEUE = 4, + SVGA_DC_CMD_EMPTY_CONTEXT_QUEUE = 5, + SVGA_DC_CMD_MAX = 6 } SVGADeviceContextCmdId; -/* - * Starts or stops both SVGA_CB_CONTEXT_0 and SVGA_CB_CONTEXT_1. - */ - typedef struct SVGADCCmdStartStop { - uint32 enable; - SVGACBContext context; /* Must be zero */ + uint32 enable; + SVGACBContext context; } SVGADCCmdStartStop; -/* - * SVGADCCmdPreempt -- - * - * This command allows the guest to request that all command buffers - * on SVGA_CB_CONTEXT_0 be preempted that can be. 
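A hedged sketch of the register-based submission path described above: fill in an SVGACBHeader, then write its 64-byte-aligned physical address to SVGA_REG_COMMAND_HIGH and SVGA_REG_COMMAND_LOW, folding the SVGACBContext into the low bits of the LOW write. svga_write_reg() is an assumed accessor, and placing the command bytes directly after the header is an assumption of this example, not a requirement of the interface.

```c
#include <string.h>     /* memset(); linux/string.h in kernel code */
#include "svga_reg.h"   /* SVGACBHeader, SVGACBContext, SVGA_REG_COMMAND_* */

extern void svga_write_reg(uint32 index, uint32 value);   /* hypothetical accessor */

/* Queue one command buffer of 'length' bytes that follows 'header' in memory. */
static void svga_submit_cb(SVGACBHeader *header, uint64 header_pa,
			   uint32 length, SVGACBContext context)
{
	memset(header, 0, sizeof(*header));        /* mustBeZero[] really must be zero */
	header->status = SVGA_CB_STATUS_NONE;      /* device overwrites this when done */
	header->flags = SVGA_CB_FLAG_NONE;
	header->length = length;
	header->ptr.pa = header_pa + sizeof(*header);  /* commands assumed to follow the header */

	/*
	 * header_pa must be 64-byte aligned, so its low 6 bits are free to carry
	 * the context.  Writing SVGA_REG_COMMAND_LOW queues the buffer.
	 */
	svga_write_reg(SVGA_REG_COMMAND_HIGH, (uint32)(header_pa >> 32));
	svga_write_reg(SVGA_REG_COMMAND_LOW, (uint32)header_pa | context);
}
```

On completion the device writes SVGA_CB_STATUS_COMPLETED (or an error status) back into header->status and, unless SVGA_CB_FLAG_NO_IRQ was set, raises the command-buffer interrupt.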
After execution - * of this command all command buffers that were preempted will - * already have SVGA_CB_STATUS_PREEMPTED written into the status - * field. The device might still be processing a command buffer, - * assuming execution of it started before the preemption request was - * received. Specifying the ignoreIDZero flag to TRUE will cause the - * device to not preempt command buffers with the id field in the - * command buffer header set to zero. - */ - typedef struct SVGADCCmdPreempt { - SVGACBContext context; /* Must be zero */ - uint32 ignoreIDZero; + SVGACBContext context; + uint32 ignoreIDZero; } SVGADCCmdPreempt; -/* - * Starts the requested command buffer processing queue. Valid only - * if the SVGA_CAP_HP_CMD_QUEUE cap is set. - * - * For a command queue to be considered runnable it must be enabled - * and any corresponding higher priority queues must also be enabled. - * For example in order for command buffers to be processed on - * SVGA_CB_CONTEXT_0 both SVGA_CB_CONTEXT_0 and SVGA_CB_CONTEXT_1 must - * be enabled. But for commands to be runnable on SVGA_CB_CONTEXT_1 - * only that queue must be enabled. - */ - typedef struct SVGADCCmdStartQueue { - SVGACBContext context; + SVGACBContext context; } SVGADCCmdStartQueue; -/* - * Requests the SVGA device to stop processing the requested command - * buffer queue as soon as possible. The guest knows the stop has - * completed when one of the following happens. - * - * 1) A command buffer status of SVGA_CB_STATUS_PARTIAL_COMPLETE is returned - * 2) A command buffer error is encountered with would stop the queue - * regardless of the async stop request. - * 3) All command buffers that have been submitted complete successfully. - * 4) The stop completes synchronously if no command buffers are - * active on the queue when it is issued. - * - * If the command queue is not in a runnable state there is no - * guarentee this async stop will finish. For instance if the high - * priority queue is not enabled and a stop is requested on the low - * priority queue, the high priority queue must be reenabled to - * guarantee that the async stop will finish. - * - * This command along with SVGA_DC_CMD_EMPTY_CONTEXT_QUEUE can be used - * to implement mid command buffer preemption. - * - * Valid only if the SVGA_CAP_HP_CMD_QUEUE cap is set. - */ - typedef struct SVGADCCmdAsyncStopQueue { - SVGACBContext context; + SVGACBContext context; } SVGADCCmdAsyncStopQueue; -/* - * Requests the SVGA device to throw away any full command buffers on - * the requested command queue that have not been started. For a - * driver to know which command buffers were thrown away a driver - * should only issue this command when the queue is stopped, for - * whatever reason. - */ - typedef struct SVGADCCmdEmptyQueue { - SVGACBContext context; + SVGACBContext context; } SVGADCCmdEmptyQueue; - -/* - * SVGAGMRImageFormat -- - * - * This is a packed representation of the source 2D image format - * for a GMR-to-screen blit. Currently it is defined as an encoding - * of the screen's color depth and bits-per-pixel, however, 16 bits - * are reserved for future use to identify other encodings (such as - * RGBA or higher-precision images). 
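The device-context commands just listed (a SVGADeviceContextCmdId followed by its small argument struct) are themselves carried in a command buffer submitted on SVGA_CB_CONTEXT_DEVICE. A hypothetical sketch of a preempt request for SVGA_CB_CONTEXT_0; the wrapper struct and the svga_submit_cb_payload() helper are illustrative assumptions, not part of this header.

```c
#include "svga_reg.h"

/* Command id followed by its argument struct, as placed in the buffer. */
struct dc_preempt_cmd {
	uint32 id;                 /* SVGA_DC_CMD_PREEMPT */
	SVGADCCmdPreempt body;
} __attribute__((packed));

/* Hypothetical helper that queues a command buffer on the given context. */
extern void svga_submit_cb_payload(const void *payload, uint32 length,
				   SVGACBContext context);

static void svga_preempt_context0(void)
{
	struct dc_preempt_cmd cmd = {
		.id = SVGA_DC_CMD_PREEMPT,
		.body = {
			.context = SVGA_CB_CONTEXT_0,
			.ignoreIDZero = 0,   /* also preempt buffers whose header id is 0 */
		},
	};

	/* Device-context commands go to the special SVGA_CB_CONTEXT_DEVICE queue. */
	svga_submit_cb_payload(&cmd, sizeof(cmd), SVGA_CB_CONTEXT_DEVICE);
}
```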
- * - * Currently supported formats: - * - * bpp depth Format Name - * --- ----- ----------- - * 32 24 32-bit BGRX - * 24 24 24-bit BGR - * 16 16 RGB 5-6-5 - * 16 15 RGB 5-5-5 - * - */ - typedef struct SVGAGMRImageFormat { - union { - struct { - uint32 bitsPerPixel : 8; - uint32 colorDepth : 8; - uint32 reserved : 16; /* Must be zero */ - }; - - uint32 value; - }; + union { + struct { + uint32 bitsPerPixel : 8; + uint32 colorDepth : 8; + uint32 reserved : 16; + }; + + uint32 value; + }; } SVGAGMRImageFormat; -typedef -#include "vmware_pack_begin.h" -struct SVGAGuestImage { - SVGAGuestPtr ptr; - - /* - * A note on interpretation of pitch: This value of pitch is the - * number of bytes between vertically adjacent image - * blocks. Normally this is the number of bytes between the first - * pixel of two adjacent scanlines. With compressed textures, - * however, this may represent the number of bytes between - * compression blocks rather than between rows of pixels. - * - * XXX: Compressed textures currently must be tightly packed in guest memory. - * - * If the image is 1-dimensional, pitch is ignored. - * - * If 'pitch' is zero, the SVGA3D device calculates a pitch value - * assuming each row of blocks is tightly packed. - */ - uint32 pitch; -} -#include "vmware_pack_end.h" -SVGAGuestImage; +#pragma pack(push, 1) +typedef struct SVGAGuestImage { + SVGAGuestPtr ptr; -/* - * SVGAColorBGRX -- - * - * A 24-bit color format (BGRX), which does not depend on the - * format of the legacy guest framebuffer (GFB) or the current - * GMRFB state. - */ + uint32 pitch; +} SVGAGuestImage; +#pragma pack(pop) typedef struct SVGAColorBGRX { - union { - struct { - uint32 b : 8; - uint32 g : 8; - uint32 r : 8; - uint32 x : 8; /* Unused */ - }; - - uint32 value; - }; + union { + struct { + uint32 b : 8; + uint32 g : 8; + uint32 r : 8; + uint32 x : 8; + }; + + uint32 value; + }; } SVGAColorBGRX; - -/* - * SVGASignedRect -- - * SVGASignedPoint -- - * - * Signed rectangle and point primitives. These are used by the new - * 2D primitives for drawing to Screen Objects, which can occupy a - * signed virtual coordinate space. - * - * SVGASignedRect specifies a half-open interval: the (left, top) - * pixel is part of the rectangle, but the (right, bottom) pixel is - * not. - */ - -typedef -#include "vmware_pack_begin.h" -struct { - int32 left; - int32 top; - int32 right; - int32 bottom; -} -#include "vmware_pack_end.h" -SVGASignedRect; - -typedef -#include "vmware_pack_begin.h" -struct { - int32 x; - int32 y; -} -#include "vmware_pack_end.h" -SVGASignedPoint; - - -/* - * SVGA Device Capabilities - * - * Note the holes in the bitfield. Missing bits have been deprecated, - * and must not be reused. Those capabilities will never be reported - * by new versions of the SVGA device. - * - * SVGA_CAP_IRQMASK -- - * Provides device interrupts. Adds device register SVGA_REG_IRQMASK - * to set interrupt mask and direct I/O port SVGA_IRQSTATUS_PORT to - * set/clear pending interrupts. - * - * SVGA_CAP_GMR -- - * Provides synchronous mapping of guest memory regions (GMR). - * Adds device registers SVGA_REG_GMR_ID, SVGA_REG_GMR_DESCRIPTOR, - * SVGA_REG_GMR_MAX_IDS, and SVGA_REG_GMR_MAX_DESCRIPTOR_LENGTH. - * - * SVGA_CAP_TRACES -- - * Allows framebuffer trace-based updates even when FIFO is enabled. - * Adds device register SVGA_REG_TRACES. - * - * SVGA_CAP_GMR2 -- - * Provides asynchronous commands to define and remap guest memory - * regions. Adds device registers SVGA_REG_GMRS_MAX_PAGES and - * SVGA_REG_MEMORY_SIZE. 
- * - * SVGA_CAP_SCREEN_OBJECT_2 -- - * Allow screen object support, and require backing stores from the - * guest for each screen object. - * - * SVGA_CAP_COMMAND_BUFFERS -- - * Enable register based command buffer submission. - * - * SVGA_CAP_DEAD1 -- - * This cap was incorrectly used by old drivers and should not be - * reused. - * - * SVGA_CAP_CMD_BUFFERS_2 -- - * Enable support for the prepend command buffer submision - * registers. SVGA_REG_CMD_PREPEND_LOW and - * SVGA_REG_CMD_PREPEND_HIGH. - * - * SVGA_CAP_GBOBJECTS -- - * Enable guest-backed objects and surfaces. - * - * SVGA_CAP_DX -- - * Enable support for DX commands, and command buffers in a mob. - * - * SVGA_CAP_HP_CMD_QUEUE -- - * Enable support for the high priority command queue, and the - * ScreenCopy command. - * - * SVGA_CAP_NO_BB_RESTRICTION -- - * Allow ScreenTargets to be defined without regard to the 32-bpp - * bounding-box memory restrictions. ie: - * - * The summed memory usage of all screens (assuming they were defined as - * 32-bpp) must always be less than the value of the - * SVGA_REG_MAX_PRIMARY_MEM register. - * - * If this cap is not present, the 32-bpp bounding box around all screens - * must additionally be under the value of the SVGA_REG_MAX_PRIMARY_MEM - * register. - * - * If the cap is present, the bounding box restriction is lifted (and only - * the screen-sum limit applies). - * - * (Note that this is a slight lie... there is still a sanity limit on any - * dimension of the topology to be less than SVGA_SCREEN_ROOT_LIMIT, even - * when SVGA_CAP_NO_BB_RESTRICTION is present, but that should be - * large enough to express any possible topology without holes between - * monitors.) - * - * SVGA_CAP_CAP2_REGISTER -- - * If this cap is present, the SVGA_REG_CAP2 register is supported. - */ - -#define SVGA_CAP_NONE 0x00000000 -#define SVGA_CAP_RECT_COPY 0x00000002 -#define SVGA_CAP_CURSOR 0x00000020 -#define SVGA_CAP_CURSOR_BYPASS 0x00000040 -#define SVGA_CAP_CURSOR_BYPASS_2 0x00000080 -#define SVGA_CAP_8BIT_EMULATION 0x00000100 -#define SVGA_CAP_ALPHA_CURSOR 0x00000200 -#define SVGA_CAP_3D 0x00004000 -#define SVGA_CAP_EXTENDED_FIFO 0x00008000 -#define SVGA_CAP_MULTIMON 0x00010000 -#define SVGA_CAP_PITCHLOCK 0x00020000 -#define SVGA_CAP_IRQMASK 0x00040000 -#define SVGA_CAP_DISPLAY_TOPOLOGY 0x00080000 -#define SVGA_CAP_GMR 0x00100000 -#define SVGA_CAP_TRACES 0x00200000 -#define SVGA_CAP_GMR2 0x00400000 -#define SVGA_CAP_SCREEN_OBJECT_2 0x00800000 -#define SVGA_CAP_COMMAND_BUFFERS 0x01000000 -#define SVGA_CAP_DEAD1 0x02000000 -#define SVGA_CAP_CMD_BUFFERS_2 0x04000000 -#define SVGA_CAP_GBOBJECTS 0x08000000 -#define SVGA_CAP_DX 0x10000000 -#define SVGA_CAP_HP_CMD_QUEUE 0x20000000 -#define SVGA_CAP_NO_BB_RESTRICTION 0x40000000 -#define SVGA_CAP_CAP2_REGISTER 0x80000000 - -/* - * The SVGA_REG_CAP2 register is an additional set of SVGA capability bits. - * - * SVGA_CAP2_GROW_OTABLE -- - * Allow the GrowOTable/DXGrowCOTable commands. - * - * SVGA_CAP2_INTRA_SURFACE_COPY -- - * Allow the IntraSurfaceCopy command. - * - * SVGA_CAP2_DX2 -- - * Allow the DefineGBSurface_v3, WholeSurfaceCopy, WriteZeroSurface, and - * HintZeroSurface commands, and the SVGA_REG_GUEST_DRIVER_ID register. - * - * SVGA_CAP2_GB_MEMSIZE_2 -- - * Allow the SVGA_REG_GBOBJECT_MEM_SIZE_KB register. - * - * SVGA_CAP2_SCREENDMA_REG -- - * Allow the SVGA_REG_SCREENDMA register. - * - * SVGA_CAP2_OTABLE_PTDEPTH_2 -- - * Allow 2 level page tables for OTable commands. 
- * - * SVGA_CAP2_NON_MS_TO_MS_STRETCHBLT -- - * Allow a stretch blt from a non-multisampled surface to a multisampled - * surface. - * - * SVGA_CAP2_CURSOR_MOB -- - * Allow the SVGA_REG_CURSOR_MOBID register. - * - * SVGA_CAP2_MSHINT -- - * Allow the SVGA_REG_MSHINT register. - * - * SVGA_CAP2_DX3 -- - * Allows the DefineGBSurface_v4 command. - * Allows the DXDefineDepthStencilView_v2, DXDefineStreamOutputWithMob, - * and DXBindStreamOutput commands if 3D is also available. - * Allows the DXPredStagingCopy and DXStagingCopy commands if SM41 - * is also available. - * - * SVGA_CAP2_RESERVED -- - * Reserve the last bit for extending the SVGA capabilities to some - * future mechanisms. - */ -#define SVGA_CAP2_NONE 0x00000000 -#define SVGA_CAP2_GROW_OTABLE 0x00000001 -#define SVGA_CAP2_INTRA_SURFACE_COPY 0x00000002 -#define SVGA_CAP2_DX2 0x00000004 -#define SVGA_CAP2_GB_MEMSIZE_2 0x00000008 -#define SVGA_CAP2_SCREENDMA_REG 0x00000010 -#define SVGA_CAP2_OTABLE_PTDEPTH_2 0x00000020 +#pragma pack(push, 1) +typedef struct { + int32 left; + int32 top; + int32 right; + int32 bottom; +} SVGASignedRect; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + int32 x; + int32 y; +} SVGASignedPoint; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint32 x; + uint32 y; +} SVGAUnsignedPoint; +#pragma pack(pop) + +#define SVGA_CAP_NONE 0x00000000 +#define SVGA_CAP_RECT_COPY 0x00000002 +#define SVGA_CAP_CURSOR 0x00000020 +#define SVGA_CAP_CURSOR_BYPASS 0x00000040 +#define SVGA_CAP_CURSOR_BYPASS_2 0x00000080 +#define SVGA_CAP_8BIT_EMULATION 0x00000100 +#define SVGA_CAP_ALPHA_CURSOR 0x00000200 +#define SVGA_CAP_3D 0x00004000 +#define SVGA_CAP_EXTENDED_FIFO 0x00008000 +#define SVGA_CAP_MULTIMON 0x00010000 +#define SVGA_CAP_PITCHLOCK 0x00020000 +#define SVGA_CAP_IRQMASK 0x00040000 +#define SVGA_CAP_DISPLAY_TOPOLOGY 0x00080000 +#define SVGA_CAP_GMR 0x00100000 +#define SVGA_CAP_TRACES 0x00200000 +#define SVGA_CAP_GMR2 0x00400000 +#define SVGA_CAP_SCREEN_OBJECT_2 0x00800000 +#define SVGA_CAP_COMMAND_BUFFERS 0x01000000 +#define SVGA_CAP_DEAD1 0x02000000 +#define SVGA_CAP_CMD_BUFFERS_2 0x04000000 +#define SVGA_CAP_GBOBJECTS 0x08000000 +#define SVGA_CAP_DX 0x10000000 +#define SVGA_CAP_HP_CMD_QUEUE 0x20000000 +#define SVGA_CAP_NO_BB_RESTRICTION 0x40000000 +#define SVGA_CAP_CAP2_REGISTER 0x80000000 + +#define SVGA_CAP2_NONE 0x00000000 +#define SVGA_CAP2_GROW_OTABLE 0x00000001 +#define SVGA_CAP2_INTRA_SURFACE_COPY 0x00000002 +#define SVGA_CAP2_DX2 0x00000004 +#define SVGA_CAP2_GB_MEMSIZE_2 0x00000008 +#define SVGA_CAP2_SCREENDMA_REG 0x00000010 +#define SVGA_CAP2_OTABLE_PTDEPTH_2 0x00000020 #define SVGA_CAP2_NON_MS_TO_MS_STRETCHBLT 0x00000040 -#define SVGA_CAP2_CURSOR_MOB 0x00000080 -#define SVGA_CAP2_MSHINT 0x00000100 -#define SVGA_CAP2_DX3 0x00000400 -#define SVGA_CAP2_RESERVED 0x80000000 - - -/* - * The Guest can optionally read some SVGA device capabilities through - * the backdoor with command BDOOR_CMD_GET_SVGA_CAPABILITIES before - * the SVGA device is initialized. The type of capability the guest - * is requesting from the SVGABackdoorCapType enum should be placed in - * the upper 16 bits of the backdoor command id (ECX). On success the - * the value of EBX will be set to BDOOR_MAGIC and EAX will be set to - * the requested capability. If the command is not supported then EBX - * will be left unchanged and EAX will be set to -1. 
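A small sketch of how a driver might gate optional features on these bits, assuming a hypothetical svga_read_reg() accessor; SVGA_REG_CAPABILITIES and SVGA_REG_CAP2 are the capability registers referred to in the comments above.

```c
#include "svga_reg.h"

extern uint32 svga_read_reg(uint32 index);   /* hypothetical accessor */

static int svga_has_cursor_mob(void)
{
	uint32 caps = svga_read_reg(SVGA_REG_CAPABILITIES);
	uint32 caps2 = 0;

	/* SVGA_REG_CAP2 only exists when SVGA_CAP_CAP2_REGISTER is advertised. */
	if (caps & SVGA_CAP_CAP2_REGISTER)
		caps2 = svga_read_reg(SVGA_REG_CAP2);

	return (caps2 & SVGA_CAP2_CURSOR_MOB) != 0;
}
```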
Because it is - * possible that -1 is the value of the requested cap the correct way - * to check if the command was successful is to check if EBX was changed - * to BDOOR_MAGIC making sure to initialize the register to something - * else first. - */ +#define SVGA_CAP2_CURSOR_MOB 0x00000080 +#define SVGA_CAP2_MSHINT 0x00000100 +#define SVGA_CAP2_CB_MAX_SIZE_4MB 0x00000200 +#define SVGA_CAP2_DX3 0x00000400 +#define SVGA_CAP2_FRAME_TYPE 0x00000800 +#define SVGA_CAP2_COTABLE_COPY 0x00001000 +#define SVGA_CAP2_TRACE_FULL_FB 0x00002000 +#define SVGA_CAP2_EXTRA_REGS 0x00004000 +#define SVGA_CAP2_LO_STAGING 0x00008000 +#define SVGA_CAP2_RESERVED 0x80000000 typedef enum { - SVGABackdoorCapDeviceCaps = 0, - SVGABackdoorCapFifoCaps = 1, - SVGABackdoorCap3dHWVersion = 2, - SVGABackdoorCapDeviceCaps2 = 3, - SVGABackdoorCapDevelCaps = 4, - SVGABackdoorDevelRenderer = 5, - SVGABackdoorCapMax = 6, + SVGABackdoorCapDeviceCaps = 0, + SVGABackdoorCapFifoCaps = 1, + SVGABackdoorCap3dHWVersion = 2, + SVGABackdoorCapDeviceCaps2 = 3, + SVGABackdoorCapDevelCaps = 4, + SVGABackdoorDevelRenderer = 5, + SVGABackdoorDevelUsingISB = 6, + SVGABackdoorCapMax = 7, } SVGABackdoorCapType; - -/* - * FIFO register indices. - * - * The FIFO is a chunk of device memory mapped into guest physmem. It - * is always treated as 32-bit words. - * - * The guest driver gets to decide how to partition it between - * - FIFO registers (there are always at least 4, specifying where the - * following data area is and how much data it contains; there may be - * more registers following these, depending on the FIFO protocol - * version in use) - * - FIFO data, written by the guest and slurped out by the VMX. - * These indices are 32-bit word offsets into the FIFO. - */ - enum { - /* - * Block 1 (basic registers): The originally defined FIFO registers. - * These exist and are valid for all versions of the FIFO protocol. - */ - - SVGA_FIFO_MIN = 0, - SVGA_FIFO_MAX, /* The distance from MIN to MAX must be at least 10K */ - SVGA_FIFO_NEXT_CMD, - SVGA_FIFO_STOP, - - /* - * Block 2 (extended registers): Mandatory registers for the extended - * FIFO. These exist if the SVGA caps register includes - * SVGA_CAP_EXTENDED_FIFO; some of them are valid only if their - * associated capability bit is enabled. - * - * Note that when originally defined, SVGA_CAP_EXTENDED_FIFO implied - * support only for (FIFO registers) CAPABILITIES, FLAGS, and FENCE. - * This means that the guest has to test individually (in most cases - * using FIFO caps) for the presence of registers after this; the VMX - * can define "extended FIFO" to mean whatever it wants, and currently - * won't enable it unless there's room for that set and much more. - */ - - SVGA_FIFO_CAPABILITIES = 4, - SVGA_FIFO_FLAGS, - /* Valid with SVGA_FIFO_CAP_FENCE: */ - SVGA_FIFO_FENCE, - - /* - * Block 3a (optional extended registers): Additional registers for the - * extended FIFO, whose presence isn't actually implied by - * SVGA_CAP_EXTENDED_FIFO; these exist if SVGA_FIFO_MIN is high enough to - * leave room for them. - * - * These in block 3a, the VMX currently considers mandatory for the - * extended FIFO. - */ - - /* Valid if exists (i.e. 
if extended FIFO enabled): */ - SVGA_FIFO_3D_HWVERSION, /* See SVGA3dHardwareVersion in svga3d_reg.h */ - /* Valid with SVGA_FIFO_CAP_PITCHLOCK: */ - SVGA_FIFO_PITCHLOCK, - - /* Valid with SVGA_FIFO_CAP_CURSOR_BYPASS_3: */ - SVGA_FIFO_CURSOR_ON, /* Cursor bypass 3 show/hide register */ - SVGA_FIFO_CURSOR_X, /* Cursor bypass 3 x register */ - SVGA_FIFO_CURSOR_Y, /* Cursor bypass 3 y register */ - SVGA_FIFO_CURSOR_COUNT, /* Incremented when any of the other 3 change */ - SVGA_FIFO_CURSOR_LAST_UPDATED,/* Last time the host updated the cursor */ - - /* Valid with SVGA_FIFO_CAP_RESERVE: */ - SVGA_FIFO_RESERVED, /* Bytes past NEXT_CMD with real contents */ - - /* - * Valid with SVGA_FIFO_CAP_SCREEN_OBJECT or SVGA_FIFO_CAP_SCREEN_OBJECT_2: - * - * By default this is SVGA_ID_INVALID, to indicate that the cursor - * coordinates are specified relative to the virtual root. If this - * is set to a specific screen ID, cursor position is reinterpreted - * as a signed offset relative to that screen's origin. - */ - SVGA_FIFO_CURSOR_SCREEN_ID, - - /* - * Valid with SVGA_FIFO_CAP_DEAD - * - * An arbitrary value written by the host, drivers should not use it. - */ - SVGA_FIFO_DEAD, - - /* - * Valid with SVGA_FIFO_CAP_3D_HWVERSION_REVISED: - * - * Contains 3D HWVERSION (see SVGA3dHardwareVersion in svga3d_reg.h) - * on platforms that can enforce graphics resource limits. - */ - SVGA_FIFO_3D_HWVERSION_REVISED, - - /* - * XXX: The gap here, up until SVGA_FIFO_3D_CAPS, can be used for new - * registers, but this must be done carefully and with judicious use of - * capability bits, since comparisons based on SVGA_FIFO_MIN aren't - * enough to tell you whether the register exists: we've shipped drivers - * and products that used SVGA_FIFO_3D_CAPS but didn't know about some of - * the earlier ones. The actual order of introduction was: - * - PITCHLOCK - * - 3D_CAPS - * - CURSOR_* (cursor bypass 3) - * - RESERVED - * So, code that wants to know whether it can use any of the - * aforementioned registers, or anything else added after PITCHLOCK and - * before 3D_CAPS, needs to reason about something other than - * SVGA_FIFO_MIN. - */ - - /* - * 3D caps block space; valid with 3D hardware version >= - * SVGA3D_HWVERSION_WS6_B1. - */ - SVGA_FIFO_3D_CAPS = 32, - SVGA_FIFO_3D_CAPS_LAST = 32 + 255, - - /* - * End of VMX's current definition of "extended-FIFO registers". - * Registers before here are always enabled/disabled as a block; either - * the extended FIFO is enabled and includes all preceding registers, or - * it's disabled entirely. - * - * Block 3b (truly optional extended registers): Additional registers for - * the extended FIFO, which the VMX already knows how to enable and - * disable with correct granularity. - * - * Registers after here exist if and only if the guest SVGA driver - * sets SVGA_FIFO_MIN high enough to leave room for them. - */ - - /* Valid if register exists: */ - SVGA_FIFO_GUEST_3D_HWVERSION, /* Guest driver's 3D version */ - SVGA_FIFO_FENCE_GOAL, /* Matching target for SVGA_IRQFLAG_FENCE_GOAL */ - SVGA_FIFO_BUSY, /* See "FIFO Synchronization Registers" */ - - /* - * Always keep this last. This defines the maximum number of - * registers we know about. At power-on, this value is placed in - * the SVGA_REG_MEM_REGS register, and we expect the guest driver - * to allocate this much space in FIFO memory for registers. - */ - SVGA_FIFO_NUM_REGS -}; + SVGA_FIFO_MIN = 0, + SVGA_FIFO_MAX, + SVGA_FIFO_NEXT_CMD, + SVGA_FIFO_STOP, -/* - * Definition of registers included in extended FIFO support. 
- * - * The guest SVGA driver gets to allocate the FIFO between registers - * and data. It must always allocate at least 4 registers, but old - * drivers stopped there. - * - * The VMX will enable extended FIFO support if and only if the guest - * left enough room for all registers defined as part of the mandatory - * set for the extended FIFO. - * - * Note that the guest drivers typically allocate the FIFO only at - * initialization time, not at mode switches, so it's likely that the - * number of FIFO registers won't change without a reboot. - * - * All registers less than this value are guaranteed to be present if - * svgaUser->fifo.extended is set. Any later registers must be tested - * individually for compatibility at each use (in the VMX). - * - * This value is used only by the VMX, so it can change without - * affecting driver compatibility; keep it that way? - */ -#define SVGA_FIFO_EXTENDED_MANDATORY_REGS (SVGA_FIFO_3D_CAPS_LAST + 1) + SVGA_FIFO_CAPABILITIES = 4, + SVGA_FIFO_FLAGS, + SVGA_FIFO_FENCE, -/* - * FIFO Synchronization Registers - * - * SVGA_REG_SYNC -- - * - * The SYNC register can be used by the guest driver to signal to the - * device that the guest driver is waiting for previously submitted - * commands to complete. - * - * When the guest driver writes to the SYNC register, the device sets - * the BUSY register to TRUE, and starts processing the submitted commands - * (if it was not already doing so). When all previously submitted - * commands are finished and the device is idle again, it sets the BUSY - * register back to FALSE. (If the guest driver submits new commands - * after writing the SYNC register, the new commands are not guaranteed - * to have been procesesd.) - * - * When guest drivers are submitting commands using the FIFO, the device - * periodically polls to check for new FIFO commands when idle, which may - * introduce a delay in command processing. If the guest-driver wants - * the commands to be processed quickly (which it typically does), it - * should write SYNC after each batch of commands is committed to the - * FIFO to immediately wake up the device. For even better performance, - * the guest can use the SVGA_FIFO_BUSY register to avoid these extra - * SYNC writes if the device is already active, using the technique known - * as "Ringing the Doorbell" (described below). (Note that command - * buffer submission implicitly wakes up the device, and so doesn't - * suffer from this problem.) - * - * The SYNC register can also be used in combination with BUSY to - * synchronously ensure that all SVGA commands are processed (with both - * the FIFO and command-buffers). To do this, the guest driver should - * write to SYNC, and then loop reading BUSY until BUSY returns FALSE. - * This technique is known as a "Legacy Sync". - * - * SVGA_REG_BUSY -- - * - * This register is set to TRUE when SVGA_REG_SYNC is written, - * and is set back to FALSE when the device has finished processing - * all commands and is idle again. - * - * Every read from the BUSY reigster will block for an undefined - * amount of time (normally until the device finishes some interesting - * work unit), or the device is idle. - * - * Guest drivers can also do a partial Legacy Sync to check for some - * particular condition, for instance by stopping early when a fence - * passes before BUSY has been set back to FALSE. This is particularly - * useful if the guest-driver knows that it is blocked waiting on the - * device, because it will yield CPU time back to the host. 
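The FIFO partitioning described above comes down to the guest writing the first four bookkeeping registers. A hedged sketch, where 'fifo' is the mapped FIFO memory and 'fifo_size' its length in bytes (both assumptions of this example):

```c
#include "svga_reg.h"   /* SVGA_FIFO_MIN, SVGA_FIFO_MAX, SVGA_FIFO_NEXT_CMD, SVGA_FIFO_STOP */

/*
 * Initialize the FIFO bookkeeping registers.  Reserving room for
 * SVGA_FIFO_NUM_REGS registers (rather than only the original four)
 * leaves space for the extended-FIFO register block.
 */
static void svga_fifo_init(volatile uint32 *fifo, uint32 fifo_size)
{
	fifo[SVGA_FIFO_MIN] = SVGA_FIFO_NUM_REGS * sizeof(uint32);
	fifo[SVGA_FIFO_MAX] = fifo_size;
	fifo[SVGA_FIFO_NEXT_CMD] = fifo[SVGA_FIFO_MIN];
	fifo[SVGA_FIFO_STOP] = fifo[SVGA_FIFO_MIN];
}
```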
- * - * SVGA_FIFO_BUSY -- - * - * The SVGA_FIFO_BUSY register is a fast way for the guest driver to check - * whether the device is actively processing FIFO commands before writing - * the more expensive SYNC register. - * - * If this register reads as TRUE, the device is actively processing - * FIFO commands. - * - * If this register reads as FALSE, the device may not be actively - * processing commands, and the guest driver should try - * "Ringing the Doorbell". - * - * To Ring the Doorbell, the guest should: - * - * 1. Have already written their batch of commands into the FIFO. - * 2. Check if the SVGA_FIFO_BUSY register is available by reading - * SVGA_FIFO_MIN. - * 3. Read SVGA_FIFO_BUSY. If it reads as TRUE, the device is actively - * processing FIFO commands, and no further action is necessary. - * 4. If SVGA_FIFO_BUSY was FALSE, write TRUE to SVGA_REG_SYNC. - * - * For maximum performance, this procedure should be followed after - * every meaningful batch of commands has been written into the FIFO. - * (Normally when the underlying application signals it's finished a - * meaningful work unit by calling Flush.) - */ + SVGA_FIFO_3D_HWVERSION, + SVGA_FIFO_PITCHLOCK, -/* - * FIFO Capabilities - * - * Fence -- Fence register and command are supported - * Accel Front -- Front buffer only commands are supported - * Pitch Lock -- Pitch lock register is supported - * Video -- SVGA Video overlay units are supported - * Escape -- Escape command is supported - * - * SVGA_FIFO_CAP_SCREEN_OBJECT -- - * - * Provides dynamic multi-screen rendering, for improved Unity and - * multi-monitor modes. With Screen Object, the guest can - * dynamically create and destroy 'screens', which can represent - * Unity windows or virtual monitors. Screen Object also provides - * strong guarantees that DMA operations happen only when - * guest-initiated. Screen Object deprecates the BAR1 guest - * framebuffer (GFB) and all commands that work only with the GFB. - * - * New registers: - * FIFO_CURSOR_SCREEN_ID, VIDEO_DATA_GMRID, VIDEO_DST_SCREEN_ID - * - * New 2D commands: - * DEFINE_SCREEN, DESTROY_SCREEN, DEFINE_GMRFB, BLIT_GMRFB_TO_SCREEN, - * BLIT_SCREEN_TO_GMRFB, ANNOTATION_FILL, ANNOTATION_COPY - * - * New 3D commands: - * BLIT_SURFACE_TO_SCREEN - * - * New guarantees: - * - * - The host will not read or write guest memory, including the GFB, - * except when explicitly initiated by a DMA command. - * - * - All DMA, including legacy DMA like UPDATE and PRESENT_READBACK, - * is guaranteed to complete before any subsequent FENCEs. - * - * - All legacy commands which affect a Screen (UPDATE, PRESENT, - * PRESENT_READBACK) as well as new Screen blit commands will - * all behave consistently as blits, and memory will be read - * or written in FIFO order. - * - * For example, if you PRESENT from one SVGA3D surface to multiple - * places on the screen, the data copied will always be from the - * SVGA3D surface at the time the PRESENT was issued in the FIFO. - * This was not necessarily true on devices without Screen Object. - * - * This means that on devices that support Screen Object, the - * PRESENT_READBACK command should not be necessary unless you - * actually want to read back the results of 3D rendering into - * system memory. (And for that, the BLIT_SCREEN_TO_GMRFB - * command provides a strict superset of functionality.) - * - * - When a screen is resized, either using Screen Object commands or - * legacy multimon registers, its contents are preserved. 
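The "Ringing the Doorbell" steps above, written out as a sketch; 'fifo' is the mapped FIFO and svga_write_reg() an assumed accessor.

```c
#include "svga_reg.h"

extern void svga_write_reg(uint32 index, uint32 value);   /* hypothetical accessor */

/* Wake the device only if it is not already processing FIFO commands. */
static void svga_ring_doorbell(volatile uint32 *fifo)
{
	/* Step 2: SVGA_FIFO_BUSY exists only if SVGA_FIFO_MIN leaves room for it. */
	int have_busy_reg = fifo[SVGA_FIFO_MIN] > SVGA_FIFO_BUSY * sizeof(uint32);

	/* Step 3: device already busy with the FIFO, nothing to do. */
	if (have_busy_reg && fifo[SVGA_FIFO_BUSY])
		return;

	/* Step 4: otherwise kick the device with a SYNC write. */
	svga_write_reg(SVGA_REG_SYNC, 1);
}
```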
- * - * SVGA_FIFO_CAP_GMR2 -- - * - * Provides new commands to define and remap guest memory regions (GMR). - * - * New 2D commands: - * DEFINE_GMR2, REMAP_GMR2. - * - * SVGA_FIFO_CAP_3D_HWVERSION_REVISED -- - * - * Indicates new register SVGA_FIFO_3D_HWVERSION_REVISED exists. - * This register may replace SVGA_FIFO_3D_HWVERSION on platforms - * that enforce graphics resource limits. This allows the platform - * to clear SVGA_FIFO_3D_HWVERSION and disable 3D in legacy guest - * drivers that do not limit their resources. - * - * Note this is an alias to SVGA_FIFO_CAP_GMR2 because these indicators - * are codependent (and thus we use a single capability bit). - * - * SVGA_FIFO_CAP_SCREEN_OBJECT_2 -- - * - * Modifies the DEFINE_SCREEN command to include a guest provided - * backing store in GMR memory and the bytesPerLine for the backing - * store. This capability requires the use of a backing store when - * creating screen objects. However if SVGA_FIFO_CAP_SCREEN_OBJECT - * is present then backing stores are optional. - * - * SVGA_FIFO_CAP_DEAD -- - * - * Drivers should not use this cap bit. This cap bit can not be - * reused since some hosts already expose it. - */ + SVGA_FIFO_CURSOR_ON, + SVGA_FIFO_CURSOR_X, + SVGA_FIFO_CURSOR_Y, + SVGA_FIFO_CURSOR_COUNT, + SVGA_FIFO_CURSOR_LAST_UPDATED, -#define SVGA_FIFO_CAP_NONE 0 -#define SVGA_FIFO_CAP_FENCE (1<<0) -#define SVGA_FIFO_CAP_ACCELFRONT (1<<1) -#define SVGA_FIFO_CAP_PITCHLOCK (1<<2) -#define SVGA_FIFO_CAP_VIDEO (1<<3) -#define SVGA_FIFO_CAP_CURSOR_BYPASS_3 (1<<4) -#define SVGA_FIFO_CAP_ESCAPE (1<<5) -#define SVGA_FIFO_CAP_RESERVE (1<<6) -#define SVGA_FIFO_CAP_SCREEN_OBJECT (1<<7) -#define SVGA_FIFO_CAP_GMR2 (1<<8) -#define SVGA_FIFO_CAP_3D_HWVERSION_REVISED SVGA_FIFO_CAP_GMR2 -#define SVGA_FIFO_CAP_SCREEN_OBJECT_2 (1<<9) -#define SVGA_FIFO_CAP_DEAD (1<<10) + SVGA_FIFO_RESERVED, + SVGA_FIFO_CURSOR_SCREEN_ID, -/* - * FIFO Flags - * - * Accel Front -- Driver should use front buffer only commands - */ + SVGA_FIFO_DEAD, -#define SVGA_FIFO_FLAG_NONE 0 -#define SVGA_FIFO_FLAG_ACCELFRONT (1<<0) -#define SVGA_FIFO_FLAG_RESERVED (1<<31) /* Internal use only */ + SVGA_FIFO_3D_HWVERSION_REVISED, -/* - * FIFO reservation sentinel value - */ + SVGA_FIFO_3D_CAPS = 32, + SVGA_FIFO_3D_CAPS_LAST = 32 + 255, -#define SVGA_FIFO_RESERVED_UNKNOWN 0xffffffff + SVGA_FIFO_GUEST_3D_HWVERSION, + SVGA_FIFO_FENCE_GOAL, + SVGA_FIFO_BUSY, + SVGA_FIFO_NUM_REGS +}; -/* - * ScreenDMA Register Values - */ +#define SVGA_FIFO_3D_CAPS_SIZE (SVGA_FIFO_3D_CAPS_LAST - SVGA_FIFO_3D_CAPS + 1) -#define SVGA_SCREENDMA_REG_UNDEFINED 0 -#define SVGA_SCREENDMA_REG_NOT_PRESENT 1 -#define SVGA_SCREENDMA_REG_PRESENT 2 -#define SVGA_SCREENDMA_REG_MAX 3 +#define SVGA3D_FIFO_CAPS_RECORD_DEVCAPS 0x100 +typedef uint32 SVGA3dFifoCapsRecordType; -/* - * Video overlay support - */ +typedef uint32 SVGA3dFifoCapPair[2]; -#define SVGA_NUM_OVERLAY_UNITS 32 +#pragma pack(push, 1) +typedef struct SVGA3dFifoCapsRecordHeader { + uint32 length; + SVGA3dFifoCapsRecordType type; +} SVGA3dFifoCapsRecordHeader; +#pragma pack(pop) -/* - * Video capabilities that the guest is currently using - */ +#define SVGA_FIFO_EXTENDED_MANDATORY_REGS (SVGA_FIFO_3D_CAPS_LAST + 1) -#define SVGA_VIDEO_FLAG_COLORKEY 0x0001 +#define SVGA_FIFO_CAP_NONE 0 +#define SVGA_FIFO_CAP_FENCE (1 << 0) +#define SVGA_FIFO_CAP_ACCELFRONT (1 << 1) +#define SVGA_FIFO_CAP_PITCHLOCK (1 << 2) +#define SVGA_FIFO_CAP_VIDEO (1 << 3) +#define SVGA_FIFO_CAP_CURSOR_BYPASS_3 (1 << 4) +#define SVGA_FIFO_CAP_ESCAPE (1 << 5) +#define SVGA_FIFO_CAP_RESERVE (1 
<< 6) +#define SVGA_FIFO_CAP_SCREEN_OBJECT (1 << 7) +#define SVGA_FIFO_CAP_GMR2 (1 << 8) +#define SVGA_FIFO_CAP_3D_HWVERSION_REVISED SVGA_FIFO_CAP_GMR2 +#define SVGA_FIFO_CAP_SCREEN_OBJECT_2 (1 << 9) +#define SVGA_FIFO_CAP_DEAD (1 << 10) +#define SVGA_FIFO_FLAG_NONE 0 +#define SVGA_FIFO_FLAG_ACCELFRONT (1 << 0) +#define SVGA_FIFO_FLAG_RESERVED (1 << 31) -/* - * Offsets for the video overlay registers - */ +#define SVGA_FIFO_RESERVED_UNKNOWN 0xffffffff -enum { - SVGA_VIDEO_ENABLED = 0, - SVGA_VIDEO_FLAGS, - SVGA_VIDEO_DATA_OFFSET, - SVGA_VIDEO_FORMAT, - SVGA_VIDEO_COLORKEY, - SVGA_VIDEO_SIZE, /* Deprecated */ - SVGA_VIDEO_WIDTH, - SVGA_VIDEO_HEIGHT, - SVGA_VIDEO_SRC_X, - SVGA_VIDEO_SRC_Y, - SVGA_VIDEO_SRC_WIDTH, - SVGA_VIDEO_SRC_HEIGHT, - SVGA_VIDEO_DST_X, /* Signed int32 */ - SVGA_VIDEO_DST_Y, /* Signed int32 */ - SVGA_VIDEO_DST_WIDTH, - SVGA_VIDEO_DST_HEIGHT, - SVGA_VIDEO_PITCH_1, - SVGA_VIDEO_PITCH_2, - SVGA_VIDEO_PITCH_3, - SVGA_VIDEO_DATA_GMRID, /* Optional, defaults to SVGA_GMR_FRAMEBUFFER */ - SVGA_VIDEO_DST_SCREEN_ID, /* Optional, defaults to virtual coords */ - /* (SVGA_ID_INVALID) */ - SVGA_VIDEO_NUM_REGS -}; +#define SVGA_SCREENDMA_REG_UNDEFINED 0 +#define SVGA_SCREENDMA_REG_NOT_PRESENT 1 +#define SVGA_SCREENDMA_REG_PRESENT 2 +#define SVGA_SCREENDMA_REG_MAX 3 +#define SVGA_NUM_OVERLAY_UNITS 32 -/* - * SVGA Overlay Units - * - * width and height relate to the entire source video frame. - * srcX, srcY, srcWidth and srcHeight represent subset of the source - * video frame to be displayed. - */ +#define SVGA_VIDEO_FLAG_COLORKEY 0x0001 -typedef -#include "vmware_pack_begin.h" -struct SVGAOverlayUnit { - uint32 enabled; - uint32 flags; - uint32 dataOffset; - uint32 format; - uint32 colorKey; - uint32 size; - uint32 width; - uint32 height; - uint32 srcX; - uint32 srcY; - uint32 srcWidth; - uint32 srcHeight; - int32 dstX; - int32 dstY; - uint32 dstWidth; - uint32 dstHeight; - uint32 pitches[3]; - uint32 dataGMRId; - uint32 dstScreenId; -} -#include "vmware_pack_end.h" -SVGAOverlayUnit; +enum { + SVGA_VIDEO_ENABLED = 0, + SVGA_VIDEO_FLAGS, + SVGA_VIDEO_DATA_OFFSET, + SVGA_VIDEO_FORMAT, + SVGA_VIDEO_COLORKEY, + SVGA_VIDEO_SIZE, + SVGA_VIDEO_WIDTH, + SVGA_VIDEO_HEIGHT, + SVGA_VIDEO_SRC_X, + SVGA_VIDEO_SRC_Y, + SVGA_VIDEO_SRC_WIDTH, + SVGA_VIDEO_SRC_HEIGHT, + SVGA_VIDEO_DST_X, + SVGA_VIDEO_DST_Y, + SVGA_VIDEO_DST_WIDTH, + SVGA_VIDEO_DST_HEIGHT, + SVGA_VIDEO_PITCH_1, + SVGA_VIDEO_PITCH_2, + SVGA_VIDEO_PITCH_3, + SVGA_VIDEO_DATA_GMRID, + SVGA_VIDEO_DST_SCREEN_ID, + SVGA_VIDEO_NUM_REGS +}; +#pragma pack(push, 1) +typedef struct SVGAOverlayUnit { + uint32 enabled; + uint32 flags; + uint32 dataOffset; + uint32 format; + uint32 colorKey; + uint32 size; + uint32 width; + uint32 height; + uint32 srcX; + uint32 srcY; + uint32 srcWidth; + uint32 srcHeight; + int32 dstX; + int32 dstY; + uint32 dstWidth; + uint32 dstHeight; + uint32 pitches[3]; + uint32 dataGMRId; + uint32 dstScreenId; +} SVGAOverlayUnit; +#pragma pack(pop) -/* - * Guest display topology - * - * XXX: This structure is not part of the SVGA device's interface, and - * doesn't really belong here. 
- */ #define SVGA_INVALID_DISPLAY_ID ((uint32)-1) typedef struct SVGADisplayTopology { - uint16 displayId; - uint16 isPrimary; - uint32 width; - uint32 height; - uint32 positionX; - uint32 positionY; + uint16 displayId; + uint16 isPrimary; + uint32 width; + uint32 height; + uint32 positionX; + uint32 positionY; } SVGADisplayTopology; - -/* - * SVGAScreenObject -- - * - * This is a new way to represent a guest's multi-monitor screen or - * Unity window. Screen objects are only supported if the - * SVGA_FIFO_CAP_SCREEN_OBJECT capability bit is set. - * - * If Screen Objects are supported, they can be used to fully - * replace the functionality provided by the framebuffer registers - * (SVGA_REG_WIDTH, HEIGHT, etc.) and by SVGA_CAP_DISPLAY_TOPOLOGY. - * - * The screen object is a struct with guaranteed binary - * compatibility. New flags can be added, and the struct may grow, - * but existing fields must retain their meaning. - * - * Added with SVGA_FIFO_CAP_SCREEN_OBJECT_2 are required fields of - * a SVGAGuestPtr that is used to back the screen contents. This - * memory must come from the GFB. The guest is not allowed to - * access the memory and doing so will have undefined results. The - * backing store is required to be page aligned and the size is - * padded to the next page boundry. The number of pages is: - * (bytesPerLine * size.width * 4 + PAGE_SIZE - 1) / PAGE_SIZE - * - * The pitch in the backingStore is required to be at least large - * enough to hold a 32bbp scanline. It is recommended that the - * driver pad bytesPerLine for a potential performance win. - * - * The cloneCount field is treated as a hint from the guest that - * the user wants this display to be cloned, countCount times. A - * value of zero means no cloning should happen. - */ - -#define SVGA_SCREEN_MUST_BE_SET (1 << 0) -#define SVGA_SCREEN_HAS_ROOT SVGA_SCREEN_MUST_BE_SET /* Deprecated */ -#define SVGA_SCREEN_IS_PRIMARY (1 << 1) +#define SVGA_SCREEN_MUST_BE_SET (1 << 0) +#define SVGA_SCREEN_HAS_ROOT SVGA_SCREEN_MUST_BE_SET +#define SVGA_SCREEN_IS_PRIMARY (1 << 1) #define SVGA_SCREEN_FULLSCREEN_HINT (1 << 2) -/* - * Added with SVGA_FIFO_CAP_SCREEN_OBJECT_2. When the screen is - * deactivated the base layer is defined to lose all contents and - * become black. When a screen is deactivated the backing store is - * optional. When set backingPtr and bytesPerLine will be ignored. - */ -#define SVGA_SCREEN_DEACTIVATE (1 << 3) +#define SVGA_SCREEN_DEACTIVATE (1 << 3) -/* - * Added with SVGA_FIFO_CAP_SCREEN_OBJECT_2. When this flag is set - * the screen contents will be outputted as all black to the user - * though the base layer contents is preserved. The screen base layer - * can still be read and written to like normal though the no visible - * effect will be seen by the user. When the flag is changed the - * screen will be blanked or redrawn to the current contents as needed - * without any extra commands from the driver. This flag only has an - * effect when the screen is not deactivated. - */ #define SVGA_SCREEN_BLANKING (1 << 4) -typedef -#include "vmware_pack_begin.h" -struct { - uint32 structSize; /* sizeof(SVGAScreenObject) */ - uint32 id; - uint32 flags; - struct { - uint32 width; - uint32 height; - } size; - struct { - int32 x; - int32 y; - } root; - - /* - * Added and required by SVGA_FIFO_CAP_SCREEN_OBJECT_2, optional - * with SVGA_FIFO_CAP_SCREEN_OBJECT. 
- */ - SVGAGuestImage backingStore; - - /* - * The cloneCount field is treated as a hint from the guest that - * the user wants this display to be cloned, cloneCount times. - * - * A value of zero means no cloning should happen. - */ - uint32 cloneCount; -} -#include "vmware_pack_end.h" -SVGAScreenObject; - - -/* - * Commands in the command FIFO: - * - * Command IDs defined below are used for the traditional 2D FIFO - * communication (not all commands are available for all versions of the - * SVGA FIFO protocol). - * - * Note the holes in the command ID numbers: These commands have been - * deprecated, and the old IDs must not be reused. - * - * Command IDs from 1000 to 2999 are reserved for use by the SVGA3D - * protocol. - * - * Each command's parameters are described by the comments and - * structs below. - */ +#pragma pack(push, 1) +typedef struct { + uint32 structSize; + uint32 id; + uint32 flags; + struct { + uint32 width; + uint32 height; + } size; + struct { + int32 x; + int32 y; + } root; + + SVGAGuestImage backingStore; + + uint32 cloneCount; +} SVGAScreenObject; +#pragma pack(pop) typedef enum { - SVGA_CMD_INVALID_CMD = 0, - SVGA_CMD_UPDATE = 1, - SVGA_CMD_RECT_COPY = 3, - SVGA_CMD_RECT_ROP_COPY = 14, - SVGA_CMD_DEFINE_CURSOR = 19, - SVGA_CMD_DEFINE_ALPHA_CURSOR = 22, - SVGA_CMD_UPDATE_VERBOSE = 25, - SVGA_CMD_FRONT_ROP_FILL = 29, - SVGA_CMD_FENCE = 30, - SVGA_CMD_ESCAPE = 33, - SVGA_CMD_DEFINE_SCREEN = 34, - SVGA_CMD_DESTROY_SCREEN = 35, - SVGA_CMD_DEFINE_GMRFB = 36, - SVGA_CMD_BLIT_GMRFB_TO_SCREEN = 37, - SVGA_CMD_BLIT_SCREEN_TO_GMRFB = 38, - SVGA_CMD_ANNOTATION_FILL = 39, - SVGA_CMD_ANNOTATION_COPY = 40, - SVGA_CMD_DEFINE_GMR2 = 41, - SVGA_CMD_REMAP_GMR2 = 42, - SVGA_CMD_DEAD = 43, - SVGA_CMD_DEAD_2 = 44, - SVGA_CMD_NOP = 45, - SVGA_CMD_NOP_ERROR = 46, - SVGA_CMD_MAX + SVGA_CMD_INVALID_CMD = 0, + SVGA_CMD_UPDATE = 1, + SVGA_CMD_RECT_COPY = 3, + SVGA_CMD_RECT_ROP_COPY = 14, + SVGA_CMD_DEFINE_CURSOR = 19, + SVGA_CMD_DEFINE_ALPHA_CURSOR = 22, + SVGA_CMD_UPDATE_VERBOSE = 25, + SVGA_CMD_FRONT_ROP_FILL = 29, + SVGA_CMD_FENCE = 30, + SVGA_CMD_ESCAPE = 33, + SVGA_CMD_DEFINE_SCREEN = 34, + SVGA_CMD_DESTROY_SCREEN = 35, + SVGA_CMD_DEFINE_GMRFB = 36, + SVGA_CMD_BLIT_GMRFB_TO_SCREEN = 37, + SVGA_CMD_BLIT_SCREEN_TO_GMRFB = 38, + SVGA_CMD_ANNOTATION_FILL = 39, + SVGA_CMD_ANNOTATION_COPY = 40, + SVGA_CMD_DEFINE_GMR2 = 41, + SVGA_CMD_REMAP_GMR2 = 42, + SVGA_CMD_DEAD = 43, + SVGA_CMD_DEAD_2 = 44, + SVGA_CMD_NOP = 45, + SVGA_CMD_NOP_ERROR = 46, + SVGA_CMD_MAX } SVGAFifoCmdId; -#define SVGA_CMD_MAX_DATASIZE (256 * 1024) -#define SVGA_CMD_MAX_ARGS 64 - - -/* - * SVGA_CMD_UPDATE -- - * - * This is a DMA transfer which copies from the Guest Framebuffer - * (GFB) at BAR1 + SVGA_REG_FB_OFFSET to any screens which - * intersect with the provided virtual rectangle. - * - * This command does not support using arbitrary guest memory as a - * data source- it only works with the pre-defined GFB memory. - * This command also does not support signed virtual coordinates. - * If you have defined screens (using SVGA_CMD_DEFINE_SCREEN) with - * negative root x/y coordinates, the negative portion of those - * screens will not be reachable by this command. - * - * This command is not necessary when using framebuffer - * traces. Traces are automatically enabled if the SVGA FIFO is - * disabled, and you may explicitly enable/disable traces using - * SVGA_REG_TRACES. With traces enabled, any write to the GFB will - * automatically act as if a subsequent SVGA_CMD_UPDATE was issued. 
- * - * Traces and SVGA_CMD_UPDATE are the only supported ways to render - * pseudocolor screen updates. The newer Screen Object commands - * only support true color formats. - * - * Availability: - * Always available. - */ - -typedef -#include "vmware_pack_begin.h" -struct { - uint32 x; - uint32 y; - uint32 width; - uint32 height; -} -#include "vmware_pack_end.h" -SVGAFifoCmdUpdate; - - -/* - * SVGA_CMD_RECT_COPY -- - * - * Perform a rectangular DMA transfer from one area of the GFB to - * another, and copy the result to any screens which intersect it. - * - * Availability: - * SVGA_CAP_RECT_COPY - */ - -typedef -#include "vmware_pack_begin.h" -struct { - uint32 srcX; - uint32 srcY; - uint32 destX; - uint32 destY; - uint32 width; - uint32 height; -} -#include "vmware_pack_end.h" -SVGAFifoCmdRectCopy; - - -/* - * SVGA_CMD_RECT_ROP_COPY -- - * - * Perform a rectangular DMA transfer from one area of the GFB to - * another, and copy the result to any screens which intersect it. - * The value of ROP may only be SVGA_ROP_COPY, and this command is - * only supported for backwards compatibility reasons. - * - * Availability: - * SVGA_CAP_RECT_COPY - */ - -typedef -#include "vmware_pack_begin.h" -struct { - uint32 srcX; - uint32 srcY; - uint32 destX; - uint32 destY; - uint32 width; - uint32 height; - uint32 rop; -} -#include "vmware_pack_end.h" -SVGAFifoCmdRectRopCopy; - - -/* - * SVGA_CMD_DEFINE_CURSOR -- - * - * Provide a new cursor image, as an AND/XOR mask. - * - * The recommended way to position the cursor overlay is by using - * the SVGA_FIFO_CURSOR_* registers, supported by the - * SVGA_FIFO_CAP_CURSOR_BYPASS_3 capability. - * - * Availability: - * SVGA_CAP_CURSOR - */ - -typedef -#include "vmware_pack_begin.h" -struct { - uint32 id; /* Reserved, must be zero. */ - uint32 hotspotX; - uint32 hotspotY; - uint32 width; - uint32 height; - uint32 andMaskDepth; /* Value must be 1 or equal to BITS_PER_PIXEL */ - uint32 xorMaskDepth; /* Value must be 1 or equal to BITS_PER_PIXEL */ - /* - * Followed by scanline data for AND mask, then XOR mask. - * Each scanline is padded to a 32-bit boundary. - */ -} -#include "vmware_pack_end.h" -SVGAFifoCmdDefineCursor; - - -/* - * SVGA_CMD_DEFINE_ALPHA_CURSOR -- - * - * Provide a new cursor image, in 32-bit BGRA format. - * - * The recommended way to position the cursor overlay is by using - * the SVGA_FIFO_CURSOR_* registers, supported by the - * SVGA_FIFO_CAP_CURSOR_BYPASS_3 capability. - * - * Availability: - * SVGA_CAP_ALPHA_CURSOR - */ - -typedef -#include "vmware_pack_begin.h" -struct { - uint32 id; /* Reserved, must be zero. */ - uint32 hotspotX; - uint32 hotspotY; - uint32 width; - uint32 height; - /* Followed by scanline data */ -} -#include "vmware_pack_end.h" -SVGAFifoCmdDefineAlphaCursor; - - -/* - * Provide a new large cursor image, as an AND/XOR mask. - * - * Should only be used for CursorMob functionality - */ - -typedef -#include "vmware_pack_begin.h" -struct { - uint32 hotspotX; - uint32 hotspotY; - uint32 width; - uint32 height; - uint32 andMaskDepth; - uint32 xorMaskDepth; - /* - * Followed by scanline data for AND mask, then XOR mask. - * Each scanline is padded to a 32-bit boundary. - */ -} -#include "vmware_pack_end.h" -SVGAGBColorCursorHeader; - - -/* - * Provide a new large cursor image, in 32-bit BGRA format. 
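Returning to SVGA_CMD_UPDATE: a minimal sketch of pushing the command into the FIFO data area. The fifo_reserve()/fifo_commit() helpers stand in for whatever mechanism the driver uses to append words to the FIFO and are assumptions of this example.

```c
#include "svga_reg.h"

extern void *fifo_reserve(uint32 bytes);   /* hypothetical FIFO append helpers */
extern void fifo_commit(uint32 bytes);

/* Ask the device to scan out a dirty rectangle of the legacy GFB. */
static void svga_update_rect(uint32 x, uint32 y, uint32 width, uint32 height)
{
	uint32 *cmd = fifo_reserve(sizeof(uint32) + sizeof(SVGAFifoCmdUpdate));
	SVGAFifoCmdUpdate *body = (SVGAFifoCmdUpdate *)&cmd[1];

	cmd[0] = SVGA_CMD_UPDATE;
	body->x = x;
	body->y = y;
	body->width = width;
	body->height = height;
	fifo_commit(sizeof(uint32) + sizeof(SVGAFifoCmdUpdate));
}
```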
- * - * Should only be used for CursorMob functionality - */ - -typedef -#include "vmware_pack_begin.h" -struct { - uint32 hotspotX; - uint32 hotspotY; - uint32 width; - uint32 height; - /* Followed by scanline data */ -} -#include "vmware_pack_end.h" -SVGAGBAlphaCursorHeader; - - /* - * Define the SVGA guest backed cursor types - */ +#define SVGA_CMD_MAX_DATASIZE (256 * 1024) +#define SVGA_CMD_MAX_ARGS 64 + +#pragma pack(push, 1) +typedef struct { + uint32 x; + uint32 y; + uint32 width; + uint32 height; +} SVGAFifoCmdUpdate; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint32 srcX; + uint32 srcY; + uint32 destX; + uint32 destY; + uint32 width; + uint32 height; +} SVGAFifoCmdRectCopy; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint32 srcX; + uint32 srcY; + uint32 destX; + uint32 destY; + uint32 width; + uint32 height; + uint32 rop; +} SVGAFifoCmdRectRopCopy; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint32 id; + uint32 hotspotX; + uint32 hotspotY; + uint32 width; + uint32 height; + uint32 andMaskDepth; + uint32 xorMaskDepth; + +} SVGAFifoCmdDefineCursor; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint32 id; + uint32 hotspotX; + uint32 hotspotY; + uint32 width; + uint32 height; + +} SVGAFifoCmdDefineAlphaCursor; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint32 hotspotX; + uint32 hotspotY; + uint32 width; + uint32 height; + uint32 andMaskDepth; + uint32 xorMaskDepth; + +} SVGAGBColorCursorHeader; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint32 hotspotX; + uint32 hotspotY; + uint32 width; + uint32 height; + +} SVGAGBAlphaCursorHeader; +#pragma pack(pop) typedef enum { - SVGA_COLOR_CURSOR = 0, - SVGA_ALPHA_CURSOR = 1, + SVGA_COLOR_CURSOR = 0, + SVGA_ALPHA_CURSOR = 1, } SVGAGBCursorType; -/* - * Provide a new large cursor image. - * - * Should only be used for CursorMob functionality - */ - -typedef -#include "vmware_pack_begin.h" -struct { - SVGAGBCursorType type; - union { - SVGAGBColorCursorHeader colorHeader; - SVGAGBAlphaCursorHeader alphaHeader; - } header; - uint32 sizeInBytes; - /* - * Followed by the cursor data - */ -} -#include "vmware_pack_end.h" -SVGAGBCursorHeader; - - -/* - * SVGA_CMD_UPDATE_VERBOSE -- - * - * Just like SVGA_CMD_UPDATE, but also provide a per-rectangle - * 'reason' value, an opaque cookie which is used by internal - * debugging tools. Third party drivers should not use this - * command. - * - * Availability: - * SVGA_CAP_EXTENDED_FIFO - */ - -typedef -#include "vmware_pack_begin.h" -struct { - uint32 x; - uint32 y; - uint32 width; - uint32 height; - uint32 reason; -} -#include "vmware_pack_end.h" -SVGAFifoCmdUpdateVerbose; - - -/* - * SVGA_CMD_FRONT_ROP_FILL -- - * - * This is a hint which tells the SVGA device that the driver has - * just filled a rectangular region of the GFB with a solid - * color. Instead of reading these pixels from the GFB, the device - * can assume that they all equal 'color'. This is primarily used - * for remote desktop protocols. - * - * Availability: - * SVGA_FIFO_CAP_ACCELFRONT - */ - -#define SVGA_ROP_COPY 0x03 - -typedef -#include "vmware_pack_begin.h" -struct { - uint32 color; /* In the same format as the GFB */ - uint32 x; - uint32 y; - uint32 width; - uint32 height; - uint32 rop; /* Must be SVGA_ROP_COPY */ -} -#include "vmware_pack_end.h" -SVGAFifoCmdFrontRopFill; - - -/* - * SVGA_CMD_FENCE -- - * - * Insert a synchronization fence. 
When the SVGA device reaches - * this command, it will copy the 'fence' value into the - * SVGA_FIFO_FENCE register. It will also compare the fence against - * SVGA_FIFO_FENCE_GOAL. If the fence matches the goal and the - * SVGA_IRQFLAG_FENCE_GOAL interrupt is enabled, the device will - * raise this interrupt. - * - * Availability: - * SVGA_FIFO_FENCE for this command, - * SVGA_CAP_IRQMASK for SVGA_FIFO_FENCE_GOAL. - */ - -typedef -#include "vmware_pack_begin.h" -struct { - uint32 fence; -} -#include "vmware_pack_end.h" -SVGAFifoCmdFence; - - -/* - * SVGA_CMD_ESCAPE -- - * - * Send an extended or vendor-specific variable length command. - * This is used for video overlay, third party plugins, and - * internal debugging tools. See svga_escape.h - * - * Availability: - * SVGA_FIFO_CAP_ESCAPE - */ - -typedef -#include "vmware_pack_begin.h" -struct { - uint32 nsid; - uint32 size; - /* followed by 'size' bytes of data */ -} -#include "vmware_pack_end.h" -SVGAFifoCmdEscape; - - -/* - * SVGA_CMD_DEFINE_SCREEN -- - * - * Define or redefine an SVGAScreenObject. See the description of - * SVGAScreenObject above. The video driver is responsible for - * generating new screen IDs. They should be small positive - * integers. The virtual device will have an implementation - * specific upper limit on the number of screen IDs - * supported. Drivers are responsible for recycling IDs. The first - * valid ID is zero. - * - * - Interaction with other registers: - * - * For backwards compatibility, when the GFB mode registers (WIDTH, - * HEIGHT, PITCHLOCK, BITS_PER_PIXEL) are modified, the SVGA device - * deletes all screens other than screen #0, and redefines screen - * #0 according to the specified mode. Drivers that use - * SVGA_CMD_DEFINE_SCREEN should destroy or redefine screen #0. - * - * If you use screen objects, do not use the legacy multi-mon - * registers (SVGA_REG_NUM_GUEST_DISPLAYS, SVGA_REG_DISPLAY_*). - * - * Availability: - * SVGA_FIFO_CAP_SCREEN_OBJECT or SVGA_FIFO_CAP_SCREEN_OBJECT_2 - */ - -typedef -#include "vmware_pack_begin.h" -struct { - SVGAScreenObject screen; /* Variable-length according to version */ -} -#include "vmware_pack_end.h" -SVGAFifoCmdDefineScreen; - - -/* - * SVGA_CMD_DESTROY_SCREEN -- - * - * Destroy an SVGAScreenObject. Its ID is immediately available for - * re-use. - * - * Availability: - * SVGA_FIFO_CAP_SCREEN_OBJECT or SVGA_FIFO_CAP_SCREEN_OBJECT_2 - */ - -typedef -#include "vmware_pack_begin.h" -struct { - uint32 screenId; -} -#include "vmware_pack_end.h" -SVGAFifoCmdDestroyScreen; - - -/* - * SVGA_CMD_DEFINE_GMRFB -- - * - * This command sets a piece of SVGA device state called the - * Guest Memory Region Framebuffer, or GMRFB. The GMRFB is a - * piece of light-weight state which identifies the location and - * format of an image in guest memory or in BAR1. The GMRFB has - * an arbitrary size, and it doesn't need to match the geometry - * of the GFB or any screen object. - * - * The GMRFB can be redefined as often as you like. You could - * always use the same GMRFB, you could redefine it before - * rendering from a different guest screen, or you could even - * redefine it before every blit. - * - * There are multiple ways to use this command. The simplest way is - * to use it to move the framebuffer either to elsewhere in the GFB - * (BAR1) memory region, or to a user-defined GMR. This lets a - * driver use a framebuffer allocated entirely out of normal system - * memory, which we encourage. 
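Combining the FENCE command with the Legacy Sync described earlier, as a hedged sketch; fifo_reserve()/fifo_commit() again stand in for the driver's FIFO append mechanism, and svga_write_reg()/svga_read_reg() are assumed accessors.

```c
#include "svga_reg.h"

extern void *fifo_reserve(uint32 bytes);                   /* hypothetical helpers */
extern void fifo_commit(uint32 bytes);
extern void svga_write_reg(uint32 index, uint32 value);    /* hypothetical accessors */
extern uint32 svga_read_reg(uint32 index);

/* Insert SVGA_CMD_FENCE with the given value, then wait with a Legacy Sync. */
static void svga_insert_fence_and_sync(uint32 fence)
{
	uint32 *cmd = fifo_reserve(sizeof(uint32) + sizeof(SVGAFifoCmdFence));

	cmd[0] = SVGA_CMD_FENCE;
	((SVGAFifoCmdFence *)&cmd[1])->fence = fence;
	fifo_commit(sizeof(uint32) + sizeof(SVGAFifoCmdFence));

	/* Legacy Sync: write SYNC, then poll BUSY until the device goes idle. */
	svga_write_reg(SVGA_REG_SYNC, 1);
	while (svga_read_reg(SVGA_REG_BUSY))
		;   /* each BUSY read may block inside the device until it makes progress */
}
```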
- * - * Another way to use this command is to set up a ring buffer of - * updates in GFB memory. If a driver wants to ensure that no - * frames are skipped by the SVGA device, it is important that the - * driver not modify the source data for a blit until the device is - * done processing the command. One efficient way to accomplish - * this is to use a ring of small DMA buffers. Each buffer is used - * for one blit, then we move on to the next buffer in the - * ring. The FENCE mechanism is used to protect each buffer from - * re-use until the device is finished with that buffer's - * corresponding blit. - * - * This command does not affect the meaning of SVGA_CMD_UPDATE. - * UPDATEs always occur from the legacy GFB memory area. This - * command has no support for pseudocolor GMRFBs. Currently only - * true-color 15, 16, and 24-bit depths are supported. Future - * devices may expose capabilities for additional framebuffer - * formats. - * - * The default GMRFB value is undefined. Drivers must always send - * this command at least once before performing any blit from the - * GMRFB. - * - * Availability: - * SVGA_FIFO_CAP_SCREEN_OBJECT or SVGA_FIFO_CAP_SCREEN_OBJECT_2 - */ - -typedef -#include "vmware_pack_begin.h" -struct { - SVGAGuestPtr ptr; - uint32 bytesPerLine; - SVGAGMRImageFormat format; -} -#include "vmware_pack_end.h" -SVGAFifoCmdDefineGMRFB; - - -/* - * SVGA_CMD_BLIT_GMRFB_TO_SCREEN -- - * - * This is a guest-to-host blit. It performs a DMA operation to - * copy a rectangular region of pixels from the current GMRFB to - * a ScreenObject. - * - * The destination coordinate may be specified relative to a - * screen's origin. The provided screen ID must be valid. - * - * The SVGA device is guaranteed to finish reading from the GMRFB - * by the time any subsequent FENCE commands are reached. - * - * This command consumes an annotation. See the - * SVGA_CMD_ANNOTATION_* commands for details. - * - * Availability: - * SVGA_FIFO_CAP_SCREEN_OBJECT or SVGA_FIFO_CAP_SCREEN_OBJECT_2 - */ - -typedef -#include "vmware_pack_begin.h" -struct { - SVGASignedPoint srcOrigin; - SVGASignedRect destRect; - uint32 destScreenId; -} -#include "vmware_pack_end.h" -SVGAFifoCmdBlitGMRFBToScreen; - - -/* - * SVGA_CMD_BLIT_SCREEN_TO_GMRFB -- - * - * This is a host-to-guest blit. It performs a DMA operation to - * copy a rectangular region of pixels from a single ScreenObject - * back to the current GMRFB. - * - * The source coordinate is specified relative to a screen's - * origin. The provided screen ID must be valid. If any parameters - * are invalid, the resulting pixel values are undefined. - * - * The SVGA device is guaranteed to finish writing to the GMRFB by - * the time any subsequent FENCE commands are reached. - * - * Availability: - * SVGA_FIFO_CAP_SCREEN_OBJECT or SVGA_FIFO_CAP_SCREEN_OBJECT_2 - */ - -typedef -#include "vmware_pack_begin.h" -struct { - SVGASignedPoint destOrigin; - SVGASignedRect srcRect; - uint32 srcScreenId; -} -#include "vmware_pack_end.h" -SVGAFifoCmdBlitScreenToGMRFB; - - -/* - * SVGA_CMD_ANNOTATION_FILL -- - * - * The annotation commands have been deprecated, should not be used - * by new drivers. They used to provide performance hints to the SVGA - * device about the content of screen updates, but newer SVGA devices - * ignore these. 
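
The ring-of-DMA-buffers scheme described above pairs each blit source with the fence emitted right after it, and refuses to reuse a slot until the device's completed fence has passed that value. An illustrative sketch, with hypothetical read_completed_fence()/insert_fence() helpers standing in for however a driver reads SVGA_FIFO_FENCE and emits SVGAFifoCmdFence:

uint32 read_completed_fence(void);	/* hypothetical: reads SVGA_FIFO_FENCE */
uint32 insert_fence(void);		/* hypothetical: emits SVGAFifoCmdFence */

#define BLIT_RING_SLOTS 8

struct blit_slot {
	void  *data;	/* guest memory the GMRFB will point at for one blit */
	uint32 fence;	/* fence emitted right after the blit that read it */
};

static struct blit_slot blit_ring[BLIT_RING_SLOTS];
static uint32 blit_ring_head;

static struct blit_slot *next_blit_slot(void)
{
	struct blit_slot *slot = &blit_ring[blit_ring_head++ % BLIT_RING_SLOTS];

	/* Don't overwrite the slot until the device has passed its fence. */
	while ((int32)(read_completed_fence() - slot->fence) < 0)
		cpu_relax();

	return slot;
}

/* After emitting the blit that reads slot->data:  slot->fence = insert_fence(); */
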
- * - * Availability: - * SVGA_FIFO_CAP_SCREEN_OBJECT or SVGA_FIFO_CAP_SCREEN_OBJECT_2 - */ - -typedef -#include "vmware_pack_begin.h" -struct { - SVGAColorBGRX color; -} -#include "vmware_pack_end.h" -SVGAFifoCmdAnnotationFill; - - -/* - * SVGA_CMD_ANNOTATION_COPY -- - * - * The annotation commands have been deprecated, should not be used - * by new drivers. They used to provide performance hints to the SVGA - * device about the content of screen updates, but newer SVGA devices - * ignore these. - * - * Availability: - * SVGA_FIFO_CAP_SCREEN_OBJECT or SVGA_FIFO_CAP_SCREEN_OBJECT_2 - */ - -typedef -#include "vmware_pack_begin.h" -struct { - SVGASignedPoint srcOrigin; - uint32 srcScreenId; -} -#include "vmware_pack_end.h" -SVGAFifoCmdAnnotationCopy; - - -/* - * SVGA_CMD_DEFINE_GMR2 -- - * - * Define guest memory region v2. See the description of GMRs above. - * - * Availability: - * SVGA_CAP_GMR2 - */ - -typedef -#include "vmware_pack_begin.h" -struct { - uint32 gmrId; - uint32 numPages; -} -#include "vmware_pack_end.h" -SVGAFifoCmdDefineGMR2; - - -/* - * SVGA_CMD_REMAP_GMR2 -- - * - * Remap guest memory region v2. See the description of GMRs above. - * - * This command allows guest to modify a portion of an existing GMR by - * invalidating it or reassigning it to different guest physical pages. - * The pages are identified by physical page number (PPN). The pages - * are assumed to be pinned and valid for DMA operations. - * - * Description of command flags: - * - * SVGA_REMAP_GMR2_VIA_GMR: If enabled, references a PPN list in a GMR. - * The PPN list must not overlap with the remap region (this can be - * handled trivially by referencing a separate GMR). If flag is - * disabled, PPN list is appended to SVGARemapGMR command. - * - * SVGA_REMAP_GMR2_PPN64: If set, PPN list is in PPN64 format, otherwise - * it is in PPN32 format. - * - * SVGA_REMAP_GMR2_SINGLE_PPN: If set, PPN list contains a single entry. - * A single PPN can be used to invalidate a portion of a GMR or - * map it to to a single guest scratch page. 
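
Putting the flag descriptions above together: the fixed SVGAFifoCmdRemapGMR2 body is followed either by a single SVGAGuestPtr (SVGA_REMAP_GMR2_VIA_GMR) or by an inline PPN list in 32- or 64-bit entries, collapsed to one entry when SVGA_REMAP_GMR2_SINGLE_PPN is set. A small sketch, not from the patch, of the resulting command size; the helper name is made up:

static uint32 remap_gmr2_cmd_size(uint32 num_pages, SVGARemapGMR2Flags flags)
{
	uint32 size = sizeof(uint32) +			/* SVGA_CMD_REMAP_GMR2 id */
		      sizeof(SVGAFifoCmdRemapGMR2);	/* fixed body */

	if (flags & SVGA_REMAP_GMR2_VIA_GMR)
		return size + sizeof(SVGAGuestPtr);	/* PPN list lives in a GMR */

	if (flags & SVGA_REMAP_GMR2_SINGLE_PPN)
		num_pages = 1;				/* list holds one entry */

	return size + num_pages * ((flags & SVGA_REMAP_GMR2_PPN64) ?
				   sizeof(PPN64) : sizeof(PPN32));
}
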
- * - * Availability: - * SVGA_CAP_GMR2 - */ +#pragma pack(push, 1) +typedef struct { + SVGAGBCursorType type; + union { + SVGAGBColorCursorHeader colorHeader; + SVGAGBAlphaCursorHeader alphaHeader; + } header; + uint32 sizeInBytes; + +} SVGAGBCursorHeader; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint32 x; + uint32 y; + uint32 width; + uint32 height; + uint32 reason; +} SVGAFifoCmdUpdateVerbose; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint32 color; + uint32 x; + uint32 y; + uint32 width; + uint32 height; + uint32 rop; +} SVGAFifoCmdFrontRopFill; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint32 fence; +} SVGAFifoCmdFence; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint32 nsid; + uint32 size; + +} SVGAFifoCmdEscape; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + SVGAScreenObject screen; +} SVGAFifoCmdDefineScreen; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint32 screenId; +} SVGAFifoCmdDestroyScreen; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + SVGAGuestPtr ptr; + uint32 bytesPerLine; + SVGAGMRImageFormat format; +} SVGAFifoCmdDefineGMRFB; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + SVGASignedPoint srcOrigin; + SVGASignedRect destRect; + uint32 destScreenId; +} SVGAFifoCmdBlitGMRFBToScreen; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + SVGASignedPoint destOrigin; + SVGASignedRect srcRect; + uint32 srcScreenId; +} SVGAFifoCmdBlitScreenToGMRFB; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + SVGAColorBGRX color; +} SVGAFifoCmdAnnotationFill; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + SVGASignedPoint srcOrigin; + uint32 srcScreenId; +} SVGAFifoCmdAnnotationCopy; +#pragma pack(pop) + +#pragma pack(push, 1) +typedef struct { + uint32 gmrId; + uint32 numPages; +} SVGAFifoCmdDefineGMR2; +#pragma pack(pop) typedef enum { - SVGA_REMAP_GMR2_PPN32 = 0, - SVGA_REMAP_GMR2_VIA_GMR = (1 << 0), - SVGA_REMAP_GMR2_PPN64 = (1 << 1), - SVGA_REMAP_GMR2_SINGLE_PPN = (1 << 2), + SVGA_REMAP_GMR2_PPN32 = 0, + SVGA_REMAP_GMR2_VIA_GMR = (1 << 0), + SVGA_REMAP_GMR2_PPN64 = (1 << 1), + SVGA_REMAP_GMR2_SINGLE_PPN = (1 << 2), } SVGARemapGMR2Flags; -typedef -#include "vmware_pack_begin.h" -struct { - uint32 gmrId; - SVGARemapGMR2Flags flags; - uint32 offsetPages; /* offset in pages to begin remap */ - uint32 numPages; /* number of pages to remap */ - /* - * Followed by additional data depending on SVGARemapGMR2Flags. - * - * If flag SVGA_REMAP_GMR2_VIA_GMR is set, single SVGAGuestPtr follows. - * Otherwise an array of page descriptors in PPN32 or PPN64 format - * (according to flag SVGA_REMAP_GMR2_PPN64) follows. If flag - * SVGA_REMAP_GMR2_SINGLE_PPN is set, array contains a single entry. - */ -} -#include "vmware_pack_end.h" -SVGAFifoCmdRemapGMR2; - - -/* - * Size of SVGA device memory such as frame buffer and FIFO. 
- */ -#define SVGA_VRAM_MIN_SIZE (4 * 640 * 480) /* bytes */ -#define SVGA_VRAM_MIN_SIZE_3D (16 * 1024 * 1024) -#define SVGA_VRAM_MAX_SIZE (128 * 1024 * 1024) -#define SVGA_MEMORY_SIZE_MAX (1024 * 1024 * 1024) -#define SVGA_FIFO_SIZE_MAX (2 * 1024 * 1024) -#define SVGA_GRAPHICS_MEMORY_KB_MIN (32 * 1024) +#pragma pack(push, 1) +typedef struct { + uint32 gmrId; + SVGARemapGMR2Flags flags; + uint32 offsetPages; + uint32 numPages; + +} SVGAFifoCmdRemapGMR2; +#pragma pack(pop) + +#define SVGA_VRAM_MIN_SIZE (4 * 640 * 480) +#define SVGA_VRAM_MIN_SIZE_3D (16 * 1024 * 1024) +#define SVGA_VRAM_MAX_SIZE (128 * 1024 * 1024) +#define SVGA_MEMORY_SIZE_MAX (1024 * 1024 * 1024) +#define SVGA_FIFO_SIZE_MAX (2 * 1024 * 1024) +#define SVGA_GRAPHICS_MEMORY_KB_MIN (32 * 1024) #define SVGA_GRAPHICS_MEMORY_KB_MAX_2GB (2 * 1024 * 1024) #define SVGA_GRAPHICS_MEMORY_KB_MAX_3GB (3 * 1024 * 1024) #define SVGA_GRAPHICS_MEMORY_KB_MAX_4GB (4 * 1024 * 1024) #define SVGA_GRAPHICS_MEMORY_KB_MAX_8GB (8 * 1024 * 1024) #define SVGA_GRAPHICS_MEMORY_KB_DEFAULT (256 * 1024) -#define SVGA_VRAM_SIZE_W2K (64 * 1024 * 1024) /* 64 MB */ +#define SVGA_VRAM_SIZE_W2K (64 * 1024 * 1024) #if defined(VMX86_SERVER) -#define SVGA_VRAM_SIZE (4 * 1024 * 1024) -#define SVGA_VRAM_SIZE_3D (64 * 1024 * 1024) -#define SVGA_FIFO_SIZE (256 * 1024) -#define SVGA_FIFO_SIZE_3D (516 * 1024) -#define SVGA_MEMORY_SIZE_DEFAULT (160 * 1024 * 1024) -#define SVGA_AUTODETECT_DEFAULT FALSE +#define SVGA_VRAM_SIZE (4 * 1024 * 1024) +#define SVGA_VRAM_SIZE_3D (64 * 1024 * 1024) +#define SVGA_FIFO_SIZE (256 * 1024) +#define SVGA_FIFO_SIZE_3D (516 * 1024) +#define SVGA_MEMORY_SIZE_DEFAULT (160 * 1024 * 1024) +#define SVGA_AUTODETECT_DEFAULT FALSE #else -#define SVGA_VRAM_SIZE (16 * 1024 * 1024) -#define SVGA_VRAM_SIZE_3D SVGA_VRAM_MAX_SIZE -#define SVGA_FIFO_SIZE (2 * 1024 * 1024) -#define SVGA_FIFO_SIZE_3D SVGA_FIFO_SIZE -#define SVGA_MEMORY_SIZE_DEFAULT (768 * 1024 * 1024) -#define SVGA_AUTODETECT_DEFAULT TRUE +#define SVGA_VRAM_SIZE (16 * 1024 * 1024) +#define SVGA_VRAM_SIZE_3D SVGA_VRAM_MAX_SIZE +#define SVGA_FIFO_SIZE (2 * 1024 * 1024) +#define SVGA_FIFO_SIZE_3D SVGA_FIFO_SIZE +#define SVGA_MEMORY_SIZE_DEFAULT (768 * 1024 * 1024) +#define SVGA_AUTODETECT_DEFAULT TRUE #endif -#define SVGA_FIFO_SIZE_GBOBJECTS (256 * 1024) -#define SVGA_VRAM_SIZE_GBOBJECTS (4 * 1024 * 1024) - -#define SVGA_PCI_REGS_PAGES (1) +#define SVGA_FIFO_SIZE_GBOBJECTS (256 * 1024) +#define SVGA_VRAM_SIZE_GBOBJECTS (4 * 1024 * 1024) #endif diff --git a/drivers/gpu/drm/vmwgfx/device_include/svga_types.h b/drivers/gpu/drm/vmwgfx/device_include/svga_types.h deleted file mode 100644 index beddccee40f6..000000000000 --- a/drivers/gpu/drm/vmwgfx/device_include/svga_types.h +++ /dev/null @@ -1,51 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 OR MIT */ -/********************************************************** - * Copyright 2015 VMware, Inc. - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - **********************************************************/ -#ifndef _VM_BASIC_TYPES_H_ -#define _VM_BASIC_TYPES_H_ -#include <linux/kernel.h> - -typedef u32 uint32; -typedef s32 int32; -typedef u64 uint64; -typedef u16 uint16; -typedef s16 int16; -typedef u8 uint8; -typedef s8 int8; - -typedef uint64 PA; -typedef uint32 PPN; -typedef uint32 PPN32; -typedef uint64 PPN64; - -typedef bool Bool; - -#define MAX_UINT64 U64_MAX -#define MAX_UINT32 U32_MAX -#define MAX_UINT16 U16_MAX - -#define CONST64U(x) x##ULL - -#endif diff --git a/drivers/gpu/drm/vmwgfx/device_include/vm_basic_types.h b/drivers/gpu/drm/vmwgfx/device_include/vm_basic_types.h index 3a195e8106b3..1f6e3bbc6605 100644 --- a/drivers/gpu/drm/vmwgfx/device_include/vm_basic_types.h +++ b/drivers/gpu/drm/vmwgfx/device_include/vm_basic_types.h @@ -1,7 +1,34 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _VM_BASIC_TYPES_H_ -#define _VM_BASIC_TYPES_H_ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ +/********************************************************** + * Copyright 2015-2021 VMware, Inc. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + **********************************************************/ +#ifndef VM_BASIC_TYPES_H +#define VM_BASIC_TYPES_H + #include <linux/kernel.h> +#include <linux/mm.h> +#include <asm/page.h> typedef u32 uint32; typedef s32 int32; @@ -13,10 +40,107 @@ typedef s8 int8; typedef uint64 PA; typedef uint32 PPN; +typedef uint32 PPN32; typedef uint64 PPN64; typedef bool Bool; +#define MAX_UINT64 U64_MAX #define MAX_UINT32 U32_MAX +#define MAX_UINT16 U16_MAX + +#define CONST64U(x) x##ULL + +#ifndef MBYTES_SHIFT +#define MBYTES_SHIFT 20 +#endif +#ifndef MBYTES_2_BYTES +#define MBYTES_2_BYTES(_nbytes) ((uint64)(_nbytes) << MBYTES_SHIFT) +#endif + +/* + * MKS Guest Stats types + */ + +typedef struct MKSGuestStatCounter { + atomic64_t count; +} MKSGuestStatCounter; + +typedef struct MKSGuestStatCounterTime { + MKSGuestStatCounter counter; + atomic64_t selfCycles; + atomic64_t totalCycles; +} MKSGuestStatCounterTime; + +/* + * Flags for MKSGuestStatInfoEntry::flags below + */ + +#define MKS_GUEST_STAT_FLAG_NONE 0 +#define MKS_GUEST_STAT_FLAG_TIME (1U << 0) + +typedef __attribute__((aligned(32))) struct MKSGuestStatInfoEntry { + union { + const char *s; + uint64 u; + } name; + union { + const char *s; + uint64 u; + } description; + uint64 flags; + union { + MKSGuestStatCounter *counter; + MKSGuestStatCounterTime *counterTime; + uint64 u; + } stat; +} MKSGuestStatInfoEntry; + +#define INVALID_PPN64 ((PPN64)0x000fffffffffffffULL) + +#define MKS_GUEST_STAT_INSTANCE_DESC_LENGTH 1024 +#define MKS_GUEST_STAT_INSTANCE_MAX_STATS 4096 +#define MKS_GUEST_STAT_INSTANCE_MAX_STAT_PPNS \ + (PFN_UP(MKS_GUEST_STAT_INSTANCE_MAX_STATS * \ + sizeof(MKSGuestStatCounterTime))) +#define MKS_GUEST_STAT_INSTANCE_MAX_INFO_PPNS \ + (PFN_UP(MKS_GUEST_STAT_INSTANCE_MAX_STATS * \ + sizeof(MKSGuestStatInfoEntry))) +#define MKS_GUEST_STAT_AVERAGE_NAME_LENGTH 40 +#define MKS_GUEST_STAT_INSTANCE_MAX_STRS_PPNS \ + (PFN_UP(MKS_GUEST_STAT_INSTANCE_MAX_STATS * \ + MKS_GUEST_STAT_AVERAGE_NAME_LENGTH)) + +/* + * The MKSGuestStatInstanceDescriptor is used as main interface to + * communicate guest stats back to the host code. The guest must + * allocate an instance of this structure at the start of a page and + * provide the physical address to the host. From there the host code + * can walk this structure to find other (pinned) pages containing the + * stats data. + * + * Since the MKSGuestStatInfoEntry structures contain userlevel + * pointers, the InstanceDescriptor also contains pointers to the + * begining of these sections allowing the host side code to correctly + * interpret the pointers. + * + * Because the host side code never acknowledges anything back to the + * guest there is no strict requirement to maintain compatability + * across releases. If the interface changes the host might not be + * able to log stats, but the guest will continue to run normally. + */ + +typedef struct MKSGuestStatInstanceDescriptor { + uint64 reservedMBZ; /* must be zero for now. */ + uint64 statStartVA; /* VA of the start of the stats section. */ + uint64 strsStartVA; /* VA of the start of the strings section. */ + uint64 statLength; /* length of the stats section in bytes. */ + uint64 infoLength; /* length of the info entry section in bytes. */ + uint64 strsLength; /* length of the strings section in bytes. 
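
To make the descriptor layout above concrete, here is the page arithmetic behind the PPN array bounds, assuming 4 KiB pages and an 8-byte atomic64_t (a 64-bit example, not a statement about every configuration):

/*
 * sizeof(MKSGuestStatCounterTime) = 3 * 8 = 24 bytes, so the counter area is
 *   MKS_GUEST_STAT_INSTANCE_MAX_STATS * 24 = 4096 * 24 = 98304 bytes
 * and MKS_GUEST_STAT_INSTANCE_MAX_STAT_PPNS = PFN_UP(98304) = 24 pinned
 * pages; the guest records those page numbers in statPPNs[] so the host can
 * walk the counters without any further handshake.
 */
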
*/ + PPN64 statPPNs[MKS_GUEST_STAT_INSTANCE_MAX_STAT_PPNS]; /* stat counters */ + PPN64 infoPPNs[MKS_GUEST_STAT_INSTANCE_MAX_INFO_PPNS]; /* stat info */ + PPN64 strsPPNs[MKS_GUEST_STAT_INSTANCE_MAX_STRS_PPNS]; /* strings */ + char description[MKS_GUEST_STAT_INSTANCE_DESC_LENGTH]; +} MKSGuestStatInstanceDescriptor; #endif diff --git a/drivers/gpu/drm/vmwgfx/device_include/vmware_pack_begin.h b/drivers/gpu/drm/vmwgfx/device_include/vmware_pack_begin.h deleted file mode 100644 index 75308bd0d970..000000000000 --- a/drivers/gpu/drm/vmwgfx/device_include/vmware_pack_begin.h +++ /dev/null @@ -1,2 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include <linux/compiler.h> diff --git a/drivers/gpu/drm/vmwgfx/device_include/vmware_pack_end.h b/drivers/gpu/drm/vmwgfx/device_include/vmware_pack_end.h deleted file mode 100644 index e93d6f28b68c..000000000000 --- a/drivers/gpu/drm/vmwgfx/device_include/vmware_pack_end.h +++ /dev/null @@ -1,2 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -__packed diff --git a/drivers/gpu/drm/vmwgfx/ttm_memory.c b/drivers/gpu/drm/vmwgfx/ttm_memory.c index aeb0a22a2c34..edd17c30d5a5 100644 --- a/drivers/gpu/drm/vmwgfx/ttm_memory.c +++ b/drivers/gpu/drm/vmwgfx/ttm_memory.c @@ -435,8 +435,10 @@ int ttm_mem_global_init(struct ttm_mem_global *glob, struct device *dev) si_meminfo(&si); + spin_lock(&glob->lock); /* set it as 0 by default to keep original behavior of OOM */ glob->lower_mem_limit = 0; + spin_unlock(&glob->lock); ret = ttm_mem_init_kernel_zone(glob, &si); if (unlikely(ret != 0)) diff --git a/drivers/gpu/drm/vmwgfx/vmw_surface_cache.h b/drivers/gpu/drm/vmwgfx/vmw_surface_cache.h new file mode 100644 index 000000000000..b0d87c5f58d8 --- /dev/null +++ b/drivers/gpu/drm/vmwgfx/vmw_surface_cache.h @@ -0,0 +1,539 @@ +/********************************************************** + * Copyright 2021 VMware, Inc. + * SPDX-License-Identifier: GPL-2.0 OR MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************/ + +#ifndef VMW_SURFACE_CACHE_H +#define VMW_SURFACE_CACHE_H + +#include "device_include/svga3d_surfacedefs.h" + +#include <drm/vmwgfx_drm.h> + +static inline u32 clamped_umul32(u32 a, u32 b) +{ + uint64_t tmp = (uint64_t) a*b; + return (tmp > (uint64_t) ((u32) -1)) ? (u32) -1 : tmp; +} + +/** + * vmw_surface_get_desc - Look up the appropriate SVGA3dSurfaceDesc for the + * given format. 
+ */ +static inline const SVGA3dSurfaceDesc * +vmw_surface_get_desc(SVGA3dSurfaceFormat format) +{ + if (format < ARRAY_SIZE(g_SVGA3dSurfaceDescs)) + return &g_SVGA3dSurfaceDescs[format]; + + return &g_SVGA3dSurfaceDescs[SVGA3D_FORMAT_INVALID]; +} + +/** + * vmw_surface_get_mip_size - Given a base level size and the mip level, + * compute the size of the mip level. + */ +static inline struct drm_vmw_size +vmw_surface_get_mip_size(struct drm_vmw_size base_level, u32 mip_level) +{ + struct drm_vmw_size size = { + .width = max_t(u32, base_level.width >> mip_level, 1), + .height = max_t(u32, base_level.height >> mip_level, 1), + .depth = max_t(u32, base_level.depth >> mip_level, 1) + }; + + return size; +} + +static inline void +vmw_surface_get_size_in_blocks(const SVGA3dSurfaceDesc *desc, + const struct drm_vmw_size *pixel_size, + SVGA3dSize *block_size) +{ + block_size->width = __KERNEL_DIV_ROUND_UP(pixel_size->width, + desc->blockSize.width); + block_size->height = __KERNEL_DIV_ROUND_UP(pixel_size->height, + desc->blockSize.height); + block_size->depth = __KERNEL_DIV_ROUND_UP(pixel_size->depth, + desc->blockSize.depth); +} + +static inline bool +vmw_surface_is_planar_surface(const SVGA3dSurfaceDesc *desc) +{ + return (desc->blockDesc & SVGA3DBLOCKDESC_PLANAR_YUV) != 0; +} + +static inline u32 +vmw_surface_calculate_pitch(const SVGA3dSurfaceDesc *desc, + const struct drm_vmw_size *size) +{ + u32 pitch; + SVGA3dSize blocks; + + vmw_surface_get_size_in_blocks(desc, size, &blocks); + + pitch = blocks.width * desc->pitchBytesPerBlock; + + return pitch; +} + +/** + * vmw_surface_get_image_buffer_size - Calculates image buffer size. + * + * Return the number of bytes of buffer space required to store one image of a + * surface, optionally using the specified pitch. + * + * If pitch is zero, it is assumed that rows are tightly packed. + * + * This function is overflow-safe. If the result would have overflowed, instead + * we return MAX_UINT32. + */ +static inline u32 +vmw_surface_get_image_buffer_size(const SVGA3dSurfaceDesc *desc, + const struct drm_vmw_size *size, + u32 pitch) +{ + SVGA3dSize image_blocks; + u32 slice_size, total_size; + + vmw_surface_get_size_in_blocks(desc, size, &image_blocks); + + if (vmw_surface_is_planar_surface(desc)) { + total_size = clamped_umul32(image_blocks.width, + image_blocks.height); + total_size = clamped_umul32(total_size, image_blocks.depth); + total_size = clamped_umul32(total_size, desc->bytesPerBlock); + return total_size; + } + + if (pitch == 0) + pitch = vmw_surface_calculate_pitch(desc, size); + + slice_size = clamped_umul32(image_blocks.height, pitch); + total_size = clamped_umul32(slice_size, image_blocks.depth); + + return total_size; +} + +/** + * vmw_surface_get_serialized_size - Get the serialized size for the image. + */ +static inline u32 +vmw_surface_get_serialized_size(SVGA3dSurfaceFormat format, + struct drm_vmw_size base_level_size, + u32 num_mip_levels, + u32 num_layers) +{ + const SVGA3dSurfaceDesc *desc = vmw_surface_get_desc(format); + u32 total_size = 0; + u32 mip; + + for (mip = 0; mip < num_mip_levels; mip++) { + struct drm_vmw_size size = + vmw_surface_get_mip_size(base_level_size, mip); + total_size += vmw_surface_get_image_buffer_size(desc, + &size, 0); + } + + return total_size * num_layers; +} + +/** + * vmw_surface_get_serialized_size_extended - Returns the number of bytes + * required for a surface with given parameters. Support for sample count. 
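
The buffer-size helpers above are built on clamped_umul32(), a saturating multiply that returns MAX_UINT32 instead of wrapping, and the serialized size is simply the per-mip image sizes summed over the chain and multiplied by the layer count. A worked example, assuming SVGA3D_A8R8G8B8 is a 4-bytes-per-pixel format with a 1x1 block size:

static u32 example_a8r8g8b8_chain_size(void)
{
	struct drm_vmw_size base = { .width = 256, .height = 256, .depth = 1 };

	/*
	 * 4 * (256*256 + 128*128 + ... + 1*1) = 4 * 87381 = 349524 bytes
	 * for a full 9-level mip chain with a single layer.  The _extended
	 * variant would additionally multiply by the sample count and clamp
	 * the result to U32_MAX.
	 */
	return vmw_surface_get_serialized_size(SVGA3D_A8R8G8B8, base, 9, 1);
}
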
+ */ +static inline u32 +vmw_surface_get_serialized_size_extended(SVGA3dSurfaceFormat format, + struct drm_vmw_size base_level_size, + u32 num_mip_levels, + u32 num_layers, + u32 num_samples) +{ + uint64_t total_size = + vmw_surface_get_serialized_size(format, + base_level_size, + num_mip_levels, + num_layers); + total_size *= max_t(u32, 1, num_samples); + + return min_t(uint64_t, total_size, (uint64_t)U32_MAX); +} + +/** + * vmw_surface_get_pixel_offset - Compute the offset (in bytes) to a pixel + * in an image (or volume). + * + * @width: The image width in pixels. + * @height: The image height in pixels + */ +static inline u32 +vmw_surface_get_pixel_offset(SVGA3dSurfaceFormat format, + u32 width, u32 height, + u32 x, u32 y, u32 z) +{ + const SVGA3dSurfaceDesc *desc = vmw_surface_get_desc(format); + const u32 bw = desc->blockSize.width, bh = desc->blockSize.height; + const u32 bd = desc->blockSize.depth; + const u32 rowstride = __KERNEL_DIV_ROUND_UP(width, bw) * + desc->bytesPerBlock; + const u32 imgstride = __KERNEL_DIV_ROUND_UP(height, bh) * rowstride; + const u32 offset = (z / bd * imgstride + + y / bh * rowstride + + x / bw * desc->bytesPerBlock); + return offset; +} + +static inline u32 +vmw_surface_get_image_offset(SVGA3dSurfaceFormat format, + struct drm_vmw_size baseLevelSize, + u32 numMipLevels, + u32 face, + u32 mip) + +{ + u32 offset; + u32 mipChainBytes; + u32 mipChainBytesToLevel; + u32 i; + const SVGA3dSurfaceDesc *desc; + struct drm_vmw_size mipSize; + u32 bytes; + + desc = vmw_surface_get_desc(format); + + mipChainBytes = 0; + mipChainBytesToLevel = 0; + for (i = 0; i < numMipLevels; i++) { + mipSize = vmw_surface_get_mip_size(baseLevelSize, i); + bytes = vmw_surface_get_image_buffer_size(desc, &mipSize, 0); + mipChainBytes += bytes; + if (i < mip) + mipChainBytesToLevel += bytes; + } + + offset = mipChainBytes * face + mipChainBytesToLevel; + + return offset; +} + + +/** + * vmw_surface_is_gb_screen_target_format - Is the specified format usable as + * a ScreenTarget? + * (with just the GBObjects cap-bit + * set) + * @format: format to queried + * + * RETURNS: + * true if queried format is valid for screen targets + */ +static inline bool +vmw_surface_is_gb_screen_target_format(SVGA3dSurfaceFormat format) +{ + return (format == SVGA3D_X8R8G8B8 || + format == SVGA3D_A8R8G8B8 || + format == SVGA3D_R5G6B5 || + format == SVGA3D_X1R5G5B5 || + format == SVGA3D_A1R5G5B5 || + format == SVGA3D_P8); +} + + +/** + * vmw_surface_is_dx_screen_target_format - Is the specified format usable as + * a ScreenTarget? + * (with DX10 enabled) + * + * @format: format to queried + * + * Results: + * true if queried format is valid for screen targets + */ +static inline bool +vmw_surface_is_dx_screen_target_format(SVGA3dSurfaceFormat format) +{ + return (format == SVGA3D_R8G8B8A8_UNORM || + format == SVGA3D_B8G8R8A8_UNORM || + format == SVGA3D_B8G8R8X8_UNORM); +} + + +/** + * vmw_surface_is_screen_target_format - Is the specified format usable as a + * ScreenTarget? + * (for some combination of caps) + * + * @format: format to queried + * + * Results: + * true if queried format is valid for screen targets + */ +static inline bool +vmw_surface_is_screen_target_format(SVGA3dSurfaceFormat format) +{ + if (vmw_surface_is_gb_screen_target_format(format)) { + return true; + } + return vmw_surface_is_dx_screen_target_format(format); +} + +/** + * struct vmw_surface_mip - Mimpmap level information + * @bytes: Bytes required in the backing store of this mipmap level. 
+ * @img_stride: Byte stride per image. + * @row_stride: Byte stride per block row. + * @size: The size of the mipmap. + */ +struct vmw_surface_mip { + size_t bytes; + size_t img_stride; + size_t row_stride; + struct drm_vmw_size size; + +}; + +/** + * struct vmw_surface_cache - Cached surface information + * @desc: Pointer to the surface descriptor + * @mip: Array of mipmap level information. Valid size is @num_mip_levels. + * @mip_chain_bytes: Bytes required in the backing store for the whole chain + * of mip levels. + * @sheet_bytes: Bytes required in the backing store for a sheet + * representing a single sample. + * @num_mip_levels: Valid size of the @mip array. Number of mipmap levels in + * a chain. + * @num_layers: Number of slices in an array texture or number of faces in + * a cubemap texture. + */ +struct vmw_surface_cache { + const SVGA3dSurfaceDesc *desc; + struct vmw_surface_mip mip[DRM_VMW_MAX_MIP_LEVELS]; + size_t mip_chain_bytes; + size_t sheet_bytes; + u32 num_mip_levels; + u32 num_layers; +}; + +/** + * struct vmw_surface_loc - Surface location + * @sheet: The multisample sheet. + * @sub_resource: Surface subresource. Defined as layer * num_mip_levels + + * mip_level. + * @x: X coordinate. + * @y: Y coordinate. + * @z: Z coordinate. + */ +struct vmw_surface_loc { + u32 sheet; + u32 sub_resource; + u32 x, y, z; +}; + +/** + * vmw_surface_subres - Compute the subresource from layer and mipmap. + * @cache: Surface layout data. + * @mip_level: The mipmap level. + * @layer: The surface layer (face or array slice). + * + * Return: The subresource. + */ +static inline u32 vmw_surface_subres(const struct vmw_surface_cache *cache, + u32 mip_level, u32 layer) +{ + return cache->num_mip_levels * layer + mip_level; +} + +/** + * vmw_surface_setup_cache - Build a surface cache entry + * @size: The surface base level dimensions. + * @format: The surface format. + * @num_mip_levels: Number of mipmap levels. + * @num_layers: Number of layers. + * @cache: Pointer to a struct vmw_surface_cach object to be filled in. + * + * Return: Zero on success, -EINVAL on invalid surface layout. + */ +static inline int vmw_surface_setup_cache(const struct drm_vmw_size *size, + SVGA3dSurfaceFormat format, + u32 num_mip_levels, + u32 num_layers, + u32 num_samples, + struct vmw_surface_cache *cache) +{ + const SVGA3dSurfaceDesc *desc; + u32 i; + + memset(cache, 0, sizeof(*cache)); + cache->desc = desc = vmw_surface_get_desc(format); + cache->num_mip_levels = num_mip_levels; + cache->num_layers = num_layers; + for (i = 0; i < cache->num_mip_levels; i++) { + struct vmw_surface_mip *mip = &cache->mip[i]; + + mip->size = vmw_surface_get_mip_size(*size, i); + mip->bytes = vmw_surface_get_image_buffer_size + (desc, &mip->size, 0); + mip->row_stride = + __KERNEL_DIV_ROUND_UP(mip->size.width, + desc->blockSize.width) * + desc->bytesPerBlock * num_samples; + if (!mip->row_stride) + goto invalid_dim; + + mip->img_stride = + __KERNEL_DIV_ROUND_UP(mip->size.height, + desc->blockSize.height) * + mip->row_stride; + if (!mip->img_stride) + goto invalid_dim; + + cache->mip_chain_bytes += mip->bytes; + } + cache->sheet_bytes = cache->mip_chain_bytes * num_layers; + if (!cache->sheet_bytes) + goto invalid_dim; + + return 0; + +invalid_dim: + VMW_DEBUG_USER("Invalid surface layout for dirty tracking.\n"); + return -EINVAL; +} + +/** + * vmw_surface_get_loc - Get a surface location from an offset into the + * backing store + * @cache: Surface layout data. + * @loc: Pointer to a struct vmw_surface_loc to be filled in. 
+ * @offset: Offset into the surface backing store. + */ +static inline void +vmw_surface_get_loc(const struct vmw_surface_cache *cache, + struct vmw_surface_loc *loc, + size_t offset) +{ + const struct vmw_surface_mip *mip = &cache->mip[0]; + const SVGA3dSurfaceDesc *desc = cache->desc; + u32 layer; + int i; + + loc->sheet = offset / cache->sheet_bytes; + offset -= loc->sheet * cache->sheet_bytes; + + layer = offset / cache->mip_chain_bytes; + offset -= layer * cache->mip_chain_bytes; + for (i = 0; i < cache->num_mip_levels; ++i, ++mip) { + if (mip->bytes > offset) + break; + offset -= mip->bytes; + } + + loc->sub_resource = vmw_surface_subres(cache, i, layer); + loc->z = offset / mip->img_stride; + offset -= loc->z * mip->img_stride; + loc->z *= desc->blockSize.depth; + loc->y = offset / mip->row_stride; + offset -= loc->y * mip->row_stride; + loc->y *= desc->blockSize.height; + loc->x = offset / desc->bytesPerBlock; + loc->x *= desc->blockSize.width; +} + +/** + * vmw_surface_inc_loc - Clamp increment a surface location with one block + * size + * in each dimension. + * @loc: Pointer to a struct vmw_surface_loc to be incremented. + * + * When computing the size of a range as size = end - start, the range does not + * include the end element. However a location representing the last byte + * of a touched region in the backing store *is* included in the range. + * This function modifies such a location to match the end definition + * given as start + size which is the one used in a SVGA3dBox. + */ +static inline void +vmw_surface_inc_loc(const struct vmw_surface_cache *cache, + struct vmw_surface_loc *loc) +{ + const SVGA3dSurfaceDesc *desc = cache->desc; + u32 mip = loc->sub_resource % cache->num_mip_levels; + const struct drm_vmw_size *size = &cache->mip[mip].size; + + loc->sub_resource++; + loc->x += desc->blockSize.width; + if (loc->x > size->width) + loc->x = size->width; + loc->y += desc->blockSize.height; + if (loc->y > size->height) + loc->y = size->height; + loc->z += desc->blockSize.depth; + if (loc->z > size->depth) + loc->z = size->depth; +} + +/** + * vmw_surface_min_loc - The start location in a subresource + * @cache: Surface layout data. + * @sub_resource: The subresource. + * @loc: Pointer to a struct vmw_surface_loc to be filled in. + */ +static inline void +vmw_surface_min_loc(const struct vmw_surface_cache *cache, + u32 sub_resource, + struct vmw_surface_loc *loc) +{ + loc->sheet = 0; + loc->sub_resource = sub_resource; + loc->x = loc->y = loc->z = 0; +} + +/** + * vmw_surface_min_loc - The end location in a subresource + * @cache: Surface layout data. + * @sub_resource: The subresource. + * @loc: Pointer to a struct vmw_surface_loc to be filled in. + * + * Following the end definition given in vmw_surface_inc_loc(), + * Compute the end location of a surface subresource. 
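
Taken together, these helpers let dirty tracking turn a byte range in the backing store into per-subresource box coordinates: vmw_surface_get_loc() resolves an offset to (sheet, subresource, x, y, z), vmw_surface_inc_loc() converts the last touched block into the exclusive end used by an SVGA3dBox, and vmw_surface_min_loc()/vmw_surface_max_loc() bound a whole subresource. An illustrative sketch; the function below is made up, not part of the patch:

static void dirty_range_to_locs(const struct vmw_surface_cache *cache,
				size_t start, size_t end,
				struct vmw_surface_loc *loc_start,
				struct vmw_surface_loc *loc_end)
{
	vmw_surface_get_loc(cache, loc_start, start);
	vmw_surface_get_loc(cache, loc_end, end - 1);
	/* loc_end currently names the last touched block; make it exclusive. */
	vmw_surface_inc_loc(cache, loc_end);
}
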
+ */ +static inline void +vmw_surface_max_loc(const struct vmw_surface_cache *cache, + u32 sub_resource, + struct vmw_surface_loc *loc) +{ + const struct drm_vmw_size *size; + u32 mip; + + loc->sheet = 0; + loc->sub_resource = sub_resource + 1; + mip = sub_resource % cache->num_mip_levels; + size = &cache->mip[mip].size; + loc->x = size->width; + loc->y = size->height; + loc->z = size->depth; +} + + +#endif /* VMW_SURFACE_CACHE_H */ diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_binding.c b/drivers/gpu/drm/vmwgfx/vmwgfx_binding.c index 05b324825900..6f27d69bad0e 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_binding.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_binding.c @@ -715,7 +715,7 @@ static int vmw_binding_scrub_cb(struct vmw_ctx_bindinfo *bi, bool rebind) * without checking which bindings actually need to be emitted * * @cbs: Pointer to the context's struct vmw_ctx_binding_state - * @bi: Pointer to where the binding info array is stored in @cbs + * @biv: Pointer to where the binding info array is stored in @cbs * @max_num: Maximum number of entries in the @bi array. * * Scans the @bi array for bindings and builds a buffer of view id data. @@ -725,11 +725,9 @@ static int vmw_binding_scrub_cb(struct vmw_ctx_bindinfo *bi, bool rebind) * contains the command data. */ static void vmw_collect_view_ids(struct vmw_ctx_binding_state *cbs, - const struct vmw_ctx_bindinfo *bi, + const struct vmw_ctx_bindinfo_view *biv, u32 max_num) { - const struct vmw_ctx_bindinfo_view *biv = - container_of(bi, struct vmw_ctx_bindinfo_view, bi); unsigned long i; cbs->bind_cmd_count = 0; @@ -838,7 +836,7 @@ static int vmw_emit_set_sr(struct vmw_ctx_binding_state *cbs, */ static int vmw_emit_set_rt(struct vmw_ctx_binding_state *cbs) { - const struct vmw_ctx_bindinfo *loc = &cbs->render_targets[0].bi; + const struct vmw_ctx_bindinfo_view *loc = &cbs->render_targets[0]; struct { SVGA3dCmdHeader header; SVGA3dCmdDXSetRenderTargets body; @@ -846,7 +844,7 @@ static int vmw_emit_set_rt(struct vmw_ctx_binding_state *cbs) size_t cmd_size, view_id_size; const struct vmw_resource *ctx = vmw_cbs_context(cbs); - vmw_collect_view_ids(cbs, loc, SVGA3D_MAX_SIMULTANEOUS_RENDER_TARGETS); + vmw_collect_view_ids(cbs, loc, SVGA3D_DX_MAX_RENDER_TARGETS); view_id_size = cbs->bind_cmd_count*sizeof(uint32); cmd_size = sizeof(*cmd) + view_id_size; cmd = VMW_CMD_CTX_RESERVE(ctx->dev_priv, cmd_size, ctx->id); @@ -874,7 +872,7 @@ static int vmw_emit_set_rt(struct vmw_ctx_binding_state *cbs) * without checking which bindings actually need to be emitted * * @cbs: Pointer to the context's struct vmw_ctx_binding_state - * @bi: Pointer to where the binding info array is stored in @cbs + * @biso: Pointer to where the binding info array is stored in @cbs * @max_num: Maximum number of entries in the @bi array. * * Scans the @bi array for bindings and builds a buffer of SVGA3dSoTarget data. @@ -884,11 +882,9 @@ static int vmw_emit_set_rt(struct vmw_ctx_binding_state *cbs) * contains the command data. 
*/ static void vmw_collect_so_targets(struct vmw_ctx_binding_state *cbs, - const struct vmw_ctx_bindinfo *bi, + const struct vmw_ctx_bindinfo_so_target *biso, u32 max_num) { - const struct vmw_ctx_bindinfo_so_target *biso = - container_of(bi, struct vmw_ctx_bindinfo_so_target, bi); unsigned long i; SVGA3dSoTarget *so_buffer = (SVGA3dSoTarget *) cbs->bind_cmd_buffer; @@ -919,7 +915,7 @@ static void vmw_collect_so_targets(struct vmw_ctx_binding_state *cbs, */ static int vmw_emit_set_so_target(struct vmw_ctx_binding_state *cbs) { - const struct vmw_ctx_bindinfo *loc = &cbs->so_targets[0].bi; + const struct vmw_ctx_bindinfo_so_target *loc = &cbs->so_targets[0]; struct { SVGA3dCmdHeader header; SVGA3dCmdDXSetSOTargets body; @@ -1066,7 +1062,7 @@ static int vmw_emit_set_vb(struct vmw_ctx_binding_state *cbs) static int vmw_emit_set_uav(struct vmw_ctx_binding_state *cbs) { - const struct vmw_ctx_bindinfo *loc = &cbs->ua_views[0].views[0].bi; + const struct vmw_ctx_bindinfo_view *loc = &cbs->ua_views[0].views[0]; struct { SVGA3dCmdHeader header; SVGA3dCmdDXSetUAViews body; @@ -1096,7 +1092,7 @@ static int vmw_emit_set_uav(struct vmw_ctx_binding_state *cbs) static int vmw_emit_set_cs_uav(struct vmw_ctx_binding_state *cbs) { - const struct vmw_ctx_bindinfo *loc = &cbs->ua_views[1].views[0].bi; + const struct vmw_ctx_bindinfo_view *loc = &cbs->ua_views[1].views[0]; struct { SVGA3dCmdHeader header; SVGA3dCmdDXSetCSUAViews body; @@ -1444,7 +1440,7 @@ u32 vmw_binding_dirtying(enum vmw_ctx_binding_type binding_type) static void vmw_binding_build_asserts(void) { BUILD_BUG_ON(SVGA3D_NUM_SHADERTYPE_DX10 != 3); - BUILD_BUG_ON(SVGA3D_MAX_SIMULTANEOUS_RENDER_TARGETS > SVGA3D_RT_MAX); + BUILD_BUG_ON(SVGA3D_DX_MAX_RENDER_TARGETS > SVGA3D_RT_MAX); BUILD_BUG_ON(sizeof(uint32) != sizeof(u32)); /* diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c index 362f56d5b12b..9e3e1429db94 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c @@ -405,7 +405,7 @@ static size_t vmw_bo_acc_size(struct vmw_private *dev_priv, size_t size, bool user) { static size_t struct_size, user_struct_size; - size_t num_pages = PAGE_ALIGN(size) >> PAGE_SHIFT; + size_t num_pages = PFN_UP(size); size_t page_array_size = ttm_round_pot(num_pages * sizeof(void *)); if (unlikely(struct_size == 0)) { @@ -474,7 +474,6 @@ int vmw_bo_create_kernel(struct vmw_private *dev_priv, unsigned long size, struct ttm_placement *placement, struct ttm_buffer_object **p_bo) { - unsigned npages = PAGE_ALIGN(size) >> PAGE_SHIFT; struct ttm_operation_ctx ctx = { false, false }; struct ttm_buffer_object *bo; size_t acc_size; @@ -485,7 +484,7 @@ int vmw_bo_create_kernel(struct vmw_private *dev_priv, unsigned long size, return -ENOMEM; acc_size = ttm_round_pot(sizeof(*bo)); - acc_size += ttm_round_pot(npages * sizeof(void *)); + acc_size += ttm_round_pot(PFN_UP(size) * sizeof(void *)); acc_size += ttm_round_pot(sizeof(struct ttm_tt)); ret = ttm_mem_global_alloc(&ttm_mem_glob, acc_size, &ctx); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_cmd.c b/drivers/gpu/drm/vmwgfx/vmwgfx_cmd.c index 956b85e35cef..67db472d3493 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_cmd.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_cmd.c @@ -30,6 +30,7 @@ #include <drm/ttm/ttm_placement.h> #include "vmwgfx_drv.h" +#include "vmwgfx_devcaps.h" bool vmw_supports_3d(struct vmw_private *dev_priv) { @@ -45,10 +46,7 @@ bool vmw_supports_3d(struct vmw_private *dev_priv) if (!dev_priv->has_mob) return false; - spin_lock(&dev_priv->cap_lock); - 
vmw_write(dev_priv, SVGA_REG_DEV_CAP, SVGA3D_DEVCAP_3D); - result = vmw_read(dev_priv, SVGA_REG_DEV_CAP); - spin_unlock(&dev_priv->cap_lock); + result = vmw_devcap_get(dev_priv, SVGA3D_DEVCAP_3D); return (result != 0); } @@ -142,7 +140,8 @@ struct vmw_fifo_state *vmw_fifo_create(struct vmw_private *dev_priv) min = vmw_fifo_mem_read(dev_priv, SVGA_FIFO_MIN); fifo->capabilities = vmw_fifo_mem_read(dev_priv, SVGA_FIFO_CAPABILITIES); - DRM_INFO("Fifo max 0x%08x min 0x%08x cap 0x%08x\n", + drm_info(&dev_priv->drm, + "Fifo max 0x%08x min 0x%08x cap 0x%08x\n", (unsigned int) max, (unsigned int) min, (unsigned int) fifo->capabilities); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c index 6bb4961e64a5..3c06df2a5474 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c @@ -358,8 +358,7 @@ static void vmw_cmdbuf_ctx_submit(struct vmw_cmdbuf_man *man, break; } - list_del(&entry->list); - list_add_tail(&entry->list, &ctx->hw_submitted); + list_move_tail(&entry->list, &ctx->hw_submitted); ctx->num_hw_submitted++; } @@ -516,7 +515,7 @@ static void vmw_cmdbuf_work_func(struct work_struct *work) struct vmw_cmdbuf_man *man = container_of(work, struct vmw_cmdbuf_man, work); struct vmw_cmdbuf_header *entry, *next; - uint32_t dummy; + uint32_t dummy = 0; bool send_fence = false; struct list_head restart_head[SVGA_CB_CONTEXT_MAX]; int i; @@ -802,7 +801,7 @@ static int vmw_cmdbuf_alloc_space(struct vmw_cmdbuf_man *man, { struct vmw_cmdbuf_alloc_info info; - info.page_size = PAGE_ALIGN(size) >> PAGE_SHIFT; + info.page_size = PFN_UP(size); info.node = node; info.done = false; @@ -1272,7 +1271,8 @@ int vmw_cmdbuf_set_pool_size(struct vmw_cmdbuf_man *man, size_t size) * submissions to be able to free up space. */ man->default_size = VMW_CMDBUF_INLINE_SIZE; - DRM_INFO("Using command buffers with %s pool.\n", + drm_info(&dev_priv->drm, + "Using command buffers with %s pool.\n", (man->using_mob) ? "MOB" : "DMA"); return 0; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c index b262d61d839d..8381750db81b 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c @@ -159,6 +159,7 @@ void vmw_cmdbuf_res_commit(struct list_head *list) void vmw_cmdbuf_res_revert(struct list_head *list) { struct vmw_cmdbuf_res *entry, *next; + int ret; list_for_each_entry_safe(entry, next, list, head) { switch (entry->state) { @@ -166,9 +167,9 @@ void vmw_cmdbuf_res_revert(struct list_head *list) vmw_cmdbuf_res_free(entry->man, entry); break; case VMW_CMDBUF_RES_DEL: - drm_ht_insert_item(&entry->man->resources, &entry->hash); - list_del(&entry->head); - list_add_tail(&entry->head, &entry->man->list); + ret = drm_ht_insert_item(&entry->man->resources, &entry->hash); + BUG_ON(ret); + list_move_tail(&entry->head, &entry->man->list); entry->state = VMW_CMDBUF_RES_COMMITTED; break; default: diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_context.c b/drivers/gpu/drm/vmwgfx/vmwgfx_context.c index dffe3804ad3e..4446758b6880 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_context.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_context.c @@ -185,7 +185,7 @@ static int vmw_gb_context_init(struct vmw_private *dev_priv, container_of(res, struct vmw_user_context, res); res->backup_size = (dx ? sizeof(SVGADXContextMobFormat) : - SVGA3D_CONTEXT_DATA_SIZE); + sizeof(SVGAGBContextData)); ret = vmw_resource_init(dev_priv, res, true, res_free, dx ? 
&vmw_dx_context_func : @@ -259,7 +259,7 @@ static int vmw_context_init(struct vmw_private *dev_priv, goto out_early; } - if (unlikely(res->id >= SVGA3D_MAX_CONTEXT_IDS)) { + if (unlikely(res->id >= SVGA3D_HB_MAX_CONTEXT_IDS)) { DRM_ERROR("Out of hw context ids.\n"); vmw_resource_unreference(&res); return -ENOMEM; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_cotable.c b/drivers/gpu/drm/vmwgfx/vmwgfx_cotable.c index c84a16c1def0..17a98db00017 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_cotable.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_cotable.c @@ -607,8 +607,7 @@ struct vmw_resource *vmw_cotable_alloc(struct vmw_private *dev_priv, if (num_entries < co_info[type].min_initial_entries) { vcotbl->res.backup_size = co_info[type].min_initial_entries * co_info[type].size; - vcotbl->res.backup_size = - (vcotbl->res.backup_size + PAGE_SIZE - 1) & PAGE_MASK; + vcotbl->res.backup_size = PFN_ALIGN(vcotbl->res.backup_size); } vcotbl->scrubbed = true; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_devcaps.c b/drivers/gpu/drm/vmwgfx/vmwgfx_devcaps.c new file mode 100644 index 000000000000..829df395c2ed --- /dev/null +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_devcaps.c @@ -0,0 +1,142 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ +/************************************************************************** + * + * Copyright 2021 VMware, Inc., Palo Alto, CA., USA + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "vmwgfx_devcaps.h" + +#include "vmwgfx_drv.h" + + +struct svga_3d_compat_cap { + SVGA3dFifoCapsRecordHeader header; + SVGA3dFifoCapPair pairs[SVGA3D_DEVCAP_MAX]; +}; + + +static u32 vmw_mask_legacy_multisample(unsigned int cap, u32 fmt_value) +{ + /* + * A version of user-space exists which use MULTISAMPLE_MASKABLESAMPLES + * to check the sample count supported by virtual device. Since there + * never was support for multisample count for backing MOB return 0. + * + * MULTISAMPLE_MASKABLESAMPLES devcap is marked as deprecated by virtual + * device. 
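
The new devcaps code snapshots every SVGA3D_DEVCAP_MAX value once at load time when SVGA_CAP_GBOBJECTS is present (vmw_devcaps_create() loops over SVGA_REG_DEV_CAP), so later queries such as the vmw_supports_3d() hunk above become a plain lookup in the cached array via vmw_devcap_get() instead of register round-trips under cap_lock. Illustrative use only, mirroring that hunk:

/* After vmw_devcaps_create() has populated the devcaps array: */
bool has_3d = vmw_devcap_get(dev_priv, SVGA3D_DEVCAP_3D) != 0;
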
+ */ + if (cap == SVGA3D_DEVCAP_DEAD5) + return 0; + + return fmt_value; +} + +static int vmw_fill_compat_cap(struct vmw_private *dev_priv, void *bounce, + size_t size) +{ + struct svga_3d_compat_cap *compat_cap = + (struct svga_3d_compat_cap *) bounce; + unsigned int i; + size_t pair_offset = offsetof(struct svga_3d_compat_cap, pairs); + unsigned int max_size; + + if (size < pair_offset) + return -EINVAL; + + max_size = (size - pair_offset) / sizeof(SVGA3dFifoCapPair); + + if (max_size > SVGA3D_DEVCAP_MAX) + max_size = SVGA3D_DEVCAP_MAX; + + compat_cap->header.length = + (pair_offset + max_size * sizeof(SVGA3dFifoCapPair)) / sizeof(u32); + compat_cap->header.type = SVGA3D_FIFO_CAPS_RECORD_DEVCAPS; + + for (i = 0; i < max_size; ++i) { + compat_cap->pairs[i][0] = i; + compat_cap->pairs[i][1] = vmw_mask_legacy_multisample + (i, dev_priv->devcaps[i]); + } + + return 0; +} + +int vmw_devcaps_create(struct vmw_private *vmw) +{ + bool gb_objects = !!(vmw->capabilities & SVGA_CAP_GBOBJECTS); + uint32_t i; + + if (gb_objects) { + vmw->devcaps = vzalloc(sizeof(uint32_t) * SVGA3D_DEVCAP_MAX); + if (!vmw->devcaps) + return -ENOMEM; + for (i = 0; i < SVGA3D_DEVCAP_MAX; ++i) { + vmw_write(vmw, SVGA_REG_DEV_CAP, i); + vmw->devcaps[i] = vmw_read(vmw, SVGA_REG_DEV_CAP); + } + } + return 0; +} + +void vmw_devcaps_destroy(struct vmw_private *vmw) +{ + vfree(vmw->devcaps); + vmw->devcaps = NULL; +} + + +uint32 vmw_devcaps_size(const struct vmw_private *vmw, + bool gb_aware) +{ + bool gb_objects = !!(vmw->capabilities & SVGA_CAP_GBOBJECTS); + if (gb_objects && gb_aware) + return SVGA3D_DEVCAP_MAX * sizeof(uint32_t); + else if (gb_objects) + return sizeof(struct svga_3d_compat_cap) + + sizeof(uint32_t); + else if (vmw->fifo_mem != NULL) + return (SVGA_FIFO_3D_CAPS_LAST - SVGA_FIFO_3D_CAPS + 1) * + sizeof(uint32_t); + else + return 0; +} + +int vmw_devcaps_copy(struct vmw_private *vmw, bool gb_aware, + void *dst, uint32_t dst_size) +{ + int ret; + bool gb_objects = !!(vmw->capabilities & SVGA_CAP_GBOBJECTS); + if (gb_objects && gb_aware) { + memcpy(dst, vmw->devcaps, dst_size); + } else if (gb_objects) { + ret = vmw_fill_compat_cap(vmw, dst, dst_size); + if (unlikely(ret != 0)) + return ret; + } else if (vmw->fifo_mem) { + u32 *fifo_mem = vmw->fifo_mem; + memcpy(dst, &fifo_mem[SVGA_FIFO_3D_CAPS], dst_size); + } else + return -EINVAL; + return 0; +} diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_devcaps.h b/drivers/gpu/drm/vmwgfx/vmwgfx_devcaps.h new file mode 100644 index 000000000000..f70e923ac3e6 --- /dev/null +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_devcaps.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ +/************************************************************************** + * + * Copyright 2021 VMware, Inc., Palo Alto, CA., USA + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef _VMWGFX_DEVCAPS_H_ +#define _VMWGFX_DEVCAPS_H_ + +#include "vmwgfx_drv.h" + +#include "device_include/svga_reg.h" + +int vmw_devcaps_create(struct vmw_private *vmw); +void vmw_devcaps_destroy(struct vmw_private *vmw); +uint32_t vmw_devcaps_size(const struct vmw_private *vmw, bool gb_aware); +int vmw_devcaps_copy(struct vmw_private *vmw, bool gb_aware, + void *dst, uint32_t dst_size); + +static inline uint32_t vmw_devcap_get(struct vmw_private *vmw, + uint32_t devcap) +{ + bool gb_objects = !!(vmw->capabilities & SVGA_CAP_GBOBJECTS); + if (gb_objects) + return vmw->devcaps[devcap]; + return 0; +} + +#endif diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index 45aeeca9b8f6..ab9a1750e1df 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -42,7 +42,9 @@ #include "ttm_object.h" #include "vmwgfx_binding.h" +#include "vmwgfx_devcaps.h" #include "vmwgfx_drv.h" +#include "vmwgfx_mksstat.h" #define VMWGFX_DRIVER_DESC "Linux drm driver for VMware graphics devices" @@ -148,102 +150,111 @@ #define DRM_IOCTL_VMW_MSG \ DRM_IOWR(DRM_COMMAND_BASE + DRM_VMW_MSG, \ struct drm_vmw_msg_arg) - -/* - * The core DRM version of this macro doesn't account for - * DRM_COMMAND_BASE. - */ - -#define VMW_IOCTL_DEF(ioctl, func, flags) \ - [DRM_IOCTL_NR(DRM_IOCTL_##ioctl) - DRM_COMMAND_BASE] = {DRM_IOCTL_##ioctl, flags, func} +#define DRM_IOCTL_VMW_MKSSTAT_RESET \ + DRM_IO(DRM_COMMAND_BASE + DRM_VMW_MKSSTAT_RESET) +#define DRM_IOCTL_VMW_MKSSTAT_ADD \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_VMW_MKSSTAT_ADD, \ + struct drm_vmw_mksstat_add_arg) +#define DRM_IOCTL_VMW_MKSSTAT_REMOVE \ + DRM_IOW(DRM_COMMAND_BASE + DRM_VMW_MKSSTAT_REMOVE, \ + struct drm_vmw_mksstat_remove_arg) /* * Ioctl definitions. 
*/ static const struct drm_ioctl_desc vmw_ioctls[] = { - VMW_IOCTL_DEF(VMW_GET_PARAM, vmw_getparam_ioctl, - DRM_RENDER_ALLOW), - VMW_IOCTL_DEF(VMW_ALLOC_DMABUF, vmw_bo_alloc_ioctl, - DRM_RENDER_ALLOW), - VMW_IOCTL_DEF(VMW_UNREF_DMABUF, vmw_bo_unref_ioctl, - DRM_RENDER_ALLOW), - VMW_IOCTL_DEF(VMW_CURSOR_BYPASS, - vmw_kms_cursor_bypass_ioctl, - DRM_MASTER), - - VMW_IOCTL_DEF(VMW_CONTROL_STREAM, vmw_overlay_ioctl, - DRM_MASTER), - VMW_IOCTL_DEF(VMW_CLAIM_STREAM, vmw_stream_claim_ioctl, - DRM_MASTER), - VMW_IOCTL_DEF(VMW_UNREF_STREAM, vmw_stream_unref_ioctl, - DRM_MASTER), - - VMW_IOCTL_DEF(VMW_CREATE_CONTEXT, vmw_context_define_ioctl, - DRM_RENDER_ALLOW), - VMW_IOCTL_DEF(VMW_UNREF_CONTEXT, vmw_context_destroy_ioctl, - DRM_RENDER_ALLOW), - VMW_IOCTL_DEF(VMW_CREATE_SURFACE, vmw_surface_define_ioctl, - DRM_RENDER_ALLOW), - VMW_IOCTL_DEF(VMW_UNREF_SURFACE, vmw_surface_destroy_ioctl, - DRM_RENDER_ALLOW), - VMW_IOCTL_DEF(VMW_REF_SURFACE, vmw_surface_reference_ioctl, - DRM_RENDER_ALLOW), - VMW_IOCTL_DEF(VMW_EXECBUF, vmw_execbuf_ioctl, - DRM_RENDER_ALLOW), - VMW_IOCTL_DEF(VMW_FENCE_WAIT, vmw_fence_obj_wait_ioctl, - DRM_RENDER_ALLOW), - VMW_IOCTL_DEF(VMW_FENCE_SIGNALED, - vmw_fence_obj_signaled_ioctl, - DRM_RENDER_ALLOW), - VMW_IOCTL_DEF(VMW_FENCE_UNREF, vmw_fence_obj_unref_ioctl, - DRM_RENDER_ALLOW), - VMW_IOCTL_DEF(VMW_FENCE_EVENT, vmw_fence_event_ioctl, - DRM_RENDER_ALLOW), - VMW_IOCTL_DEF(VMW_GET_3D_CAP, vmw_get_cap_3d_ioctl, - DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(VMW_GET_PARAM, vmw_getparam_ioctl, + DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(VMW_ALLOC_DMABUF, vmw_bo_alloc_ioctl, + DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(VMW_UNREF_DMABUF, vmw_bo_unref_ioctl, + DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(VMW_CURSOR_BYPASS, + vmw_kms_cursor_bypass_ioctl, + DRM_MASTER), + + DRM_IOCTL_DEF_DRV(VMW_CONTROL_STREAM, vmw_overlay_ioctl, + DRM_MASTER), + DRM_IOCTL_DEF_DRV(VMW_CLAIM_STREAM, vmw_stream_claim_ioctl, + DRM_MASTER), + DRM_IOCTL_DEF_DRV(VMW_UNREF_STREAM, vmw_stream_unref_ioctl, + DRM_MASTER), + + DRM_IOCTL_DEF_DRV(VMW_CREATE_CONTEXT, vmw_context_define_ioctl, + DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(VMW_UNREF_CONTEXT, vmw_context_destroy_ioctl, + DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(VMW_CREATE_SURFACE, vmw_surface_define_ioctl, + DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(VMW_UNREF_SURFACE, vmw_surface_destroy_ioctl, + DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(VMW_REF_SURFACE, vmw_surface_reference_ioctl, + DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(VMW_EXECBUF, vmw_execbuf_ioctl, + DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(VMW_FENCE_WAIT, vmw_fence_obj_wait_ioctl, + DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(VMW_FENCE_SIGNALED, + vmw_fence_obj_signaled_ioctl, + DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(VMW_FENCE_UNREF, vmw_fence_obj_unref_ioctl, + DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(VMW_FENCE_EVENT, vmw_fence_event_ioctl, + DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(VMW_GET_3D_CAP, vmw_get_cap_3d_ioctl, + DRM_RENDER_ALLOW), /* these allow direct access to the framebuffers mark as master only */ - VMW_IOCTL_DEF(VMW_PRESENT, vmw_present_ioctl, - DRM_MASTER | DRM_AUTH), - VMW_IOCTL_DEF(VMW_PRESENT_READBACK, - vmw_present_readback_ioctl, - DRM_MASTER | DRM_AUTH), + DRM_IOCTL_DEF_DRV(VMW_PRESENT, vmw_present_ioctl, + DRM_MASTER | DRM_AUTH), + DRM_IOCTL_DEF_DRV(VMW_PRESENT_READBACK, + vmw_present_readback_ioctl, + DRM_MASTER | DRM_AUTH), /* * The permissions of the below ioctl are overridden in * vmw_generic_ioctl(). We require either * DRM_MASTER or capable(CAP_SYS_ADMIN). 
*/ - VMW_IOCTL_DEF(VMW_UPDATE_LAYOUT, - vmw_kms_update_layout_ioctl, - DRM_RENDER_ALLOW), - VMW_IOCTL_DEF(VMW_CREATE_SHADER, - vmw_shader_define_ioctl, - DRM_RENDER_ALLOW), - VMW_IOCTL_DEF(VMW_UNREF_SHADER, - vmw_shader_destroy_ioctl, - DRM_RENDER_ALLOW), - VMW_IOCTL_DEF(VMW_GB_SURFACE_CREATE, - vmw_gb_surface_define_ioctl, - DRM_RENDER_ALLOW), - VMW_IOCTL_DEF(VMW_GB_SURFACE_REF, - vmw_gb_surface_reference_ioctl, - DRM_RENDER_ALLOW), - VMW_IOCTL_DEF(VMW_SYNCCPU, - vmw_user_bo_synccpu_ioctl, - DRM_RENDER_ALLOW), - VMW_IOCTL_DEF(VMW_CREATE_EXTENDED_CONTEXT, - vmw_extended_context_define_ioctl, - DRM_RENDER_ALLOW), - VMW_IOCTL_DEF(VMW_GB_SURFACE_CREATE_EXT, - vmw_gb_surface_define_ext_ioctl, - DRM_RENDER_ALLOW), - VMW_IOCTL_DEF(VMW_GB_SURFACE_REF_EXT, - vmw_gb_surface_reference_ext_ioctl, - DRM_RENDER_ALLOW), - VMW_IOCTL_DEF(VMW_MSG, - vmw_msg_ioctl, - DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(VMW_UPDATE_LAYOUT, + vmw_kms_update_layout_ioctl, + DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(VMW_CREATE_SHADER, + vmw_shader_define_ioctl, + DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(VMW_UNREF_SHADER, + vmw_shader_destroy_ioctl, + DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(VMW_GB_SURFACE_CREATE, + vmw_gb_surface_define_ioctl, + DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(VMW_GB_SURFACE_REF, + vmw_gb_surface_reference_ioctl, + DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(VMW_SYNCCPU, + vmw_user_bo_synccpu_ioctl, + DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(VMW_CREATE_EXTENDED_CONTEXT, + vmw_extended_context_define_ioctl, + DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(VMW_GB_SURFACE_CREATE_EXT, + vmw_gb_surface_define_ext_ioctl, + DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(VMW_GB_SURFACE_REF_EXT, + vmw_gb_surface_reference_ext_ioctl, + DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(VMW_MSG, + vmw_msg_ioctl, + DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(VMW_MKSSTAT_RESET, + vmw_mksstat_reset_ioctl, + DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(VMW_MKSSTAT_ADD, + vmw_mksstat_add_ioctl, + DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(VMW_MKSSTAT_REMOVE, + vmw_mksstat_remove_ioctl, + DRM_RENDER_ALLOW), }; static const struct pci_device_id vmw_pci_id_list[] = { @@ -254,7 +265,6 @@ static const struct pci_device_id vmw_pci_id_list[] = { MODULE_DEVICE_TABLE(pci, vmw_pci_id_list); static int enable_fbdev = IS_ENABLED(CONFIG_DRM_VMWGFX_FBCON); -static int vmw_force_iommu; static int vmw_restrict_iommu; static int vmw_force_coherent; static int vmw_restrict_dma_mask; @@ -266,8 +276,6 @@ static int vmwgfx_pm_notifier(struct notifier_block *nb, unsigned long val, MODULE_PARM_DESC(enable_fbdev, "Enable vmwgfx fbdev"); module_param_named(enable_fbdev, enable_fbdev, int, 0600); -MODULE_PARM_DESC(force_dma_api, "Force using the DMA API for TTM pages"); -module_param_named(force_dma_api, vmw_force_iommu, int, 0600); MODULE_PARM_DESC(restrict_iommu, "Try to limit IOMMU usage for TTM pages"); module_param_named(restrict_iommu, vmw_restrict_iommu, int, 0600); MODULE_PARM_DESC(force_coherent, "Force coherent TTM pages"); @@ -278,62 +286,92 @@ MODULE_PARM_DESC(assume_16bpp, "Assume 16-bpp when filtering modes"); module_param_named(assume_16bpp, vmw_assume_16bpp, int, 0600); -static void vmw_print_capabilities2(uint32_t capabilities2) +struct bitmap_name { + uint32 value; + const char *name; +}; + +static const struct bitmap_name cap1_names[] = { + { SVGA_CAP_RECT_COPY, "rect copy" }, + { SVGA_CAP_CURSOR, "cursor" }, + { SVGA_CAP_CURSOR_BYPASS, "cursor bypass" }, + { SVGA_CAP_CURSOR_BYPASS_2, "cursor bypass 2" }, + { SVGA_CAP_8BIT_EMULATION, "8bit emulation" }, + { SVGA_CAP_ALPHA_CURSOR, 
"alpha cursor" }, + { SVGA_CAP_3D, "3D" }, + { SVGA_CAP_EXTENDED_FIFO, "extended fifo" }, + { SVGA_CAP_MULTIMON, "multimon" }, + { SVGA_CAP_PITCHLOCK, "pitchlock" }, + { SVGA_CAP_IRQMASK, "irq mask" }, + { SVGA_CAP_DISPLAY_TOPOLOGY, "display topology" }, + { SVGA_CAP_GMR, "gmr" }, + { SVGA_CAP_TRACES, "traces" }, + { SVGA_CAP_GMR2, "gmr2" }, + { SVGA_CAP_SCREEN_OBJECT_2, "screen object 2" }, + { SVGA_CAP_COMMAND_BUFFERS, "command buffers" }, + { SVGA_CAP_CMD_BUFFERS_2, "command buffers 2" }, + { SVGA_CAP_GBOBJECTS, "gbobject" }, + { SVGA_CAP_DX, "dx" }, + { SVGA_CAP_HP_CMD_QUEUE, "hp cmd queue" }, + { SVGA_CAP_NO_BB_RESTRICTION, "no bb restriction" }, + { SVGA_CAP_CAP2_REGISTER, "cap2 register" }, +}; + + +static const struct bitmap_name cap2_names[] = { + { SVGA_CAP2_GROW_OTABLE, "grow otable" }, + { SVGA_CAP2_INTRA_SURFACE_COPY, "intra surface copy" }, + { SVGA_CAP2_DX2, "dx2" }, + { SVGA_CAP2_GB_MEMSIZE_2, "gb memsize 2" }, + { SVGA_CAP2_SCREENDMA_REG, "screendma reg" }, + { SVGA_CAP2_OTABLE_PTDEPTH_2, "otable ptdepth2" }, + { SVGA_CAP2_NON_MS_TO_MS_STRETCHBLT, "non ms to ms stretchblt" }, + { SVGA_CAP2_CURSOR_MOB, "cursor mob" }, + { SVGA_CAP2_MSHINT, "mshint" }, + { SVGA_CAP2_CB_MAX_SIZE_4MB, "cb max size 4mb" }, + { SVGA_CAP2_DX3, "dx3" }, + { SVGA_CAP2_FRAME_TYPE, "frame type" }, + { SVGA_CAP2_COTABLE_COPY, "cotable copy" }, + { SVGA_CAP2_TRACE_FULL_FB, "trace full fb" }, + { SVGA_CAP2_EXTRA_REGS, "extra regs" }, + { SVGA_CAP2_LO_STAGING, "lo staging" }, +}; + +static void vmw_print_bitmap(struct drm_device *drm, + const char *prefix, uint32_t bitmap, + const struct bitmap_name *bnames, + uint32_t num_names) { - DRM_INFO("Capabilities2:\n"); - if (capabilities2 & SVGA_CAP2_GROW_OTABLE) - DRM_INFO(" Grow oTable.\n"); - if (capabilities2 & SVGA_CAP2_INTRA_SURFACE_COPY) - DRM_INFO(" IntraSurface copy.\n"); - if (capabilities2 & SVGA_CAP2_DX3) - DRM_INFO(" DX3.\n"); + char buf[512]; + uint32_t i; + uint32_t offset = 0; + for (i = 0; i < num_names; ++i) { + if ((bitmap & bnames[i].value) != 0) { + offset += snprintf(buf + offset, + ARRAY_SIZE(buf) - offset, + "%s, ", bnames[i].name); + bitmap &= ~bnames[i].value; + } + } + + drm_info(drm, "%s: %s\n", prefix, buf); + if (bitmap != 0) + drm_dbg(drm, "%s: unknown enums: %x\n", prefix, bitmap); } -static void vmw_print_capabilities(uint32_t capabilities) + +static void vmw_print_sm_type(struct vmw_private *dev_priv) { - DRM_INFO("Capabilities:\n"); - if (capabilities & SVGA_CAP_RECT_COPY) - DRM_INFO(" Rect copy.\n"); - if (capabilities & SVGA_CAP_CURSOR) - DRM_INFO(" Cursor.\n"); - if (capabilities & SVGA_CAP_CURSOR_BYPASS) - DRM_INFO(" Cursor bypass.\n"); - if (capabilities & SVGA_CAP_CURSOR_BYPASS_2) - DRM_INFO(" Cursor bypass 2.\n"); - if (capabilities & SVGA_CAP_8BIT_EMULATION) - DRM_INFO(" 8bit emulation.\n"); - if (capabilities & SVGA_CAP_ALPHA_CURSOR) - DRM_INFO(" Alpha cursor.\n"); - if (capabilities & SVGA_CAP_3D) - DRM_INFO(" 3D.\n"); - if (capabilities & SVGA_CAP_EXTENDED_FIFO) - DRM_INFO(" Extended Fifo.\n"); - if (capabilities & SVGA_CAP_MULTIMON) - DRM_INFO(" Multimon.\n"); - if (capabilities & SVGA_CAP_PITCHLOCK) - DRM_INFO(" Pitchlock.\n"); - if (capabilities & SVGA_CAP_IRQMASK) - DRM_INFO(" Irq mask.\n"); - if (capabilities & SVGA_CAP_DISPLAY_TOPOLOGY) - DRM_INFO(" Display Topology.\n"); - if (capabilities & SVGA_CAP_GMR) - DRM_INFO(" GMR.\n"); - if (capabilities & SVGA_CAP_TRACES) - DRM_INFO(" Traces.\n"); - if (capabilities & SVGA_CAP_GMR2) - DRM_INFO(" GMR2.\n"); - if (capabilities & SVGA_CAP_SCREEN_OBJECT_2) - DRM_INFO(" 
Screen Object 2.\n"); - if (capabilities & SVGA_CAP_COMMAND_BUFFERS) - DRM_INFO(" Command Buffers.\n"); - if (capabilities & SVGA_CAP_CMD_BUFFERS_2) - DRM_INFO(" Command Buffers 2.\n"); - if (capabilities & SVGA_CAP_GBOBJECTS) - DRM_INFO(" Guest Backed Resources.\n"); - if (capabilities & SVGA_CAP_DX) - DRM_INFO(" DX Features.\n"); - if (capabilities & SVGA_CAP_HP_CMD_QUEUE) - DRM_INFO(" HP Command Queue.\n"); + static const char *names[] = { + [VMW_SM_LEGACY] = "Legacy", + [VMW_SM_4] = "SM4", + [VMW_SM_4_1] = "SM4_1", + [VMW_SM_5] = "SM_5", + [VMW_SM_MAX] = "Invalid" + }; + BUILD_BUG_ON(ARRAY_SIZE(names) != (VMW_SM_MAX + 1)); + drm_info(&dev_priv->drm, "Available shader model: %s.\n", + names[dev_priv->sm_type]); } /** @@ -400,10 +438,6 @@ static int vmw_device_init(struct vmw_private *dev_priv) { bool uses_fb_traces = false; - DRM_INFO("width %d\n", vmw_read(dev_priv, SVGA_REG_WIDTH)); - DRM_INFO("height %d\n", vmw_read(dev_priv, SVGA_REG_HEIGHT)); - DRM_INFO("bpp %d\n", vmw_read(dev_priv, SVGA_REG_BITS_PER_PIXEL)); - dev_priv->enable_state = vmw_read(dev_priv, SVGA_REG_ENABLE); dev_priv->config_done_state = vmw_read(dev_priv, SVGA_REG_CONFIG_DONE); dev_priv->traces_state = vmw_read(dev_priv, SVGA_REG_TRACES); @@ -627,7 +661,6 @@ static void vmw_get_initial_size(struct vmw_private *dev_priv) static int vmw_dma_select_mode(struct vmw_private *dev_priv) { static const char *names[vmw_dma_map_max] = { - [vmw_dma_phys] = "Using physical TTM page addresses.", [vmw_dma_alloc_coherent] = "Using coherent TTM pages.", [vmw_dma_map_populate] = "Caching DMA mappings.", [vmw_dma_map_bind] = "Giving up DMA mappings early."}; @@ -643,7 +676,8 @@ static int vmw_dma_select_mode(struct vmw_private *dev_priv) else dev_priv->map_mode = vmw_dma_map_populate; - DRM_INFO("DMA map mode: %s\n", names[dev_priv->map_mode]); + drm_info(&dev_priv->drm, + "DMA map mode: %s\n", names[dev_priv->map_mode]); return 0; } @@ -661,9 +695,9 @@ static int vmw_dma_masks(struct vmw_private *dev_priv) int ret = 0; ret = dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(64)); - if (dev_priv->map_mode != vmw_dma_phys && - (sizeof(unsigned long) == 4 || vmw_restrict_dma_mask)) { - DRM_INFO("Restricting DMA addresses to 44 bits.\n"); + if (sizeof(unsigned long) == 4 || vmw_restrict_dma_mask) { + drm_info(&dev_priv->drm, + "Restricting DMA addresses to 44 bits.\n"); return dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(44)); } @@ -693,7 +727,7 @@ static void vmw_vram_manager_fini(struct vmw_private *dev_priv) } static int vmw_setup_pci_resources(struct vmw_private *dev, - unsigned long pci_id) + u32 pci_id) { resource_size_t rmmio_start; resource_size_t rmmio_size; @@ -715,13 +749,15 @@ static int vmw_setup_pci_resources(struct vmw_private *dev, dev->vram_start = pci_resource_start(pdev, 2); dev->vram_size = pci_resource_len(pdev, 2); - DRM_INFO("Register MMIO at 0x%pa size is %llu kiB\n", + drm_info(&dev->drm, + "Register MMIO at 0x%pa size is %llu kiB\n", &rmmio_start, (uint64_t)rmmio_size / 1024); dev->rmmio = devm_ioremap(dev->drm.dev, rmmio_start, rmmio_size); if (!dev->rmmio) { - DRM_ERROR("Failed mapping registers mmio memory.\n"); + drm_err(&dev->drm, + "Failed mapping registers mmio memory.\n"); pci_release_regions(pdev); return -ENOMEM; } @@ -732,7 +768,8 @@ static int vmw_setup_pci_resources(struct vmw_private *dev, fifo_start = pci_resource_start(pdev, 2); fifo_size = pci_resource_len(pdev, 2); - DRM_INFO("FIFO at %pa size is %llu kiB\n", + drm_info(&dev->drm, + "FIFO at %pa size is %llu kiB\n", &fifo_start, 
(uint64_t)fifo_size / 1024); dev->fifo_mem = devm_memremap(dev->drm.dev, fifo_start, @@ -740,7 +777,8 @@ static int vmw_setup_pci_resources(struct vmw_private *dev, MEMREMAP_WB); if (IS_ERR(dev->fifo_mem)) { - DRM_ERROR("Failed mapping FIFO memory.\n"); + drm_err(&dev->drm, + "Failed mapping FIFO memory.\n"); pci_release_regions(pdev); return PTR_ERR(dev->fifo_mem); } @@ -755,7 +793,8 @@ static int vmw_setup_pci_resources(struct vmw_private *dev, * size will be equal to or bigger than the size reported by * SVGA_REG_VRAM_SIZE. */ - DRM_INFO("VRAM at %pa size is %llu kiB\n", + drm_info(&dev->drm, + "VRAM at %pa size is %llu kiB\n", &dev->vram_start, (uint64_t)dev->vram_size / 1024); return 0; @@ -769,12 +808,14 @@ static int vmw_detect_version(struct vmw_private *dev) SVGA_ID_3 : SVGA_ID_2); svga_id = vmw_read(dev, SVGA_REG_ID); if (svga_id != SVGA_ID_2 && svga_id != SVGA_ID_3) { - DRM_ERROR("Unsupported SVGA ID 0x%x on chipset 0x%x\n", - svga_id, dev->vmw_chipset); + drm_err(&dev->drm, + "Unsupported SVGA ID 0x%x on chipset 0x%x\n", + svga_id, dev->pci_id); return -ENOSYS; } BUG_ON(vmw_is_svga_v3(dev) && (svga_id != SVGA_ID_3)); - DRM_INFO("Running on SVGA version %d.\n", (svga_id & 0xff)); + drm_info(&dev->drm, + "Running on SVGA version %d.\n", (svga_id & 0xff)); return 0; } @@ -785,7 +826,6 @@ static int vmw_driver_load(struct vmw_private *dev_priv, u32 pci_id) bool refuse_dma = false; struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev); - dev_priv->vmw_chipset = pci_id; dev_priv->drm.dev_private = dev_priv; mutex_init(&dev_priv->cmdbuf_mutex); @@ -793,7 +833,6 @@ static int vmw_driver_load(struct vmw_private *dev_priv, u32 pci_id) spin_lock_init(&dev_priv->resource_lock); spin_lock_init(&dev_priv->hw_lock); spin_lock_init(&dev_priv->waiter_lock); - spin_lock_init(&dev_priv->cap_lock); spin_lock_init(&dev_priv->cursor_lock); ret = vmw_setup_pci_resources(dev_priv, pci_id); @@ -830,10 +869,12 @@ static int vmw_driver_load(struct vmw_private *dev_priv, u32 pci_id) ret = vmw_dma_select_mode(dev_priv); if (unlikely(ret != 0)) { - DRM_INFO("Restricting capabilities since DMA not available.\n"); + drm_info(&dev_priv->drm, + "Restricting capabilities since DMA not available.\n"); refuse_dma = true; if (dev_priv->capabilities & SVGA_CAP_GBOBJECTS) - DRM_INFO("Disabling 3D acceleration.\n"); + drm_info(&dev_priv->drm, + "Disabling 3D acceleration.\n"); } dev_priv->vram_size = vmw_read(dev_priv, SVGA_REG_VRAM_SIZE); @@ -879,9 +920,8 @@ static int vmw_driver_load(struct vmw_private *dev_priv, u32 pci_id) mem_size *= 3; dev_priv->max_mob_pages = mem_size * 1024 / PAGE_SIZE; - dev_priv->prim_bb_mem = - vmw_read(dev_priv, - SVGA_REG_MAX_PRIMARY_BOUNDING_BOX_MEM); + dev_priv->max_primary_mem = + vmw_read(dev_priv, SVGA_REG_MAX_PRIMARY_MEM); dev_priv->max_mob_size = vmw_read(dev_priv, SVGA_REG_MOB_MAX_SIZE); dev_priv->stdu_max_width = @@ -900,14 +940,25 @@ static int vmw_driver_load(struct vmw_private *dev_priv, u32 pci_id) } else { dev_priv->texture_max_width = 8192; dev_priv->texture_max_height = 8192; - dev_priv->prim_bb_mem = dev_priv->vram_size; + dev_priv->max_primary_mem = dev_priv->vram_size; } - - vmw_print_capabilities(dev_priv->capabilities); + drm_info(&dev_priv->drm, + "Legacy memory limits: VRAM = %llu kB, FIFO = %llu kB, surface = %u kB\n", + (u64)dev_priv->vram_size / 1024, + (u64)dev_priv->fifo_mem_size / 1024, + dev_priv->memory_size / 1024); + + drm_info(&dev_priv->drm, + "MOB limits: max mob size = %u kB, max mob pages = %u\n", + dev_priv->max_mob_size / 1024, 
dev_priv->max_mob_pages); + + vmw_print_bitmap(&dev_priv->drm, "Capabilities", + dev_priv->capabilities, + cap1_names, ARRAY_SIZE(cap1_names)); if (dev_priv->capabilities & SVGA_CAP_CAP2_REGISTER) - vmw_print_capabilities2(dev_priv->capabilities2); - DRM_INFO("Supports command queues = %d\n", - vmw_cmd_supported((dev_priv))); + vmw_print_bitmap(&dev_priv->drm, "Capabilities2", + dev_priv->capabilities2, + cap2_names, ARRAY_SIZE(cap2_names)); ret = vmw_dma_masks(dev_priv); if (unlikely(ret != 0)) @@ -916,15 +967,16 @@ static int vmw_driver_load(struct vmw_private *dev_priv, u32 pci_id) dma_set_max_seg_size(dev_priv->drm.dev, U32_MAX); if (dev_priv->capabilities & SVGA_CAP_GMR2) { - DRM_INFO("Max GMR ids is %u\n", + drm_info(&dev_priv->drm, + "Max GMR ids is %u\n", (unsigned)dev_priv->max_gmr_ids); - DRM_INFO("Max number of GMR pages is %u\n", + drm_info(&dev_priv->drm, + "Max number of GMR pages is %u\n", (unsigned)dev_priv->max_gmr_pages); - DRM_INFO("Max dedicated hypervisor surface memory is %u kiB\n", - (unsigned)dev_priv->memory_size / 1024); } - DRM_INFO("Maximum display memory size is %llu kiB\n", - (uint64_t)dev_priv->prim_bb_mem / 1024); + drm_info(&dev_priv->drm, + "Maximum display memory size is %llu kiB\n", + (uint64_t)dev_priv->max_primary_mem / 1024); /* Need mmio memory to check for fifo pitchlock cap. */ if (!(dev_priv->capabilities & SVGA_CAP_DISPLAY_TOPOLOGY) && @@ -939,7 +991,8 @@ static int vmw_driver_load(struct vmw_private *dev_priv, u32 pci_id) &vmw_prime_dmabuf_ops); if (unlikely(dev_priv->tdev == NULL)) { - DRM_ERROR("Unable to initialize TTM object management.\n"); + drm_err(&dev_priv->drm, + "Unable to initialize TTM object management.\n"); ret = -ENOMEM; goto out_err0; } @@ -947,7 +1000,8 @@ static int vmw_driver_load(struct vmw_private *dev_priv, u32 pci_id) if (dev_priv->capabilities & SVGA_CAP_IRQMASK) { ret = vmw_irq_install(&dev_priv->drm, pdev->irq); if (ret != 0) { - DRM_ERROR("Failed installing irq: %d\n", ret); + drm_err(&dev_priv->drm, + "Failed installing irq: %d\n", ret); goto out_no_irq; } } @@ -968,7 +1022,8 @@ static int vmw_driver_load(struct vmw_private *dev_priv, u32 pci_id) dev_priv->map_mode == vmw_dma_alloc_coherent, false); if (unlikely(ret != 0)) { - DRM_ERROR("Failed initializing TTM buffer object driver.\n"); + drm_err(&dev_priv->drm, + "Failed initializing TTM buffer object driver.\n"); goto out_no_bdev; } @@ -979,7 +1034,15 @@ static int vmw_driver_load(struct vmw_private *dev_priv, u32 pci_id) ret = vmw_vram_manager_init(dev_priv); if (unlikely(ret != 0)) { - DRM_ERROR("Failed initializing memory manager for VRAM.\n"); + drm_err(&dev_priv->drm, + "Failed initializing memory manager for VRAM.\n"); + goto out_no_vram; + } + + ret = vmw_devcaps_create(dev_priv); + if (unlikely(ret != 0)) { + drm_err(&dev_priv->drm, + "Failed initializing device caps.\n"); goto out_no_vram; } @@ -993,7 +1056,8 @@ static int vmw_driver_load(struct vmw_private *dev_priv, u32 pci_id) if (((dev_priv->capabilities & (SVGA_CAP_GMR | SVGA_CAP_GMR2)) == 0) || refuse_dma || vmw_gmrid_man_init(dev_priv, VMW_PL_GMR) != 0) { - DRM_INFO("No GMR memory available. " + drm_info(&dev_priv->drm, + "No GMR memory available. " "Graphics memory resources are very limited.\n"); dev_priv->has_gmr = false; } @@ -1002,18 +1066,16 @@ static int vmw_driver_load(struct vmw_private *dev_priv, u32 pci_id) dev_priv->has_mob = true; if (vmw_gmrid_man_init(dev_priv, VMW_PL_MOB) != 0) { - DRM_INFO("No MOB memory available. " + drm_info(&dev_priv->drm, + "No MOB memory available. 
" "3D will be disabled.\n"); dev_priv->has_mob = false; } } if (dev_priv->has_mob && (dev_priv->capabilities & SVGA_CAP_DX)) { - spin_lock(&dev_priv->cap_lock); - vmw_write(dev_priv, SVGA_REG_DEV_CAP, SVGA3D_DEVCAP_DXCONTEXT); - if (vmw_read(dev_priv, SVGA_REG_DEV_CAP)) + if (vmw_devcap_get(dev_priv, SVGA3D_DEVCAP_DXCONTEXT)) dev_priv->sm_type = VMW_SM_4; - spin_unlock(&dev_priv->cap_lock); } vmw_validation_mem_init_ttm(dev_priv, VMWGFX_VALIDATION_MEM_GRAN); @@ -1021,15 +1083,11 @@ static int vmw_driver_load(struct vmw_private *dev_priv, u32 pci_id) /* SVGA_CAP2_DX2 (DefineGBSurface_v3) is needed for SM4_1 support */ if (has_sm4_context(dev_priv) && (dev_priv->capabilities2 & SVGA_CAP2_DX2)) { - vmw_write(dev_priv, SVGA_REG_DEV_CAP, SVGA3D_DEVCAP_SM41); - - if (vmw_read(dev_priv, SVGA_REG_DEV_CAP)) + if (vmw_devcap_get(dev_priv, SVGA3D_DEVCAP_SM41)) dev_priv->sm_type = VMW_SM_4_1; - if (has_sm4_1_context(dev_priv) && - (dev_priv->capabilities2 & SVGA_CAP2_DX3)) { - vmw_write(dev_priv, SVGA_REG_DEV_CAP, SVGA3D_DEVCAP_SM5); - if (vmw_read(dev_priv, SVGA_REG_DEV_CAP)) + (dev_priv->capabilities2 & SVGA_CAP2_DX3)) { + if (vmw_devcap_get(dev_priv, SVGA3D_DEVCAP_SM5)) dev_priv->sm_type = VMW_SM_5; } } @@ -1043,14 +1101,7 @@ static int vmw_driver_load(struct vmw_private *dev_priv, u32 pci_id) if (ret) goto out_no_fifo; - if (dev_priv->sm_type == VMW_SM_5) - DRM_INFO("SM5 support available.\n"); - if (dev_priv->sm_type == VMW_SM_4_1) - DRM_INFO("SM4_1 support available.\n"); - if (dev_priv->sm_type == VMW_SM_4) - DRM_INFO("SM4 support available.\n"); - DRM_INFO("Running without reservation semaphore\n"); - + vmw_print_sm_type(dev_priv); vmw_host_printf("vmwgfx: Module Version: %d.%d.%d (kernel: %s)", VMWGFX_DRIVER_MAJOR, VMWGFX_DRIVER_MINOR, VMWGFX_DRIVER_PATCHLEVEL, UTS_RELEASE); @@ -1074,6 +1125,7 @@ out_no_kms: vmw_gmrid_man_fini(dev_priv, VMW_PL_MOB); if (dev_priv->has_gmr) vmw_gmrid_man_fini(dev_priv, VMW_PL_GMR); + vmw_devcaps_destroy(dev_priv); vmw_vram_manager_fini(dev_priv); out_no_vram: ttm_device_fini(&dev_priv->bdev); @@ -1122,6 +1174,7 @@ static void vmw_driver_unload(struct drm_device *dev) vmw_release_device_early(dev_priv); if (dev_priv->has_mob) vmw_gmrid_man_fini(dev_priv, VMW_PL_MOB); + vmw_devcaps_destroy(dev_priv); vmw_vram_manager_fini(dev_priv); ttm_device_fini(&dev_priv->bdev); drm_vma_offset_manager_destroy(&dev_priv->vma_manager); @@ -1137,6 +1190,8 @@ static void vmw_driver_unload(struct drm_device *dev) for (i = vmw_res_context; i < vmw_res_max; ++i) idr_destroy(&dev_priv->res_idr[i]); + vmw_mksstat_remove_all(dev_priv); + pci_release_regions(pdev); } @@ -1560,7 +1615,7 @@ static int vmw_probe(struct pci_dev *pdev, const struct pci_device_id *ent) struct vmw_private *vmw; int ret; - ret = drm_aperture_remove_conflicting_pci_framebuffers(pdev, "svgadrmfb"); + ret = drm_aperture_remove_conflicting_pci_framebuffers(pdev, &driver); if (ret) return ret; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index 5652d982b1ce..a833751099b5 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 OR MIT */ /************************************************************************** * - * Copyright 2009-2015 VMware, Inc., Palo Alto, CA., USA + * Copyright 2009-2021 VMware, Inc., Palo Alto, CA., USA * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -54,10 +54,10 @@ #define 
VMWGFX_DRIVER_NAME "vmwgfx" -#define VMWGFX_DRIVER_DATE "20210218" +#define VMWGFX_DRIVER_DATE "20210722" #define VMWGFX_DRIVER_MAJOR 2 -#define VMWGFX_DRIVER_MINOR 18 -#define VMWGFX_DRIVER_PATCHLEVEL 1 +#define VMWGFX_DRIVER_MINOR 19 +#define VMWGFX_DRIVER_PATCHLEVEL 0 #define VMWGFX_FIFO_STATIC_SIZE (1024*1024) #define VMWGFX_MAX_RELOCATIONS 2048 #define VMWGFX_MAX_VALIDATIONS 2048 @@ -91,6 +91,9 @@ #define VMW_RES_FENCE ttm_driver_type3 #define VMW_RES_SHADER ttm_driver_type4 +#define MKSSTAT_CAPACITY_LOG2 5U +#define MKSSTAT_CAPACITY (1U << MKSSTAT_CAPACITY_LOG2) + struct vmw_fpriv { struct ttm_object_file *tfile; bool gb_aware; /* user-space is guest-backed aware */ @@ -311,7 +314,6 @@ struct vmw_res_cache_entry { * enum vmw_dma_map_mode - indicate how to perform TTM page dma mappings. */ enum vmw_dma_map_mode { - vmw_dma_phys, /* Use physical page addresses */ vmw_dma_alloc_coherent, /* Use TTM coherent pages */ vmw_dma_map_populate, /* Unmap from DMA just after unpopulate */ vmw_dma_map_bind, /* Unmap from DMA just before unbind */ @@ -356,7 +358,6 @@ struct vmw_piter { unsigned long num_pages; bool (*next)(struct vmw_piter *); dma_addr_t (*dma_address)(struct vmw_piter *); - struct page *(*page)(struct vmw_piter *); }; /* @@ -366,7 +367,8 @@ enum vmw_display_unit_type { vmw_du_invalid = 0, vmw_du_legacy, vmw_du_screen_object, - vmw_du_screen_target + vmw_du_screen_target, + vmw_du_max }; struct vmw_validation_context; @@ -486,12 +488,11 @@ struct vmw_private { struct ttm_device bdev; struct drm_vma_offset_manager vma_manager; - unsigned long pci_id; - u32 vmw_chipset; + u32 pci_id; resource_size_t io_start; resource_size_t vram_start; resource_size_t vram_size; - resource_size_t prim_bb_mem; + resource_size_t max_primary_mem; u32 __iomem *rmmio; u32 *fifo_mem; resource_size_t fifo_mem_size; @@ -513,7 +514,6 @@ struct vmw_private { bool has_gmr; bool has_mob; spinlock_t hw_lock; - spinlock_t cap_lock; bool assume_16bpp; enum vmw_sm_type sm_type; @@ -629,6 +629,20 @@ struct vmw_private { /* Validation memory reservation */ struct vmw_validation_mem vvm; + + uint32 *devcaps; + + /* + * mksGuestStat instance-descriptor and pid arrays + */ + struct page *mksstat_user_pages[MKSSTAT_CAPACITY]; + atomic_t mksstat_user_pids[MKSSTAT_CAPACITY]; + +#if IS_ENABLED(CONFIG_DRM_VMWGFX_MKSSTATS) + struct page *mksstat_kern_pages[MKSSTAT_CAPACITY]; + u8 mksstat_kern_top_timer[MKSSTAT_CAPACITY]; + atomic_t mksstat_kern_pids[MKSSTAT_CAPACITY]; +#endif }; static inline struct vmw_surface *vmw_res_to_srf(struct vmw_resource *res) @@ -1073,7 +1087,7 @@ static inline dma_addr_t vmw_piter_dma_addr(struct vmw_piter *viter) */ static inline struct page *vmw_piter_page(struct vmw_piter *viter) { - return viter->page(viter); + return viter->pages[viter->i]; } /** @@ -1281,7 +1295,6 @@ extern struct vmw_cmdbuf_res_manager * vmw_context_res_man(struct vmw_resource *ctx); extern struct vmw_resource *vmw_context_cotable(struct vmw_resource *ctx, SVGACOTableType cotable_type); -extern struct list_head *vmw_context_binding_list(struct vmw_resource *ctx); struct vmw_ctx_binding_state; extern struct vmw_ctx_binding_state * vmw_context_binding_state(struct vmw_resource *ctx); @@ -1502,6 +1515,17 @@ __printf(1, 2) int vmw_host_printf(const char *fmt, ...); int vmw_msg_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); +/* Host mksGuestStats -vmwgfx_msg.c: */ +int vmw_mksstat_get_kern_slot(pid_t pid, struct vmw_private *dev_priv); + +int vmw_mksstat_reset_ioctl(struct drm_device *dev, void *data, + 
struct drm_file *file_priv); +int vmw_mksstat_add_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); +int vmw_mksstat_remove_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); +int vmw_mksstat_remove_all(struct vmw_private *dev_priv); + /* VMW logging */ /** diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index a2b8464b3f56..5f2ffa9de5c8 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -32,6 +32,7 @@ #include <drm/ttm/ttm_placement.h> #include "vmwgfx_so.h" #include "vmwgfx_binding.h" +#include "vmwgfx_mksstat.h" #define VMW_RES_HT_ORDER 12 @@ -2364,7 +2365,7 @@ static int vmw_cmd_dx_set_rendertargets(struct vmw_private *dev_priv, sizeof(SVGA3dRenderTargetViewId); int ret; - if (num_rt_view > SVGA3D_MAX_SIMULTANEOUS_RENDER_TARGETS) { + if (num_rt_view > SVGA3D_DX_MAX_RENDER_TARGETS) { VMW_DEBUG_USER("Invalid DX Rendertarget binding.\n"); return -EINVAL; } @@ -2546,6 +2547,8 @@ static int vmw_cmd_dx_so_define(struct vmw_private *dev_priv, so_type = vmw_so_cmd_to_type(header->id); res = vmw_context_cotable(ctx_node->ctx, vmw_so_cotables[so_type]); + if (IS_ERR(res)) + return PTR_ERR(res); cmd = container_of(header, typeof(*cmd), header); ret = vmw_cotable_notify(res, cmd->defined_id); @@ -4406,6 +4409,9 @@ int vmw_execbuf_ioctl(struct drm_device *dev, void *data, int ret; struct dma_fence *in_fence = NULL; + MKS_STAT_TIME_DECL(MKSSTAT_KERN_EXECBUF); + MKS_STAT_TIME_PUSH(MKSSTAT_KERN_EXECBUF); + /* * Extend the ioctl argument while maintaining backwards compatibility: * We take different code paths depending on the value of arg->version. @@ -4415,7 +4421,8 @@ int vmw_execbuf_ioctl(struct drm_device *dev, void *data, if (unlikely(arg->version > DRM_VMW_EXECBUF_VERSION || arg->version == 0)) { VMW_DEBUG_USER("Incorrect execbuf version.\n"); - return -EINVAL; + ret = -EINVAL; + goto mksstats_out; } switch (arg->version) { @@ -4435,7 +4442,8 @@ int vmw_execbuf_ioctl(struct drm_device *dev, void *data, if (!in_fence) { VMW_DEBUG_USER("Cannot get imported fence\n"); - return -EINVAL; + ret = -EINVAL; + goto mksstats_out; } ret = vmw_wait_dma_fence(dev_priv->fman, in_fence); @@ -4458,5 +4466,8 @@ int vmw_execbuf_ioctl(struct drm_device *dev, void *data, out: if (in_fence) dma_fence_put(in_fence); + +mksstats_out: + MKS_STAT_TIME_POP(MKSSTAT_KERN_EXECBUF); return ret; } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c b/drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c index 28ceb749a733..b2c4af331c9d 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c @@ -71,8 +71,40 @@ static int vmw_gmrid_man_get_node(struct ttm_resource_manager *man, if (gman->max_gmr_pages > 0) { gman->used_gmr_pages += (*res)->num_pages; - if (unlikely(gman->used_gmr_pages > gman->max_gmr_pages)) - goto nospace; + /* + * Because the graphics memory is a soft limit we can try to + * expand it instead of letting the userspace apps crash. + * We're just going to have a sane limit (half of RAM) + * on the number of MOB's that we create and will try to keep + * the system running until we reach that. + */ + if (unlikely(gman->used_gmr_pages > gman->max_gmr_pages)) { + const unsigned long max_graphics_pages = totalram_pages() / 2; + uint32_t new_max_pages = 0; + + DRM_WARN("vmwgfx: mob memory overflow. Consider increasing guest RAM and graphicsMemory.\n"); + vmw_host_printf("vmwgfx, warning: mob memory overflow. 
Consider increasing guest RAM and graphicsMemory.\n"); + + if (gman->max_gmr_pages > (max_graphics_pages / 2)) { + DRM_WARN("vmwgfx: guest requires more than half of RAM for graphics.\n"); + new_max_pages = max_graphics_pages; + } else + new_max_pages = gman->max_gmr_pages * 2; + if (new_max_pages > gman->max_gmr_pages && new_max_pages >= gman->used_gmr_pages) { + DRM_WARN("vmwgfx: increasing guest mob limits to %u kB.\n", + ((new_max_pages) << (PAGE_SHIFT - 10))); + + gman->max_gmr_pages = new_max_pages; + } else { + char buf[256]; + snprintf(buf, sizeof(buf), + "vmwgfx, error: guest graphics is out of memory (mob limit at: %ukB).\n", + ((gman->max_gmr_pages) << (PAGE_SHIFT - 10))); + vmw_host_printf(buf); + DRM_WARN("%s", buf); + goto nospace; + } + } } (*res)->start = id; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c index 4fdacf9924e6..28af34ab6ed6 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c @@ -26,14 +26,9 @@ **************************************************************************/ #include "vmwgfx_drv.h" +#include "vmwgfx_devcaps.h" #include <drm/vmwgfx_drm.h> #include "vmwgfx_kms.h" -#include "device_include/svga3d_caps.h" - -struct svga_3d_compat_cap { - SVGA3dCapsRecordHeader header; - SVGA3dCapPair pairs[SVGA3D_DEVCAP_MAX]; -}; int vmw_getparam_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) @@ -63,7 +58,7 @@ int vmw_getparam_ioctl(struct drm_device *dev, void *data, param->value = vmw_fifo_caps(dev_priv); break; case DRM_VMW_PARAM_MAX_FB_SIZE: - param->value = dev_priv->prim_bb_mem; + param->value = dev_priv->max_primary_mem; break; case DRM_VMW_PARAM_FIFO_HW_VERSION: { @@ -88,16 +83,7 @@ int vmw_getparam_ioctl(struct drm_device *dev, void *data, param->value = dev_priv->memory_size; break; case DRM_VMW_PARAM_3D_CAPS_SIZE: - if ((dev_priv->capabilities & SVGA_CAP_GBOBJECTS) && - vmw_fp->gb_aware) - param->value = SVGA3D_DEVCAP_MAX * sizeof(uint32_t); - else if (dev_priv->capabilities & SVGA_CAP_GBOBJECTS) - param->value = sizeof(struct svga_3d_compat_cap) + - sizeof(uint32_t); - else - param->value = (SVGA_FIFO_3D_CAPS_LAST - - SVGA_FIFO_3D_CAPS + 1) * - sizeof(uint32_t); + param->value = vmw_devcaps_size(dev_priv, vmw_fp->gb_aware); break; case DRM_VMW_PARAM_MAX_MOB_MEMORY: vmw_fp->gb_aware = true; @@ -126,55 +112,6 @@ int vmw_getparam_ioctl(struct drm_device *dev, void *data, return 0; } -static u32 vmw_mask_legacy_multisample(unsigned int cap, u32 fmt_value) -{ - /* - * A version of user-space exists which use MULTISAMPLE_MASKABLESAMPLES - * to check the sample count supported by virtual device. Since there - * never was support for multisample count for backing MOB return 0. - * - * MULTISAMPLE_MASKABLESAMPLES devcap is marked as deprecated by virtual - * device. 
- */ - if (cap == SVGA3D_DEVCAP_DEAD5) - return 0; - - return fmt_value; -} - -static int vmw_fill_compat_cap(struct vmw_private *dev_priv, void *bounce, - size_t size) -{ - struct svga_3d_compat_cap *compat_cap = - (struct svga_3d_compat_cap *) bounce; - unsigned int i; - size_t pair_offset = offsetof(struct svga_3d_compat_cap, pairs); - unsigned int max_size; - - if (size < pair_offset) - return -EINVAL; - - max_size = (size - pair_offset) / sizeof(SVGA3dCapPair); - - if (max_size > SVGA3D_DEVCAP_MAX) - max_size = SVGA3D_DEVCAP_MAX; - - compat_cap->header.length = - (pair_offset + max_size * sizeof(SVGA3dCapPair)) / sizeof(u32); - compat_cap->header.type = SVGA3DCAPS_RECORD_DEVCAPS; - - spin_lock(&dev_priv->cap_lock); - for (i = 0; i < max_size; ++i) { - vmw_write(dev_priv, SVGA_REG_DEV_CAP, i); - compat_cap->pairs[i][0] = i; - compat_cap->pairs[i][1] = vmw_mask_legacy_multisample - (i, vmw_read(dev_priv, SVGA_REG_DEV_CAP)); - } - spin_unlock(&dev_priv->cap_lock); - - return 0; -} - int vmw_get_cap_3d_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) @@ -183,11 +120,9 @@ int vmw_get_cap_3d_ioctl(struct drm_device *dev, void *data, (struct drm_vmw_get_3d_cap_arg *) data; struct vmw_private *dev_priv = vmw_priv(dev); uint32_t size; - u32 *fifo_mem; void __user *buffer = (void __user *)((unsigned long)(arg->buffer)); - void *bounce; + void *bounce = NULL; int ret; - bool gb_objects = !!(dev_priv->capabilities & SVGA_CAP_GBOBJECTS); struct vmw_fpriv *vmw_fp = vmw_fpriv(file_priv); if (unlikely(arg->pad64 != 0 || arg->max_size == 0)) { @@ -195,13 +130,11 @@ int vmw_get_cap_3d_ioctl(struct drm_device *dev, void *data, return -EINVAL; } - if (gb_objects && vmw_fp->gb_aware) - size = SVGA3D_DEVCAP_MAX * sizeof(uint32_t); - else if (gb_objects) - size = sizeof(struct svga_3d_compat_cap) + sizeof(uint32_t); - else - size = (SVGA_FIFO_3D_CAPS_LAST - SVGA_FIFO_3D_CAPS + 1) * - sizeof(uint32_t); + size = vmw_devcaps_size(dev_priv, vmw_fp->gb_aware); + if (unlikely(size == 0)) { + DRM_ERROR("Failed to figure out the devcaps size (no 3D).\n"); + return -ENOMEM; + } if (arg->max_size < size) size = arg->max_size; @@ -212,29 +145,9 @@ int vmw_get_cap_3d_ioctl(struct drm_device *dev, void *data, return -ENOMEM; } - if (gb_objects && vmw_fp->gb_aware) { - int i, num; - uint32_t *bounce32 = (uint32_t *) bounce; - - num = size / sizeof(uint32_t); - if (num > SVGA3D_DEVCAP_MAX) - num = SVGA3D_DEVCAP_MAX; - - spin_lock(&dev_priv->cap_lock); - for (i = 0; i < num; ++i) { - vmw_write(dev_priv, SVGA_REG_DEV_CAP, i); - *bounce32++ = vmw_mask_legacy_multisample - (i, vmw_read(dev_priv, SVGA_REG_DEV_CAP)); - } - spin_unlock(&dev_priv->cap_lock); - } else if (gb_objects) { - ret = vmw_fill_compat_cap(dev_priv, bounce, size); - if (unlikely(ret != 0)) - goto out_err; - } else { - fifo_mem = dev_priv->fifo_mem; - memcpy(bounce, &fifo_mem[SVGA_FIFO_3D_CAPS], size); - } + ret = vmw_devcaps_copy(dev_priv, vmw_fp->gb_aware, bounce, size); + if (unlikely (ret != 0)) + goto out_err; ret = copy_to_user(buffer, bounce, size); if (ret) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c b/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c index b9a9b7ddadbd..c5191de365ca 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c @@ -25,6 +25,7 @@ * **************************************************************************/ +#include <linux/pci.h> #include <linux/sched/signal.h> #include "vmwgfx_drv.h" @@ -287,21 +288,18 @@ static void vmw_irq_preinstall(struct drm_device *dev) void 
vmw_irq_uninstall(struct drm_device *dev) { struct vmw_private *dev_priv = vmw_priv(dev); + struct pci_dev *pdev = to_pci_dev(dev->dev); uint32_t status; if (!(dev_priv->capabilities & SVGA_CAP_IRQMASK)) return; - if (!dev->irq_enabled) - return; - vmw_write(dev_priv, SVGA_REG_IRQMASK, 0); status = vmw_irq_status_read(dev_priv); vmw_irq_status_write(dev_priv, status); - dev->irq_enabled = false; - free_irq(dev->irq, dev); + free_irq(pdev->irq, dev); } /** @@ -313,20 +311,8 @@ void vmw_irq_uninstall(struct drm_device *dev) */ int vmw_irq_install(struct drm_device *dev, int irq) { - int ret; - - if (dev->irq_enabled) - return -EBUSY; - vmw_irq_preinstall(dev); - ret = request_threaded_irq(irq, vmw_irq_handler, vmw_thread_fn, - IRQF_SHARED, VMWGFX_DRIVER_NAME, dev); - if (ret < 0) - return ret; - - dev->irq_enabled = true; - dev->irq = irq; - - return ret; + return request_threaded_irq(irq, vmw_irq_handler, vmw_thread_fn, + IRQF_SHARED, VMWGFX_DRIVER_NAME, dev); } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c index 220f9fd0d420..74fa41909213 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c @@ -100,7 +100,7 @@ static int vmw_cursor_update_bo(struct vmw_private *dev_priv, int ret; kmap_offset = 0; - kmap_num = (width*height*4 + PAGE_SIZE - 1) >> PAGE_SHIFT; + kmap_num = PFN_UP(width*height*4); ret = ttm_bo_reserve(&bo->base, true, false, NULL); if (unlikely(ret != 0)) { @@ -1487,7 +1487,7 @@ static int vmw_kms_check_display_memory(struct drm_device *dev, * SVGA_REG_MAX_PRIMARY_BOUNDING_BOX_MEM is not present vram size is * limit on primary bounding box */ - if (pixel_mem > dev_priv->prim_bb_mem) { + if (pixel_mem > dev_priv->max_primary_mem) { VMW_DEBUG_KMS("Combined output size too large.\n"); return -EINVAL; } @@ -1497,7 +1497,7 @@ static int vmw_kms_check_display_memory(struct drm_device *dev, !(dev_priv->capabilities & SVGA_CAP_NO_BB_RESTRICTION)) { bb_mem = (u64) bounding_box.x2 * bounding_box.y2 * 4; - if (bb_mem > dev_priv->prim_bb_mem) { + if (bb_mem > dev_priv->max_primary_mem) { VMW_DEBUG_KMS("Topology is beyond supported limits.\n"); return -EINVAL; } @@ -1793,6 +1793,13 @@ int vmw_kms_init(struct vmw_private *dev_priv) { struct drm_device *dev = &dev_priv->drm; int ret; + static const char *display_unit_names[] = { + "Invalid", + "Legacy", + "Screen Object", + "Screen Target", + "Invalid (max)" + }; drm_mode_config_init(dev); dev->mode_config.funcs = &vmw_kms_funcs; @@ -1810,6 +1817,9 @@ int vmw_kms_init(struct vmw_private *dev_priv) if (ret) /* Fallback */ ret = vmw_kms_ldu_init_display(dev_priv); } + BUILD_BUG_ON(ARRAY_SIZE(display_unit_names) != (vmw_du_max + 1)); + drm_info(&dev_priv->drm, "%s display unit initialized\n", + display_unit_names[dev_priv->active_display_unit]); return ret; } @@ -1897,7 +1907,7 @@ bool vmw_kms_validate_mode_vram(struct vmw_private *dev_priv, { return ((u64) pitch * (u64) height) < (u64) ((dev_priv->active_display_unit == vmw_du_screen_target) ? 
- dev_priv->prim_bb_mem : dev_priv->vram_size); + dev_priv->max_primary_mem : dev_priv->vram_size); } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c index d85c7eab9469..fb58a71c458f 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c @@ -493,8 +493,7 @@ int vmw_kms_ldu_init_display(struct vmw_private *dev_priv) struct drm_device *dev = &dev_priv->drm; int i, ret; - if (dev_priv->ldu_priv) { - DRM_INFO("ldu system already on\n"); + if (unlikely(dev_priv->ldu_priv)) { return -EINVAL; } @@ -527,8 +526,6 @@ int vmw_kms_ldu_init_display(struct vmw_private *dev_priv) drm_mode_config_reset(dev); - DRM_INFO("Legacy Display Unit initialized\n"); - return 0; err_free: diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_mksstat.h b/drivers/gpu/drm/vmwgfx/vmwgfx_mksstat.h new file mode 100644 index 000000000000..0509f55f07b4 --- /dev/null +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_mksstat.h @@ -0,0 +1,144 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ +/************************************************************************** + * + * Copyright 2021 VMware, Inc., Palo Alto, CA., USA + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef _VMWGFX_MKSSTAT_H_ +#define _VMWGFX_MKSSTAT_H_ + +#include <asm/page.h> + +/* Reservation marker for mksstat pid's */ +#define MKSSTAT_PID_RESERVED -1 + +#if IS_ENABLED(CONFIG_DRM_VMWGFX_MKSSTATS) +/* + * Kernel-internal mksGuestStat counters. The order of this enum dictates the + * order of instantiation of these counters in the mksGuestStat pages. + */ + +typedef enum { + MKSSTAT_KERN_EXECBUF, /* vmw_execbuf_ioctl */ + + MKSSTAT_KERN_COUNT /* Reserved entry; always last */ +} mksstat_kern_stats_t; + +/** + * vmw_mksstat_get_kern_pstat: Computes the address of the MKSGuestStatCounterTime + * array from the address of the base page. + * + * @page_addr: Pointer to the base page. + * Return: Pointer to the MKSGuestStatCounterTime array. + */ + +static inline void *vmw_mksstat_get_kern_pstat(void *page_addr) +{ + return page_addr + PAGE_SIZE * 1; +} + +/** + * vmw_mksstat_get_kern_pinfo: Computes the address of the MKSGuestStatInfoEntry + * array from the address of the base page. + * + * @page_addr: Pointer to the base page. + * Return: Pointer to the MKSGuestStatInfoEntry array. 
+ */ + +static inline void *vmw_mksstat_get_kern_pinfo(void *page_addr) +{ + return page_addr + PAGE_SIZE * 2; +} + +/** + * vmw_mksstat_get_kern_pstrs: Computes the address of the mksGuestStat strings + * sequence from the address of the base page. + * + * @page_addr: Pointer to the base page. + * Return: Pointer to the mksGuestStat strings sequence. + */ + +static inline void *vmw_mksstat_get_kern_pstrs(void *page_addr) +{ + return page_addr + PAGE_SIZE * 3; +} + +/* + * MKS_STAT_TIME_DECL/PUSH/POP macros to be used in timer-counted routines. + */ + +struct mksstat_timer_t { +/* mutable */ mksstat_kern_stats_t old_top; + const u64 t0; + const int slot; +}; + +#define MKS_STAT_TIME_DECL(kern_cntr) \ + struct mksstat_timer_t _##kern_cntr = { \ + .t0 = rdtsc(), \ + .slot = vmw_mksstat_get_kern_slot(current->pid, dev_priv) \ + } + +#define MKS_STAT_TIME_PUSH(kern_cntr) \ + do { \ + if (_##kern_cntr.slot >= 0) { \ + _##kern_cntr.old_top = dev_priv->mksstat_kern_top_timer[_##kern_cntr.slot]; \ + dev_priv->mksstat_kern_top_timer[_##kern_cntr.slot] = kern_cntr; \ + } \ + } while (0) + +#define MKS_STAT_TIME_POP(kern_cntr) \ + do { \ + if (_##kern_cntr.slot >= 0) { \ + const pid_t pid = atomic_cmpxchg(&dev_priv->mksstat_kern_pids[_##kern_cntr.slot], current->pid, MKSSTAT_PID_RESERVED); \ + dev_priv->mksstat_kern_top_timer[_##kern_cntr.slot] = _##kern_cntr.old_top; \ + \ + if (pid == current->pid) { \ + const u64 dt = rdtsc() - _##kern_cntr.t0; \ + MKSGuestStatCounterTime *pstat; \ + \ + BUG_ON(!dev_priv->mksstat_kern_pages[_##kern_cntr.slot]); \ + \ + pstat = vmw_mksstat_get_kern_pstat(page_address(dev_priv->mksstat_kern_pages[_##kern_cntr.slot])); \ + \ + atomic64_inc(&pstat[kern_cntr].counter.count); \ + atomic64_add(dt, &pstat[kern_cntr].selfCycles); \ + atomic64_add(dt, &pstat[kern_cntr].totalCycles); \ + \ + if (_##kern_cntr.old_top != MKSSTAT_KERN_COUNT) \ + atomic64_sub(dt, &pstat[_##kern_cntr.old_top].selfCycles); \ + \ + atomic_set(&dev_priv->mksstat_kern_pids[_##kern_cntr.slot], current->pid); \ + } \ + } \ + } while (0) + +#else +#define MKS_STAT_TIME_DECL(kern_cntr) +#define MKS_STAT_TIME_PUSH(kern_cntr) +#define MKS_STAT_TIME_POP(kern_cntr) + +#endif /* IS_ENABLED(CONFIG_DRM_VMWGFX_MKSSTATS */ + +#endif diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_mob.c b/drivers/gpu/drm/vmwgfx/vmwgfx_mob.c index f2d625415458..f9394207dd3c 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_mob.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_mob.c @@ -37,14 +37,14 @@ #ifdef CONFIG_64BIT #define VMW_PPN_SIZE 8 -#define VMW_MOBFMT_PTDEPTH_0 SVGA3D_MOBFMT_PTDEPTH64_0 -#define VMW_MOBFMT_PTDEPTH_1 SVGA3D_MOBFMT_PTDEPTH64_1 -#define VMW_MOBFMT_PTDEPTH_2 SVGA3D_MOBFMT_PTDEPTH64_2 +#define VMW_MOBFMT_PTDEPTH_0 SVGA3D_MOBFMT_PT64_0 +#define VMW_MOBFMT_PTDEPTH_1 SVGA3D_MOBFMT_PT64_1 +#define VMW_MOBFMT_PTDEPTH_2 SVGA3D_MOBFMT_PT64_2 #else #define VMW_PPN_SIZE 4 -#define VMW_MOBFMT_PTDEPTH_0 SVGA3D_MOBFMT_PTDEPTH_0 -#define VMW_MOBFMT_PTDEPTH_1 SVGA3D_MOBFMT_PTDEPTH_1 -#define VMW_MOBFMT_PTDEPTH_2 SVGA3D_MOBFMT_PTDEPTH_2 +#define VMW_MOBFMT_PTDEPTH_0 SVGA3D_MOBFMT_PT_0 +#define VMW_MOBFMT_PTDEPTH_1 SVGA3D_MOBFMT_PT_1 +#define VMW_MOBFMT_PTDEPTH_2 SVGA3D_MOBFMT_PT_2 #endif /* @@ -70,20 +70,20 @@ struct vmw_mob { * @page_table: Pointer to a struct vmw_mob holding the page table. 
*/ static const struct vmw_otable pre_dx_tables[] = { - {VMWGFX_NUM_MOB * SVGA3D_OTABLE_MOB_ENTRY_SIZE, NULL, true}, - {VMWGFX_NUM_GB_SURFACE * SVGA3D_OTABLE_SURFACE_ENTRY_SIZE, NULL, true}, - {VMWGFX_NUM_GB_CONTEXT * SVGA3D_OTABLE_CONTEXT_ENTRY_SIZE, NULL, true}, - {VMWGFX_NUM_GB_SHADER * SVGA3D_OTABLE_SHADER_ENTRY_SIZE, NULL, true}, - {VMWGFX_NUM_GB_SCREEN_TARGET * SVGA3D_OTABLE_SCREEN_TARGET_ENTRY_SIZE, + {VMWGFX_NUM_MOB * sizeof(SVGAOTableMobEntry), NULL, true}, + {VMWGFX_NUM_GB_SURFACE * sizeof(SVGAOTableSurfaceEntry), NULL, true}, + {VMWGFX_NUM_GB_CONTEXT * sizeof(SVGAOTableContextEntry), NULL, true}, + {VMWGFX_NUM_GB_SHADER * sizeof(SVGAOTableShaderEntry), NULL, true}, + {VMWGFX_NUM_GB_SCREEN_TARGET * sizeof(SVGAOTableScreenTargetEntry), NULL, VMWGFX_ENABLE_SCREEN_TARGET_OTABLE} }; static const struct vmw_otable dx_tables[] = { - {VMWGFX_NUM_MOB * SVGA3D_OTABLE_MOB_ENTRY_SIZE, NULL, true}, - {VMWGFX_NUM_GB_SURFACE * SVGA3D_OTABLE_SURFACE_ENTRY_SIZE, NULL, true}, - {VMWGFX_NUM_GB_CONTEXT * SVGA3D_OTABLE_CONTEXT_ENTRY_SIZE, NULL, true}, - {VMWGFX_NUM_GB_SHADER * SVGA3D_OTABLE_SHADER_ENTRY_SIZE, NULL, true}, - {VMWGFX_NUM_GB_SCREEN_TARGET * SVGA3D_OTABLE_SCREEN_TARGET_ENTRY_SIZE, + {VMWGFX_NUM_MOB * sizeof(SVGAOTableMobEntry), NULL, true}, + {VMWGFX_NUM_GB_SURFACE * sizeof(SVGAOTableSurfaceEntry), NULL, true}, + {VMWGFX_NUM_GB_CONTEXT * sizeof(SVGAOTableContextEntry), NULL, true}, + {VMWGFX_NUM_GB_SHADER * sizeof(SVGAOTableShaderEntry), NULL, true}, + {VMWGFX_NUM_GB_SCREEN_TARGET * sizeof(SVGAOTableScreenTargetEntry), NULL, VMWGFX_ENABLE_SCREEN_TARGET_OTABLE}, {VMWGFX_NUM_DXCONTEXT * sizeof(SVGAOTableDXContextEntry), NULL, true}, }; @@ -155,7 +155,7 @@ static int vmw_setup_otable_base(struct vmw_private *dev_priv, goto out_no_populate; vmw_mob_pt_setup(mob, iter, otable->size >> PAGE_SHIFT); - mob->pt_level += VMW_MOBFMT_PTDEPTH_1 - SVGA3D_MOBFMT_PTDEPTH_1; + mob->pt_level += VMW_MOBFMT_PTDEPTH_1 - SVGA3D_MOBFMT_PT_1; } cmd = VMW_CMD_RESERVE(dev_priv, sizeof(*cmd)); @@ -256,8 +256,7 @@ static int vmw_otable_batch_setup(struct vmw_private *dev_priv, if (!otables[i].enabled) continue; - otables[i].size = - (otables[i].size + PAGE_SIZE - 1) & PAGE_MASK; + otables[i].size = PFN_ALIGN(otables[i].size); bo_size += otables[i].size; } @@ -385,7 +384,7 @@ static unsigned long vmw_mob_calculate_pt_pages(unsigned long data_pages) while (likely(data_size > PAGE_SIZE)) { data_size = DIV_ROUND_UP(data_size, PAGE_SIZE); data_size *= VMW_PPN_SIZE; - tot_size += (data_size + PAGE_SIZE - 1) & PAGE_MASK; + tot_size += PFN_ALIGN(data_size); } return tot_size >> PAGE_SHIFT; @@ -506,11 +505,13 @@ static void vmw_mob_pt_setup(struct vmw_mob *mob, { unsigned long num_pt_pages = 0; struct ttm_buffer_object *bo = mob->pt_bo; - struct vmw_piter save_pt_iter; + struct vmw_piter save_pt_iter = {0}; struct vmw_piter pt_iter; const struct vmw_sg_table *vsgt; int ret; + BUG_ON(num_data_pages == 0); + ret = ttm_bo_reserve(bo, false, true, NULL); BUG_ON(ret != 0); @@ -633,7 +634,7 @@ int vmw_mob_bind(struct vmw_private *dev_priv, vmw_mob_pt_setup(mob, data_iter, num_data_pages); pt_set_up = true; - mob->pt_level += VMW_MOBFMT_PTDEPTH_1 - SVGA3D_MOBFMT_PTDEPTH_1; + mob->pt_level += VMW_MOBFMT_PTDEPTH_1 - SVGA3D_MOBFMT_PT_1; } vmw_fifo_resource_inc(dev_priv); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c b/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c index 3d08f5700bdb..e50fb82a3030 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c @@ -31,10 +31,12 @@ #include <linux/mem_encrypt.h> 
#include <asm/hypervisor.h> +#include <drm/drm_ioctl.h> #include "vmwgfx_drv.h" #include "vmwgfx_msg_x86.h" #include "vmwgfx_msg_arm64.h" +#include "vmwgfx_mksstat.h" #define MESSAGE_STATUS_SUCCESS 0x0001 #define MESSAGE_STATUS_DORECV 0x0002 @@ -56,6 +58,11 @@ #define VMW_PORT_CMD_RECVSIZE (MSG_TYPE_RECVSIZE << 16 | VMW_PORT_CMD_MSG) #define VMW_PORT_CMD_RECVSTATUS (MSG_TYPE_RECVSTATUS << 16 | VMW_PORT_CMD_MSG) +#define VMW_PORT_CMD_MKS_GUEST_STATS 85 +#define VMW_PORT_CMD_MKSGS_RESET (0 << 16 | VMW_PORT_CMD_MKS_GUEST_STATS) +#define VMW_PORT_CMD_MKSGS_ADD_PPN (1 << 16 | VMW_PORT_CMD_MKS_GUEST_STATS) +#define VMW_PORT_CMD_MKSGS_REMOVE_PPN (2 << 16 | VMW_PORT_CMD_MKS_GUEST_STATS) + #define HIGH_WORD(X) ((X & 0xFFFF0000) >> 16) #define MAX_USER_MSG_LENGTH PAGE_SIZE @@ -155,6 +162,7 @@ static unsigned long vmw_port_hb_out(struct rpc_channel *channel, /* HB port can't access encrypted memory. */ if (hb && !mem_encrypt_active()) { unsigned long bp = channel->cookie_high; + u32 channel_id = (channel->channel_id << 16); si = (uintptr_t) msg; di = channel->cookie_low; @@ -162,7 +170,7 @@ static unsigned long vmw_port_hb_out(struct rpc_channel *channel, VMW_PORT_HB_OUT( (MESSAGE_STATUS_SUCCESS << 16) | VMW_PORT_CMD_HB_MSG, msg_len, si, di, - VMWARE_HYPERVISOR_HB | (channel->channel_id << 16) | + VMWARE_HYPERVISOR_HB | channel_id | VMWARE_HYPERVISOR_OUT, VMW_HYPERVISOR_MAGIC, bp, eax, ebx, ecx, edx, si, di); @@ -210,6 +218,7 @@ static unsigned long vmw_port_hb_in(struct rpc_channel *channel, char *reply, /* HB port can't access encrypted memory */ if (hb && !mem_encrypt_active()) { unsigned long bp = channel->cookie_low; + u32 channel_id = (channel->channel_id << 16); si = channel->cookie_high; di = (uintptr_t) reply; @@ -217,7 +226,7 @@ static unsigned long vmw_port_hb_in(struct rpc_channel *channel, char *reply, VMW_PORT_HB_IN( (MESSAGE_STATUS_SUCCESS << 16) | VMW_PORT_CMD_HB_MSG, reply_len, si, di, - VMWARE_HYPERVISOR_HB | (channel->channel_id << 16), + VMWARE_HYPERVISOR_HB | channel_id, VMW_HYPERVISOR_MAGIC, bp, eax, ebx, ecx, edx, si, di); @@ -612,3 +621,569 @@ out_open: return -EINVAL; } + +/** + * reset_ppn_array: Resets a PPN64 array to INVALID_PPN64 content + * + * @arr: Array to reset. + * @size: Array length. + */ +static inline void reset_ppn_array(PPN64 *arr, size_t size) +{ + size_t i; + + BUG_ON(!arr || size == 0); + + for (i = 0; i < size; ++i) + arr[i] = INVALID_PPN64; +} + +/** + * hypervisor_ppn_reset_all: Removes all mksGuestStat instance descriptors from + * the hypervisor. All related pages should be subsequently unpinned or freed. + * + */ +static inline void hypervisor_ppn_reset_all(void) +{ + unsigned long eax, ebx, ecx, edx, si = 0, di = 0; + + VMW_PORT(VMW_PORT_CMD_MKSGS_RESET, + 0, si, di, + 0, + VMW_HYPERVISOR_MAGIC, + eax, ebx, ecx, edx, si, di); +} + +/** + * hypervisor_ppn_add: Adds a single mksGuestStat instance descriptor to the + * hypervisor. Any related userspace pages should be pinned in advance. + * + * @pfn: Physical page number of the instance descriptor + */ +static inline void hypervisor_ppn_add(PPN64 pfn) +{ + unsigned long eax, ebx, ecx, edx, si = 0, di = 0; + + VMW_PORT(VMW_PORT_CMD_MKSGS_ADD_PPN, + (unsigned long)pfn, si, di, + 0, + VMW_HYPERVISOR_MAGIC, + eax, ebx, ecx, edx, si, di); +} + +/** + * hypervisor_ppn_remove: Removes a single mksGuestStat instance descriptor from + * the hypervisor. All related pages should be subsequently unpinned or freed. 
+ * + * @pfn: Physical page number of the instance descriptor + */ +static inline void hypervisor_ppn_remove(PPN64 pfn) +{ + unsigned long eax, ebx, ecx, edx, si = 0, di = 0; + + VMW_PORT(VMW_PORT_CMD_MKSGS_REMOVE_PPN, + (unsigned long)pfn, si, di, + 0, + VMW_HYPERVISOR_MAGIC, + eax, ebx, ecx, edx, si, di); +} + +#if IS_ENABLED(CONFIG_DRM_VMWGFX_MKSSTATS) + +/* Order of the total number of pages used for kernel-internal mksGuestStat; at least 2 */ +#define MKSSTAT_KERNEL_PAGES_ORDER 2 +/* Header to the text description of mksGuestStat instance descriptor */ +#define MKSSTAT_KERNEL_DESCRIPTION "vmwgfx" + +/* Kernel mksGuestStats counter names and desciptions; same order as enum mksstat_kern_stats_t */ +static const char* const mksstat_kern_name_desc[MKSSTAT_KERN_COUNT][2] = +{ + { "vmw_execbuf_ioctl", "vmw_execbuf_ioctl" }, +}; + +/** + * mksstat_init_record: Initializes an MKSGuestStatCounter-based record + * for the respective mksGuestStat index. + * + * @stat_idx: Index of the MKSGuestStatCounter-based mksGuestStat record. + * @pstat: Pointer to array of MKSGuestStatCounterTime. + * @pinfo: Pointer to array of MKSGuestStatInfoEntry. + * @pstrs: Pointer to current end of the name/description sequence. + * Return: Pointer to the new end of the names/description sequence. + */ + +static inline char *mksstat_init_record(mksstat_kern_stats_t stat_idx, + MKSGuestStatCounterTime *pstat, MKSGuestStatInfoEntry *pinfo, char *pstrs) +{ + char *const pstrd = pstrs + strlen(mksstat_kern_name_desc[stat_idx][0]) + 1; + strcpy(pstrs, mksstat_kern_name_desc[stat_idx][0]); + strcpy(pstrd, mksstat_kern_name_desc[stat_idx][1]); + + pinfo[stat_idx].name.s = pstrs; + pinfo[stat_idx].description.s = pstrd; + pinfo[stat_idx].flags = MKS_GUEST_STAT_FLAG_NONE; + pinfo[stat_idx].stat.counter = (MKSGuestStatCounter *)&pstat[stat_idx]; + + return pstrd + strlen(mksstat_kern_name_desc[stat_idx][1]) + 1; +} + +/** + * mksstat_init_record_time: Initializes an MKSGuestStatCounterTime-based record + * for the respective mksGuestStat index. + * + * @stat_idx: Index of the MKSGuestStatCounterTime-based mksGuestStat record. + * @pstat: Pointer to array of MKSGuestStatCounterTime. + * @pinfo: Pointer to array of MKSGuestStatInfoEntry. + * @pstrs: Pointer to current end of the name/description sequence. + * Return: Pointer to the new end of the names/description sequence. + */ + +static inline char *mksstat_init_record_time(mksstat_kern_stats_t stat_idx, + MKSGuestStatCounterTime *pstat, MKSGuestStatInfoEntry *pinfo, char *pstrs) +{ + char *const pstrd = pstrs + strlen(mksstat_kern_name_desc[stat_idx][0]) + 1; + strcpy(pstrs, mksstat_kern_name_desc[stat_idx][0]); + strcpy(pstrd, mksstat_kern_name_desc[stat_idx][1]); + + pinfo[stat_idx].name.s = pstrs; + pinfo[stat_idx].description.s = pstrd; + pinfo[stat_idx].flags = MKS_GUEST_STAT_FLAG_TIME; + pinfo[stat_idx].stat.counterTime = &pstat[stat_idx]; + + return pstrd + strlen(mksstat_kern_name_desc[stat_idx][1]) + 1; +} + +/** + * mksstat_init_kern_id: Creates a single mksGuestStat instance descriptor and + * kernel-internal counters. Adds PFN mapping to the hypervisor. + * + * Create a single mksGuestStat instance descriptor and corresponding structures + * for all kernel-internal counters. The corresponding PFNs are mapped with the + * hypervisor. + * + * @ppage: Output pointer to page containing the instance descriptor. + * Return: Zero on success, negative error code on error. 
+ */ + +static int mksstat_init_kern_id(struct page **ppage) +{ + MKSGuestStatInstanceDescriptor *pdesc; + MKSGuestStatCounterTime *pstat; + MKSGuestStatInfoEntry *pinfo; + char *pstrs, *pstrs_acc; + + /* Allocate pages for the kernel-internal instance descriptor */ + struct page *page = alloc_pages(GFP_KERNEL | __GFP_ZERO, MKSSTAT_KERNEL_PAGES_ORDER); + + if (!page) + return -ENOMEM; + + pdesc = page_address(page); + pstat = vmw_mksstat_get_kern_pstat(pdesc); + pinfo = vmw_mksstat_get_kern_pinfo(pdesc); + pstrs = vmw_mksstat_get_kern_pstrs(pdesc); + + /* Set up all kernel-internal counters and corresponding structures */ + pstrs_acc = pstrs; + pstrs_acc = mksstat_init_record_time(MKSSTAT_KERN_EXECBUF, pstat, pinfo, pstrs_acc); + + /* Add new counters above, in their order of appearance in mksstat_kern_stats_t */ + + BUG_ON(pstrs_acc - pstrs > PAGE_SIZE); + + /* Set up the kernel-internal instance descriptor */ + pdesc->reservedMBZ = 0; + pdesc->statStartVA = (uintptr_t)pstat; + pdesc->strsStartVA = (uintptr_t)pstrs; + pdesc->statLength = sizeof(*pstat) * MKSSTAT_KERN_COUNT; + pdesc->infoLength = sizeof(*pinfo) * MKSSTAT_KERN_COUNT; + pdesc->strsLength = pstrs_acc - pstrs; + snprintf(pdesc->description, ARRAY_SIZE(pdesc->description) - 1, "%s pid=%d", + MKSSTAT_KERNEL_DESCRIPTION, current->pid); + + pdesc->statPPNs[0] = page_to_pfn(virt_to_page(pstat)); + reset_ppn_array(pdesc->statPPNs + 1, ARRAY_SIZE(pdesc->statPPNs) - 1); + + pdesc->infoPPNs[0] = page_to_pfn(virt_to_page(pinfo)); + reset_ppn_array(pdesc->infoPPNs + 1, ARRAY_SIZE(pdesc->infoPPNs) - 1); + + pdesc->strsPPNs[0] = page_to_pfn(virt_to_page(pstrs)); + reset_ppn_array(pdesc->strsPPNs + 1, ARRAY_SIZE(pdesc->strsPPNs) - 1); + + *ppage = page; + + hypervisor_ppn_add((PPN64)page_to_pfn(page)); + + return 0; +} + +/** + * vmw_mksstat_get_kern_slot: Acquires a slot for a single kernel-internal + * mksGuestStat instance descriptor. + * + * Find a slot for a single kernel-internal mksGuestStat instance descriptor. + * In case no such was already present, allocate a new one and set up a kernel- + * internal mksGuestStat instance descriptor for the former. + * + * @pid: Process for which a slot is sought. + * @dev_priv: Identifies the drm private device. + * Return: Non-negative slot on success, negative error code on error. + */ + +int vmw_mksstat_get_kern_slot(pid_t pid, struct vmw_private *dev_priv) +{ + const size_t base = (u32)hash_32(pid, MKSSTAT_CAPACITY_LOG2); + size_t i; + + for (i = 0; i < ARRAY_SIZE(dev_priv->mksstat_kern_pids); ++i) { + const size_t slot = (i + base) % ARRAY_SIZE(dev_priv->mksstat_kern_pids); + + /* Check if an instance descriptor for this pid is already present */ + if (pid == (pid_t)atomic_read(&dev_priv->mksstat_kern_pids[slot])) + return (int)slot; + + /* Set up a new instance descriptor for this pid */ + if (!atomic_cmpxchg(&dev_priv->mksstat_kern_pids[slot], 0, MKSSTAT_PID_RESERVED)) { + const int ret = mksstat_init_kern_id(&dev_priv->mksstat_kern_pages[slot]); + + if (!ret) { + /* Reset top-timer tracking for this slot */ + dev_priv->mksstat_kern_top_timer[slot] = MKSSTAT_KERN_COUNT; + + atomic_set(&dev_priv->mksstat_kern_pids[slot], pid); + return (int)slot; + } + + atomic_set(&dev_priv->mksstat_kern_pids[slot], 0); + return ret; + } + } + + return -ENOSPC; +} + +#endif + +/** + * vmw_mksstat_cleanup_descriptor: Frees a single userspace-originating + * mksGuestStat instance-descriptor page and unpins all related user pages. 
+ * + * Unpin all user pages realated to this instance descriptor and free + * the instance-descriptor page itself. + * + * @page: Page of the instance descriptor. + */ + +static void vmw_mksstat_cleanup_descriptor(struct page *page) +{ + MKSGuestStatInstanceDescriptor *pdesc = page_address(page); + size_t i; + + for (i = 0; i < ARRAY_SIZE(pdesc->statPPNs) && pdesc->statPPNs[i] != INVALID_PPN64; ++i) + unpin_user_page(pfn_to_page(pdesc->statPPNs[i])); + + for (i = 0; i < ARRAY_SIZE(pdesc->infoPPNs) && pdesc->infoPPNs[i] != INVALID_PPN64; ++i) + unpin_user_page(pfn_to_page(pdesc->infoPPNs[i])); + + for (i = 0; i < ARRAY_SIZE(pdesc->strsPPNs) && pdesc->strsPPNs[i] != INVALID_PPN64; ++i) + unpin_user_page(pfn_to_page(pdesc->strsPPNs[i])); + + __free_page(page); +} + +/** + * vmw_mksstat_remove_all: Resets all mksGuestStat instance descriptors + * from the hypervisor. + * + * Discard all hypervisor PFN mappings, containing active mksGuestState instance + * descriptors, unpin the related userspace pages and free the related kernel pages. + * + * @dev_priv: Identifies the drm private device. + * Return: Zero on success, negative error code on error. + */ + +int vmw_mksstat_remove_all(struct vmw_private *dev_priv) +{ + int ret = 0; + size_t i; + + /* Discard all PFN mappings with the hypervisor */ + hypervisor_ppn_reset_all(); + + /* Discard all userspace-originating instance descriptors and unpin all related pages */ + for (i = 0; i < ARRAY_SIZE(dev_priv->mksstat_user_pids); ++i) { + const pid_t pid0 = (pid_t)atomic_read(&dev_priv->mksstat_user_pids[i]); + + if (!pid0) + continue; + + if (pid0 != MKSSTAT_PID_RESERVED) { + const pid_t pid1 = atomic_cmpxchg(&dev_priv->mksstat_user_pids[i], pid0, MKSSTAT_PID_RESERVED); + + if (!pid1) + continue; + + if (pid1 == pid0) { + struct page *const page = dev_priv->mksstat_user_pages[i]; + + BUG_ON(!page); + + dev_priv->mksstat_user_pages[i] = NULL; + atomic_set(&dev_priv->mksstat_user_pids[i], 0); + + vmw_mksstat_cleanup_descriptor(page); + continue; + } + } + + ret = -EAGAIN; + } + +#if IS_ENABLED(CONFIG_DRM_VMWGFX_MKSSTATS) + /* Discard all kernel-internal instance descriptors and free all related pages */ + for (i = 0; i < ARRAY_SIZE(dev_priv->mksstat_kern_pids); ++i) { + const pid_t pid0 = (pid_t)atomic_read(&dev_priv->mksstat_kern_pids[i]); + + if (!pid0) + continue; + + if (pid0 != MKSSTAT_PID_RESERVED) { + const pid_t pid1 = atomic_cmpxchg(&dev_priv->mksstat_kern_pids[i], pid0, MKSSTAT_PID_RESERVED); + + if (!pid1) + continue; + + if (pid1 == pid0) { + struct page *const page = dev_priv->mksstat_kern_pages[i]; + + BUG_ON(!page); + + dev_priv->mksstat_kern_pages[i] = NULL; + atomic_set(&dev_priv->mksstat_kern_pids[i], 0); + + __free_pages(page, MKSSTAT_KERNEL_PAGES_ORDER); + continue; + } + } + + ret = -EAGAIN; + } + +#endif + return ret; +} + +/** + * vmw_mksstat_reset_ioctl: Resets all mksGuestStat instance descriptors + * from the hypervisor. + * + * Discard all hypervisor PFN mappings, containing active mksGuestStat instance + * descriptors, unpin the related userspace pages and free the related kernel pages. + * + * @dev: Identifies the drm device. + * @data: Pointer to the ioctl argument. + * @file_priv: Identifies the caller; unused. + * Return: Zero on success, negative error code on error. 
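From userspace, the ioctls implemented below are driven roughly as follows: the add ioctl takes user pointers and lengths for the stat/info/strs arrays plus a description string and returns a slot id, the remove ioctl gives that id back, and the reset ioctl tears everything down, returning -EAGAIN (per the code above) if it races with a slot being set up. A hedged sketch; the DRM_IOCTL_VMW_MKSSTAT_* request macros, the uAPI header path and the exact field types are assumptions, while the field names mirror those used by vmw_mksstat_add_ioctl() below:

    #include <errno.h>
    #include <stdint.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <drm/vmwgfx_drm.h>   /* assumed location of the vmwgfx uAPI header */

    static int mksstat_reset(int drm_fd)
    {
        int ret;

        do {
            ret = ioctl(drm_fd, DRM_IOCTL_VMW_MKSSTAT_RESET);
        } while (ret == -1 && errno == EAGAIN);   /* raced with a slot set-up */

        return ret;
    }

    static int mksstat_add(int drm_fd, void *stat, uint64_t stat_len,
                           void *info, uint64_t info_len,
                           void *strs, uint64_t strs_len, const char *desc)
    {
        struct drm_vmw_mksstat_add_arg arg;

        memset(&arg, 0, sizeof(arg));
        arg.stat = (uintptr_t)stat;  arg.stat_len = stat_len;
        arg.info = (uintptr_t)info;  arg.info_len = info_len;
        arg.strs = (uintptr_t)strs;  arg.strs_len = strs_len;
        arg.description = (uintptr_t)desc;

        if (ioctl(drm_fd, DRM_IOCTL_VMW_MKSSTAT_ADD, &arg) == -1)
            return -errno;

        return arg.id;   /* slot to hand back later via the remove ioctl */
    }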
+ */ + +int vmw_mksstat_reset_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct vmw_private *const dev_priv = vmw_priv(dev); + return vmw_mksstat_remove_all(dev_priv); +} + +/** + * vmw_mksstat_add_ioctl: Creates a single userspace-originating mksGuestStat + * instance descriptor and registers that with the hypervisor. + * + * Create a hypervisor PFN mapping, containing a single mksGuestStat instance + * descriptor and pin the corresponding userspace pages. + * + * @dev: Identifies the drm device. + * @data: Pointer to the ioctl argument. + * @file_priv: Identifies the caller; unused. + * Return: Zero on success, negative error code on error. + */ + +int vmw_mksstat_add_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_vmw_mksstat_add_arg *arg = + (struct drm_vmw_mksstat_add_arg *) data; + + struct vmw_private *const dev_priv = vmw_priv(dev); + + struct page *page; + MKSGuestStatInstanceDescriptor *pdesc; + const size_t num_pages_stat = PFN_UP(arg->stat_len); + const size_t num_pages_info = PFN_UP(arg->info_len); + const size_t num_pages_strs = PFN_UP(arg->strs_len); + long desc_len; + long nr_pinned_stat; + long nr_pinned_info; + long nr_pinned_strs; + struct page *pages_stat[ARRAY_SIZE(pdesc->statPPNs)]; + struct page *pages_info[ARRAY_SIZE(pdesc->infoPPNs)]; + struct page *pages_strs[ARRAY_SIZE(pdesc->strsPPNs)]; + size_t i, slot; + + arg->id = -1; + + if (!arg->stat || !arg->info || !arg->strs) + return -EINVAL; + + if (!arg->stat_len || !arg->info_len || !arg->strs_len) + return -EINVAL; + + if (!arg->description) + return -EINVAL; + + if (num_pages_stat > ARRAY_SIZE(pdesc->statPPNs) || + num_pages_info > ARRAY_SIZE(pdesc->infoPPNs) || + num_pages_strs > ARRAY_SIZE(pdesc->strsPPNs)) + return -EINVAL; + + /* Find an available slot in the mksGuestStats user array and reserve it */ + for (slot = 0; slot < ARRAY_SIZE(dev_priv->mksstat_user_pids); ++slot) + if (!atomic_cmpxchg(&dev_priv->mksstat_user_pids[slot], 0, MKSSTAT_PID_RESERVED)) + break; + + if (slot == ARRAY_SIZE(dev_priv->mksstat_user_pids)) + return -ENOSPC; + + BUG_ON(dev_priv->mksstat_user_pages[slot]); + + /* Allocate a page for the instance descriptor */ + page = alloc_page(GFP_KERNEL | __GFP_ZERO); + + if (!page) { + atomic_set(&dev_priv->mksstat_user_pids[slot], 0); + return -ENOMEM; + } + + /* Set up the instance descriptor */ + pdesc = page_address(page); + + pdesc->reservedMBZ = 0; + pdesc->statStartVA = arg->stat; + pdesc->strsStartVA = arg->strs; + pdesc->statLength = arg->stat_len; + pdesc->infoLength = arg->info_len; + pdesc->strsLength = arg->strs_len; + desc_len = strncpy_from_user(pdesc->description, u64_to_user_ptr(arg->description), + ARRAY_SIZE(pdesc->description) - 1); + + if (desc_len < 0) { + atomic_set(&dev_priv->mksstat_user_pids[slot], 0); + return -EFAULT; + } + + reset_ppn_array(pdesc->statPPNs, ARRAY_SIZE(pdesc->statPPNs)); + reset_ppn_array(pdesc->infoPPNs, ARRAY_SIZE(pdesc->infoPPNs)); + reset_ppn_array(pdesc->strsPPNs, ARRAY_SIZE(pdesc->strsPPNs)); + + /* Pin mksGuestStat user pages and store those in the instance descriptor */ + nr_pinned_stat = pin_user_pages(arg->stat, num_pages_stat, FOLL_LONGTERM, pages_stat, NULL); + if (num_pages_stat != nr_pinned_stat) + goto err_pin_stat; + + for (i = 0; i < num_pages_stat; ++i) + pdesc->statPPNs[i] = page_to_pfn(pages_stat[i]); + + nr_pinned_info = pin_user_pages(arg->info, num_pages_info, FOLL_LONGTERM, pages_info, NULL); + if (num_pages_info != nr_pinned_info) + goto err_pin_info; 
+ + for (i = 0; i < num_pages_info; ++i) + pdesc->infoPPNs[i] = page_to_pfn(pages_info[i]); + + nr_pinned_strs = pin_user_pages(arg->strs, num_pages_strs, FOLL_LONGTERM, pages_strs, NULL); + if (num_pages_strs != nr_pinned_strs) + goto err_pin_strs; + + for (i = 0; i < num_pages_strs; ++i) + pdesc->strsPPNs[i] = page_to_pfn(pages_strs[i]); + + /* Send the descriptor to the host via a hypervisor call. The mksGuestStat + pages will remain in use until the user requests a matching remove stats + or a stats reset occurs. */ + hypervisor_ppn_add((PPN64)page_to_pfn(page)); + + dev_priv->mksstat_user_pages[slot] = page; + atomic_set(&dev_priv->mksstat_user_pids[slot], task_pgrp_vnr(current)); + + arg->id = slot; + + DRM_DEV_INFO(dev->dev, "pid=%d arg.description='%.*s' id=%zu\n", current->pid, (int)desc_len, pdesc->description, slot); + + return 0; + +err_pin_strs: + if (nr_pinned_strs > 0) + unpin_user_pages(pages_strs, nr_pinned_strs); + +err_pin_info: + if (nr_pinned_info > 0) + unpin_user_pages(pages_info, nr_pinned_info); + +err_pin_stat: + if (nr_pinned_stat > 0) + unpin_user_pages(pages_stat, nr_pinned_stat); + + atomic_set(&dev_priv->mksstat_user_pids[slot], 0); + __free_page(page); + return -ENOMEM; +} + +/** + * vmw_mksstat_remove_ioctl: Removes a single userspace-originating mksGuestStat + * instance descriptor from the hypervisor. + * + * Discard a hypervisor PFN mapping, containing a single mksGuestStat instance + * descriptor and unpin the corresponding userspace pages. + * + * @dev: Identifies the drm device. + * @data: Pointer to the ioctl argument. + * @file_priv: Identifies the caller; unused. + * Return: Zero on success, negative error code on error. + */ + +int vmw_mksstat_remove_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_vmw_mksstat_remove_arg *arg = + (struct drm_vmw_mksstat_remove_arg *) data; + + struct vmw_private *const dev_priv = vmw_priv(dev); + + const size_t slot = arg->id; + pid_t pgid, pid; + + if (slot >= ARRAY_SIZE(dev_priv->mksstat_user_pids)) + return -EINVAL; + + DRM_DEV_INFO(dev->dev, "pid=%d arg.id=%zu\n", current->pid, slot); + + pgid = task_pgrp_vnr(current); + pid = atomic_cmpxchg(&dev_priv->mksstat_user_pids[slot], pgid, MKSSTAT_PID_RESERVED); + + if (!pid) + return 0; + + if (pid == pgid) { + struct page *const page = dev_priv->mksstat_user_pages[slot]; + + BUG_ON(!page); + + dev_priv->mksstat_user_pages[slot] = NULL; + atomic_set(&dev_priv->mksstat_user_pids[slot], 0); + + hypervisor_ppn_remove((PPN64)page_to_pfn(page)); + + vmw_mksstat_cleanup_descriptor(page); + return 0; + } + + return -EAGAIN; +} diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c index 7b45393ad98e..8d1e869cc196 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c @@ -114,6 +114,7 @@ static void vmw_resource_release(struct kref *kref) container_of(kref, struct vmw_resource, kref); struct vmw_private *dev_priv = res->dev_priv; int id; + int ret; struct idr *idr = &dev_priv->res_idr[res->func->res_type]; spin_lock(&dev_priv->resource_lock); @@ -122,7 +123,8 @@ static void vmw_resource_release(struct kref *kref) if (res->backup) { struct ttm_buffer_object *bo = &res->backup->base; - ttm_bo_reserve(bo, false, false, NULL); + ret = ttm_bo_reserve(bo, false, false, NULL); + BUG_ON(ret); if (vmw_resource_mob_attached(res) && res->func->unbind != NULL) { struct ttm_validate_buffer val_buf; @@ -351,8 +353,7 @@ int vmw_user_lookup_handle(struct 
vmw_private *dev_priv, static int vmw_resource_buf_alloc(struct vmw_resource *res, bool interruptible) { - unsigned long size = - (res->backup_size + PAGE_SIZE - 1) & PAGE_MASK; + unsigned long size = PFN_ALIGN(res->backup_size); struct vmw_buffer_object *backup; int ret; @@ -1001,7 +1002,9 @@ int vmw_resource_pin(struct vmw_resource *res, bool interruptible) if (res->backup) { vbo = res->backup; - ttm_bo_reserve(&vbo->base, interruptible, false, NULL); + ret = ttm_bo_reserve(&vbo->base, interruptible, false, NULL); + if (ret) + goto out_no_validate; if (!vbo->base.pin_count) { ret = ttm_bo_validate (&vbo->base, diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c index 145430d14219..bd157fb21b45 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c @@ -954,8 +954,6 @@ int vmw_kms_sou_init_display(struct vmw_private *dev_priv) int i, ret; if (!(dev_priv->capabilities & SVGA_CAP_SCREEN_OBJECT_2)) { - DRM_INFO("Not using screen objects," - " missing cap SCREEN_OBJECT_2\n"); return -ENOSYS; } @@ -972,8 +970,6 @@ int vmw_kms_sou_init_display(struct vmw_private *dev_priv) drm_mode_config_reset(dev); - DRM_INFO("Screen Objects Display Unit initialized\n"); - return 0; } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c b/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c index b3c8d2da6f1a..b8dd62529104 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c @@ -981,8 +981,7 @@ int vmw_compat_shader_add(struct vmw_private *dev_priv, goto no_reserve; /* Map and copy shader bytecode. */ - ret = ttm_bo_kmap(&buf->base, 0, PAGE_ALIGN(size) >> PAGE_SHIFT, - &map); + ret = ttm_bo_kmap(&buf->base, 0, PFN_UP(size), &map); if (unlikely(ret != 0)) { ttm_bo_unreserve(&buf->base); goto no_reserve; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_so.c b/drivers/gpu/drm/vmwgfx/vmwgfx_so.c index c3a8d6e8380e..9efb4463ce99 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_so.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_so.c @@ -539,7 +539,8 @@ const SVGACOTableType vmw_so_cotables[] = { [vmw_so_ds] = SVGA_COTABLE_DEPTHSTENCIL, [vmw_so_rs] = SVGA_COTABLE_RASTERIZERSTATE, [vmw_so_ss] = SVGA_COTABLE_SAMPLER, - [vmw_so_so] = SVGA_COTABLE_STREAMOUTPUT + [vmw_so_so] = SVGA_COTABLE_STREAMOUTPUT, + [vmw_so_max]= SVGA_COTABLE_MAX }; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c index 9e236f9f8a8a..d85310b2608d 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c @@ -33,7 +33,7 @@ #include <drm/drm_vblank.h> #include "vmwgfx_kms.h" -#include "device_include/svga3d_surfacedefs.h" +#include "vmw_surface_cache.h" #define vmw_crtc_to_stdu(x) \ container_of(x, struct vmw_screen_target_display_unit, base.crtc) @@ -1889,14 +1889,13 @@ int vmw_kms_stdu_init_display(struct vmw_private *dev_priv) ret = vmw_stdu_init(dev_priv, i); if (unlikely(ret != 0)) { - DRM_ERROR("Failed to initialize STDU %d", i); + drm_err(&dev_priv->drm, + "Failed to initialize STDU %d", i); return ret; } } drm_mode_config_reset(dev); - DRM_INFO("Screen Target Display device initialized\n"); - return 0; } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c index 0835468bb2ee..5d53a5f9d123 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c @@ -31,6 +31,7 @@ #include "vmwgfx_resource_priv.h" #include "vmwgfx_so.h" #include "vmwgfx_binding.h" +#include "vmw_surface_cache.h" #include 
"device_include/svga3d_surfacedefs.h" #define SVGA3D_FLAGS_64(upper32, lower32) (((uint64_t)upper32 << 32) | lower32) @@ -78,7 +79,7 @@ struct vmw_surface_offset { * @boxes: Array of SVGA3dBoxes indicating dirty regions. One per subresource. */ struct vmw_surface_dirty { - struct svga3dsurface_cache cache; + struct vmw_surface_cache cache; size_t size; u32 num_subres; SVGA3dBox boxes[]; @@ -307,8 +308,8 @@ static void vmw_surface_dma_encode(struct vmw_surface *srf, { uint32_t i; struct vmw_surface_dma *cmd = (struct vmw_surface_dma *)cmd_space; - const struct svga3d_surface_desc *desc = - svga3dsurface_get_desc(srf->metadata.format); + const struct SVGA3dSurfaceDesc *desc = + vmw_surface_get_desc(srf->metadata.format); for (i = 0; i < srf->metadata.num_sizes; ++i) { SVGA3dCmdHeader *header = &cmd->header; @@ -323,8 +324,7 @@ static void vmw_surface_dma_encode(struct vmw_surface *srf, body->guest.ptr = *ptr; body->guest.ptr.offset += cur_offset->bo_offset; - body->guest.pitch = svga3dsurface_calculate_pitch(desc, - cur_size); + body->guest.pitch = vmw_surface_calculate_pitch(desc, cur_size); body->host.sid = srf->res.id; body->host.face = cur_offset->face; body->host.mipmap = cur_offset->mip; @@ -342,7 +342,7 @@ static void vmw_surface_dma_encode(struct vmw_surface *srf, suffix->suffixSize = sizeof(*suffix); suffix->maximumOffset = - svga3dsurface_get_image_buffer_size(desc, cur_size, + vmw_surface_get_image_buffer_size(desc, cur_size, body->guest.pitch); suffix->flags.discard = 0; suffix->flags.unsynchronized = 0; @@ -432,7 +432,7 @@ static int vmw_legacy_srf_create(struct vmw_resource *res) goto out_no_id; } - if (unlikely(res->id >= SVGA3D_MAX_SURFACE_IDS)) { + if (unlikely(res->id >= SVGA3D_HB_MAX_SURFACE_IDS)) { ret = -EBUSY; goto out_no_fifo; } @@ -751,7 +751,7 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data, struct vmw_surface_offset *cur_offset; uint32_t num_sizes; uint32_t size; - const struct svga3d_surface_desc *desc; + const SVGA3dSurfaceDesc *desc; if (unlikely(vmw_user_surface_size == 0)) vmw_user_surface_size = ttm_round_pot(sizeof(*user_srf)) + @@ -772,8 +772,8 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data, ttm_round_pot(num_sizes * sizeof(struct drm_vmw_size)) + ttm_round_pot(num_sizes * sizeof(struct vmw_surface_offset)); - desc = svga3dsurface_get_desc(req->format); - if (unlikely(desc->block_desc == SVGA3DBLOCKDESC_NONE)) { + desc = vmw_surface_get_desc(req->format); + if (unlikely(desc->blockDesc == SVGA3DBLOCKDESC_NONE)) { VMW_DEBUG_USER("Invalid format %d for surface creation.\n", req->format); return -EINVAL; @@ -833,13 +833,13 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data, for (i = 0; i < DRM_VMW_MAX_SURFACE_FACES; ++i) { for (j = 0; j < metadata->mip_levels[i]; ++j) { - uint32_t stride = svga3dsurface_calculate_pitch - (desc, cur_size); + uint32_t stride = vmw_surface_calculate_pitch( + desc, cur_size); cur_offset->face = i; cur_offset->mip = j; cur_offset->bo_offset = cur_bo_offset; - cur_bo_offset += svga3dsurface_get_image_buffer_size + cur_bo_offset += vmw_surface_get_image_buffer_size (desc, cur_size, stride); ++cur_offset; ++cur_size; @@ -865,7 +865,7 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data, user_srf->prime.base.shareable = false; user_srf->prime.base.tfile = NULL; if (drm_is_primary_client(file_priv)) - user_srf->master = drm_master_get(file_priv->master); + user_srf->master = drm_file_get_master(file_priv); /** * From this point, the generic resource management functions 
@@ -1534,7 +1534,7 @@ vmw_gb_surface_define_internal(struct drm_device *dev, user_srf = container_of(srf, struct vmw_user_surface, srf); if (drm_is_primary_client(file_priv)) - user_srf->master = drm_master_get(file_priv->master); + user_srf->master = drm_file_get_master(file_priv); res = &user_srf->srf.res; @@ -1711,10 +1711,10 @@ out_bad_resource: * than partial z slices are dirtied. */ static void vmw_subres_dirty_add(struct vmw_surface_dirty *dirty, - const struct svga3dsurface_loc *loc_start, - const struct svga3dsurface_loc *loc_end) + const struct vmw_surface_loc *loc_start, + const struct vmw_surface_loc *loc_end) { - const struct svga3dsurface_cache *cache = &dirty->cache; + const struct vmw_surface_cache *cache = &dirty->cache; SVGA3dBox *box = &dirty->boxes[loc_start->sub_resource]; u32 mip = loc_start->sub_resource % cache->num_mip_levels; const struct drm_vmw_size *size = &cache->mip[mip].size; @@ -1760,7 +1760,7 @@ static void vmw_subres_dirty_add(struct vmw_surface_dirty *dirty, */ static void vmw_subres_dirty_full(struct vmw_surface_dirty *dirty, u32 subres) { - const struct svga3dsurface_cache *cache = &dirty->cache; + const struct vmw_surface_cache *cache = &dirty->cache; u32 mip = subres % cache->num_mip_levels; const struct drm_vmw_size *size = &cache->mip[mip].size; SVGA3dBox *box = &dirty->boxes[subres]; @@ -1783,15 +1783,15 @@ static void vmw_surface_tex_dirty_range_add(struct vmw_resource *res, struct vmw_surface_dirty *dirty = (struct vmw_surface_dirty *) res->dirty; size_t backup_end = res->backup_offset + res->backup_size; - struct svga3dsurface_loc loc1, loc2; - const struct svga3dsurface_cache *cache; + struct vmw_surface_loc loc1, loc2; + const struct vmw_surface_cache *cache; start = max_t(size_t, start, res->backup_offset) - res->backup_offset; end = min(end, backup_end) - res->backup_offset; cache = &dirty->cache; - svga3dsurface_get_loc(cache, &loc1, start); - svga3dsurface_get_loc(cache, &loc2, end - 1); - svga3dsurface_inc_loc(cache, &loc2); + vmw_surface_get_loc(cache, &loc1, start); + vmw_surface_get_loc(cache, &loc2, end - 1); + vmw_surface_inc_loc(cache, &loc2); if (loc1.sheet != loc2.sheet) { u32 sub_res; @@ -1811,12 +1811,12 @@ static void vmw_surface_tex_dirty_range_add(struct vmw_resource *res, vmw_subres_dirty_add(dirty, &loc1, &loc2); } else { /* Dirty range covers multiple sub-resources */ - struct svga3dsurface_loc loc_min, loc_max; + struct vmw_surface_loc loc_min, loc_max; u32 sub_res; - svga3dsurface_max_loc(cache, loc1.sub_resource, &loc_max); + vmw_surface_max_loc(cache, loc1.sub_resource, &loc_max); vmw_subres_dirty_add(dirty, &loc1, &loc_max); - svga3dsurface_min_loc(cache, loc2.sub_resource - 1, &loc_min); + vmw_surface_min_loc(cache, loc2.sub_resource - 1, &loc_min); vmw_subres_dirty_add(dirty, &loc_min, &loc2); for (sub_res = loc1.sub_resource + 1; sub_res < loc2.sub_resource - 1; ++sub_res) @@ -1833,7 +1833,7 @@ static void vmw_surface_buf_dirty_range_add(struct vmw_resource *res, { struct vmw_surface_dirty *dirty = (struct vmw_surface_dirty *) res->dirty; - const struct svga3dsurface_cache *cache = &dirty->cache; + const struct vmw_surface_cache *cache = &dirty->cache; size_t backup_end = res->backup_offset + cache->mip_chain_bytes; SVGA3dBox *box = &dirty->boxes[0]; u32 box_c2; @@ -1872,12 +1872,11 @@ static void vmw_surface_dirty_range_add(struct vmw_resource *res, size_t start, static int vmw_surface_dirty_sync(struct vmw_resource *res) { struct vmw_private *dev_priv = res->dev_priv; - bool has_dx = 0; u32 i, num_dirty; struct 
vmw_surface_dirty *dirty = (struct vmw_surface_dirty *) res->dirty; size_t alloc_size; - const struct svga3dsurface_cache *cache = &dirty->cache; + const struct vmw_surface_cache *cache = &dirty->cache; struct { SVGA3dCmdHeader header; SVGA3dCmdDXUpdateSubResource body; @@ -1899,7 +1898,7 @@ static int vmw_surface_dirty_sync(struct vmw_resource *res) if (!num_dirty) goto out; - alloc_size = num_dirty * ((has_dx) ? sizeof(*cmd1) : sizeof(*cmd2)); + alloc_size = num_dirty * ((has_sm4_context(dev_priv)) ? sizeof(*cmd1) : sizeof(*cmd2)); cmd = VMW_CMD_RESERVE(dev_priv, alloc_size); if (!cmd) return -ENOMEM; @@ -1917,7 +1916,7 @@ static int vmw_surface_dirty_sync(struct vmw_resource *res) * DX_UPDATE_SUBRESOURCE is aware of array surfaces. * UPDATE_GB_IMAGE is not. */ - if (has_dx) { + if (has_sm4_context(dev_priv)) { cmd1->header.id = SVGA_3D_CMD_DX_UPDATE_SUBRESOURCE; cmd1->header.size = sizeof(cmd1->body); cmd1->body.sid = res->id; @@ -1990,7 +1989,7 @@ static int vmw_surface_dirty_alloc(struct vmw_resource *res) } num_samples = max_t(u32, 1, metadata->multisample_count); - ret = svga3dsurface_setup_cache(&metadata->base_size, metadata->format, + ret = vmw_surface_setup_cache(&metadata->base_size, metadata->format, num_mip, num_layers, num_samples, &dirty->cache); if (ret) @@ -2081,7 +2080,7 @@ int vmw_gb_surface_define(struct vmw_private *dev_priv, *srf_out = NULL; if (req->scanout) { - if (!svga3dsurface_is_screen_target_format(req->format)) { + if (!vmw_surface_is_screen_target_format(req->format)) { VMW_DEBUG_USER("Invalid Screen Target surface format."); return -EINVAL; } @@ -2096,10 +2095,10 @@ int vmw_gb_surface_define(struct vmw_private *dev_priv, return -EINVAL; } } else { - const struct svga3d_surface_desc *desc = - svga3dsurface_get_desc(req->format); + const SVGA3dSurfaceDesc *desc = + vmw_surface_get_desc(req->format); - if (desc->block_desc == SVGA3DBLOCKDESC_NONE) { + if (desc->blockDesc == SVGA3DBLOCKDESC_NONE) { VMW_DEBUG_USER("Invalid surface format.\n"); return -EINVAL; } @@ -2148,11 +2147,12 @@ int vmw_gb_surface_define(struct vmw_private *dev_priv, sample_count = metadata->multisample_count; srf->res.backup_size = - svga3dsurface_get_serialized_size_extended(metadata->format, - metadata->base_size, - metadata->mip_levels[0], - num_layers, - sample_count); + vmw_surface_get_serialized_size_extended( + metadata->format, + metadata->base_size, + metadata->mip_levels[0], + num_layers, + sample_count); if (metadata->flags & SVGA3D_SURFACE_BIND_STREAM_OUTPUT) srf->res.backup_size += sizeof(SVGA3dDXSOState); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c index 0488042fb287..b0973c27e774 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c @@ -222,36 +222,6 @@ static bool __vmw_piter_sg_next(struct vmw_piter *viter) } -/** - * __vmw_piter_non_sg_page: Helper functions to return a pointer - * to the current page. - * - * @viter: Pointer to the iterator - * - * These functions return a pointer to the page currently - * pointed to by @viter. Functions are selected depending on the - * current mapping mode. - */ -static struct page *__vmw_piter_non_sg_page(struct vmw_piter *viter) -{ - return viter->pages[viter->i]; -} - -/** - * __vmw_piter_phys_addr: Helper functions to return the DMA - * address of the current page. - * - * @viter: Pointer to the iterator - * - * These functions return the DMA address of the page currently - * pointed to by @viter. 
Functions are selected depending on the - * current mapping mode. - */ -static dma_addr_t __vmw_piter_phys_addr(struct vmw_piter *viter) -{ - return page_to_phys(viter->pages[viter->i]); -} - static dma_addr_t __vmw_piter_dma_addr(struct vmw_piter *viter) { return viter->addrs[viter->i]; @@ -279,13 +249,8 @@ void vmw_piter_start(struct vmw_piter *viter, const struct vmw_sg_table *vsgt, { viter->i = p_offset - 1; viter->num_pages = vsgt->num_pages; - viter->page = &__vmw_piter_non_sg_page; viter->pages = vsgt->pages; switch (vsgt->mode) { - case vmw_dma_phys: - viter->next = &__vmw_piter_non_sg_next; - viter->dma_address = &__vmw_piter_phys_addr; - break; case vmw_dma_alloc_coherent: viter->next = &__vmw_piter_non_sg_next; viter->dma_address = &__vmw_piter_dma_addr; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c b/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c index 8338b1d20f2a..b09094b50c5d 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c @@ -586,13 +586,13 @@ int vmw_validation_bo_validate(struct vmw_validation_context *ctx, bool intr) container_of(entry->base.bo, typeof(*vbo), base); if (entry->cpu_blit) { - struct ttm_operation_ctx ctx = { + struct ttm_operation_ctx ttm_ctx = { .interruptible = intr, .no_wait_gpu = false }; ret = ttm_bo_validate(entry->base.bo, - &vmw_nonfixed_placement, &ctx); + &vmw_nonfixed_placement, &ttm_ctx); } else { ret = vmw_validation_bo_validate_single (entry->base.bo, intr, entry->as_mob); diff --git a/drivers/gpu/drm/xen/xen_drm_front_kms.c b/drivers/gpu/drm/xen/xen_drm_front_kms.c index 371202ebe900..cfda74490765 100644 --- a/drivers/gpu/drm/xen/xen_drm_front_kms.c +++ b/drivers/gpu/drm/xen/xen_drm_front_kms.c @@ -302,7 +302,6 @@ static const struct drm_simple_display_pipe_funcs display_funcs = { .mode_valid = display_mode_valid, .enable = display_enable, .disable = display_disable, - .prepare_fb = drm_gem_simple_display_pipe_prepare_fb, .check = display_check, .update = display_update, }; diff --git a/drivers/gpu/drm/xlnx/Kconfig b/drivers/gpu/drm/xlnx/Kconfig index b52c6cdfc0b8..c3d08269faa9 100644 --- a/drivers/gpu/drm/xlnx/Kconfig +++ b/drivers/gpu/drm/xlnx/Kconfig @@ -3,6 +3,8 @@ config DRM_ZYNQMP_DPSUB depends on ARCH_ZYNQMP || COMPILE_TEST depends on COMMON_CLK && DRM && OF depends on DMADEVICES + depends on PHY_XILINX_ZYNQMP + depends on XILINX_ZYNQMP_DPDMA select DMA_ENGINE select DRM_GEM_CMA_HELPER select DRM_KMS_CMA_HELPER diff --git a/drivers/gpu/drm/xlnx/zynqmp_disp.c b/drivers/gpu/drm/xlnx/zynqmp_disp.c index 109d627968ac..ff2b308d8651 100644 --- a/drivers/gpu/drm/xlnx/zynqmp_disp.c +++ b/drivers/gpu/drm/xlnx/zynqmp_disp.c @@ -91,7 +91,7 @@ struct zynqmp_disp_format { }; /** - * enum zynqmp_disp_id - Layer identifier + * enum zynqmp_disp_layer_id - Layer identifier * @ZYNQMP_DISP_LAYER_VID: Video layer * @ZYNQMP_DISP_LAYER_GFX: Graphics layer */ @@ -159,42 +159,16 @@ struct zynqmp_disp_layer { }; /** - * struct zynqmp_disp_blend - Blender - * @base: Registers I/O base address - */ -struct zynqmp_disp_blend { - void __iomem *base; -}; - -/** - * struct zynqmp_disp_avbuf - Audio/video buffer manager - * @base: Registers I/O base address - */ -struct zynqmp_disp_avbuf { - void __iomem *base; -}; - -/** - * struct zynqmp_disp_audio - Audio mixer - * @base: Registers I/O base address - * @clk: Audio clock - * @clk_from_ps: True of the audio clock comes from PS, false from PL - */ -struct zynqmp_disp_audio { - void __iomem *base; - struct clk *clk; - bool clk_from_ps; -}; - -/** * struct 
zynqmp_disp - Display controller * @dev: Device structure * @drm: DRM core * @dpsub: Display subsystem * @crtc: DRM CRTC - * @blend: Blender (video rendering pipeline) - * @avbuf: Audio/video buffer manager - * @audio: Audio mixer + * @blend.base: Register I/O base address for the blender + * @avbuf.base: Register I/O base address for the audio/video buffer manager + * @audio.base: Registers I/O base address for the audio mixer + * @audio.clk: Audio clock + * @audio.clk_from_ps: True of the audio clock comes from PS, false from PL * @layers: Layers (planes) * @event: Pending vblank event request * @pclk: Pixel clock @@ -207,9 +181,17 @@ struct zynqmp_disp { struct drm_crtc crtc; - struct zynqmp_disp_blend blend; - struct zynqmp_disp_avbuf avbuf; - struct zynqmp_disp_audio audio; + struct { + void __iomem *base; + } blend; + struct { + void __iomem *base; + } avbuf; + struct { + void __iomem *base; + struct clk *clk; + bool clk_from_ps; + } audio; struct zynqmp_disp_layer layers[ZYNQMP_DISP_NUM_LAYERS]; @@ -423,51 +405,60 @@ static const struct zynqmp_disp_format avbuf_gfx_fmts[] = { }, }; -static u32 zynqmp_disp_avbuf_read(struct zynqmp_disp_avbuf *avbuf, int reg) +static u32 zynqmp_disp_avbuf_read(struct zynqmp_disp *disp, int reg) { - return readl(avbuf->base + reg); + return readl(disp->avbuf.base + reg); } -static void zynqmp_disp_avbuf_write(struct zynqmp_disp_avbuf *avbuf, - int reg, u32 val) +static void zynqmp_disp_avbuf_write(struct zynqmp_disp *disp, int reg, u32 val) { - writel(val, avbuf->base + reg); + writel(val, disp->avbuf.base + reg); +} + +static bool zynqmp_disp_layer_is_gfx(const struct zynqmp_disp_layer *layer) +{ + return layer->id == ZYNQMP_DISP_LAYER_GFX; +} + +static bool zynqmp_disp_layer_is_video(const struct zynqmp_disp_layer *layer) +{ + return layer->id == ZYNQMP_DISP_LAYER_VID; } /** * zynqmp_disp_avbuf_set_format - Set the input format for a layer - * @avbuf: Audio/video buffer manager - * @layer: The layer ID + * @disp: Display controller + * @layer: The layer * @fmt: The format information * * Set the video buffer manager format for @layer to @fmt. */ -static void zynqmp_disp_avbuf_set_format(struct zynqmp_disp_avbuf *avbuf, - enum zynqmp_disp_layer_id layer, +static void zynqmp_disp_avbuf_set_format(struct zynqmp_disp *disp, + struct zynqmp_disp_layer *layer, const struct zynqmp_disp_format *fmt) { unsigned int i; u32 val; - val = zynqmp_disp_avbuf_read(avbuf, ZYNQMP_DISP_AV_BUF_FMT); - val &= layer == ZYNQMP_DISP_LAYER_VID + val = zynqmp_disp_avbuf_read(disp, ZYNQMP_DISP_AV_BUF_FMT); + val &= zynqmp_disp_layer_is_video(layer) ? ~ZYNQMP_DISP_AV_BUF_FMT_NL_VID_MASK : ~ZYNQMP_DISP_AV_BUF_FMT_NL_GFX_MASK; val |= fmt->buf_fmt; - zynqmp_disp_avbuf_write(avbuf, ZYNQMP_DISP_AV_BUF_FMT, val); + zynqmp_disp_avbuf_write(disp, ZYNQMP_DISP_AV_BUF_FMT, val); for (i = 0; i < ZYNQMP_DISP_AV_BUF_NUM_SF; i++) { - unsigned int reg = layer == ZYNQMP_DISP_LAYER_VID + unsigned int reg = zynqmp_disp_layer_is_video(layer) ? 
ZYNQMP_DISP_AV_BUF_VID_COMP_SF(i) : ZYNQMP_DISP_AV_BUF_GFX_COMP_SF(i); - zynqmp_disp_avbuf_write(avbuf, reg, fmt->sf[i]); + zynqmp_disp_avbuf_write(disp, reg, fmt->sf[i]); } } /** * zynqmp_disp_avbuf_set_clocks_sources - Set the clocks sources - * @avbuf: Audio/video buffer manager + * @disp: Display controller * @video_from_ps: True if the video clock originates from the PS * @audio_from_ps: True if the audio clock originates from the PS * @timings_internal: True if video timings are generated internally @@ -477,7 +468,7 @@ static void zynqmp_disp_avbuf_set_format(struct zynqmp_disp_avbuf *avbuf, * generated internally or externally. */ static void -zynqmp_disp_avbuf_set_clocks_sources(struct zynqmp_disp_avbuf *avbuf, +zynqmp_disp_avbuf_set_clocks_sources(struct zynqmp_disp *disp, bool video_from_ps, bool audio_from_ps, bool timings_internal) { @@ -490,16 +481,16 @@ zynqmp_disp_avbuf_set_clocks_sources(struct zynqmp_disp_avbuf *avbuf, if (timings_internal) val |= ZYNQMP_DISP_AV_BUF_CLK_SRC_VID_INTERNAL_TIMING; - zynqmp_disp_avbuf_write(avbuf, ZYNQMP_DISP_AV_BUF_CLK_SRC, val); + zynqmp_disp_avbuf_write(disp, ZYNQMP_DISP_AV_BUF_CLK_SRC, val); } /** * zynqmp_disp_avbuf_enable_channels - Enable buffer channels - * @avbuf: Audio/video buffer manager + * @disp: Display controller * * Enable all (video and audio) buffer channels. */ -static void zynqmp_disp_avbuf_enable_channels(struct zynqmp_disp_avbuf *avbuf) +static void zynqmp_disp_avbuf_enable_channels(struct zynqmp_disp *disp) { unsigned int i; u32 val; @@ -509,7 +500,7 @@ static void zynqmp_disp_avbuf_enable_channels(struct zynqmp_disp_avbuf *avbuf) ZYNQMP_DISP_AV_BUF_CHBUF_BURST_LEN_SHIFT); for (i = 0; i < ZYNQMP_DISP_AV_BUF_NUM_VID_GFX_BUFFERS; i++) - zynqmp_disp_avbuf_write(avbuf, ZYNQMP_DISP_AV_BUF_CHBUF(i), + zynqmp_disp_avbuf_write(disp, ZYNQMP_DISP_AV_BUF_CHBUF(i), val); val = ZYNQMP_DISP_AV_BUF_CHBUF_EN | @@ -517,75 +508,75 @@ static void zynqmp_disp_avbuf_enable_channels(struct zynqmp_disp_avbuf *avbuf) ZYNQMP_DISP_AV_BUF_CHBUF_BURST_LEN_SHIFT); for (; i < ZYNQMP_DISP_AV_BUF_NUM_BUFFERS; i++) - zynqmp_disp_avbuf_write(avbuf, ZYNQMP_DISP_AV_BUF_CHBUF(i), + zynqmp_disp_avbuf_write(disp, ZYNQMP_DISP_AV_BUF_CHBUF(i), val); } /** * zynqmp_disp_avbuf_disable_channels - Disable buffer channels - * @avbuf: Audio/video buffer manager + * @disp: Display controller * * Disable all (video and audio) buffer channels. */ -static void zynqmp_disp_avbuf_disable_channels(struct zynqmp_disp_avbuf *avbuf) +static void zynqmp_disp_avbuf_disable_channels(struct zynqmp_disp *disp) { unsigned int i; for (i = 0; i < ZYNQMP_DISP_AV_BUF_NUM_BUFFERS; i++) - zynqmp_disp_avbuf_write(avbuf, ZYNQMP_DISP_AV_BUF_CHBUF(i), + zynqmp_disp_avbuf_write(disp, ZYNQMP_DISP_AV_BUF_CHBUF(i), ZYNQMP_DISP_AV_BUF_CHBUF_FLUSH); } /** * zynqmp_disp_avbuf_enable_audio - Enable audio - * @avbuf: Audio/video buffer manager + * @disp: Display controller * * Enable all audio buffers with a non-live (memory) source. 
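Helpers such as zynqmp_disp_avbuf_set_format() above share one register between the video and graphics layers, so every update is a read-modify-write: read the register, clear only the field belonging to the layer at hand, OR in the new value, write it back. A standalone sketch of that pattern with the MMIO access replaced by a plain variable (the mask and value constants are made up, not the hardware's layout):

    #include <stdint.h>
    #include <stdio.h>

    /* Stand-in for a memory-mapped register such as AV_BUF_OUTPUT. */
    static uint32_t av_buf_output;

    #define VID1_MASK  0x3u       /* illustrative field layout only */
    #define VID1_MEM   0x0u
    #define VID1_LIVE  0x1u
    #define VID1_NONE  0x2u

    static void set_vid1_source(uint32_t source)
    {
        uint32_t val = av_buf_output;   /* zynqmp_disp_avbuf_read() */

        val &= ~VID1_MASK;              /* clear only this layer's field */
        val |= source;                  /* select memory, live or disabled */
        av_buf_output = val;            /* zynqmp_disp_avbuf_write() */
    }

    int main(void)
    {
        set_vid1_source(VID1_MEM);
        set_vid1_source(VID1_NONE);
        printf("AV_BUF_OUTPUT = %#x\n", av_buf_output);
        return 0;
    }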
*/ -static void zynqmp_disp_avbuf_enable_audio(struct zynqmp_disp_avbuf *avbuf) +static void zynqmp_disp_avbuf_enable_audio(struct zynqmp_disp *disp) { u32 val; - val = zynqmp_disp_avbuf_read(avbuf, ZYNQMP_DISP_AV_BUF_OUTPUT); + val = zynqmp_disp_avbuf_read(disp, ZYNQMP_DISP_AV_BUF_OUTPUT); val &= ~ZYNQMP_DISP_AV_BUF_OUTPUT_AUD1_MASK; val |= ZYNQMP_DISP_AV_BUF_OUTPUT_AUD1_MEM; val |= ZYNQMP_DISP_AV_BUF_OUTPUT_AUD2_EN; - zynqmp_disp_avbuf_write(avbuf, ZYNQMP_DISP_AV_BUF_OUTPUT, val); + zynqmp_disp_avbuf_write(disp, ZYNQMP_DISP_AV_BUF_OUTPUT, val); } /** * zynqmp_disp_avbuf_disable_audio - Disable audio - * @avbuf: Audio/video buffer manager + * @disp: Display controller * * Disable all audio buffers. */ -static void zynqmp_disp_avbuf_disable_audio(struct zynqmp_disp_avbuf *avbuf) +static void zynqmp_disp_avbuf_disable_audio(struct zynqmp_disp *disp) { u32 val; - val = zynqmp_disp_avbuf_read(avbuf, ZYNQMP_DISP_AV_BUF_OUTPUT); + val = zynqmp_disp_avbuf_read(disp, ZYNQMP_DISP_AV_BUF_OUTPUT); val &= ~ZYNQMP_DISP_AV_BUF_OUTPUT_AUD1_MASK; val |= ZYNQMP_DISP_AV_BUF_OUTPUT_AUD1_DISABLE; val &= ~ZYNQMP_DISP_AV_BUF_OUTPUT_AUD2_EN; - zynqmp_disp_avbuf_write(avbuf, ZYNQMP_DISP_AV_BUF_OUTPUT, val); + zynqmp_disp_avbuf_write(disp, ZYNQMP_DISP_AV_BUF_OUTPUT, val); } /** * zynqmp_disp_avbuf_enable_video - Enable a video layer - * @avbuf: Audio/video buffer manager - * @layer: The layer ID + * @disp: Display controller + * @layer: The layer * @mode: Operating mode of layer * * Enable the video/graphics buffer for @layer. */ -static void zynqmp_disp_avbuf_enable_video(struct zynqmp_disp_avbuf *avbuf, - enum zynqmp_disp_layer_id layer, +static void zynqmp_disp_avbuf_enable_video(struct zynqmp_disp *disp, + struct zynqmp_disp_layer *layer, enum zynqmp_disp_layer_mode mode) { u32 val; - val = zynqmp_disp_avbuf_read(avbuf, ZYNQMP_DISP_AV_BUF_OUTPUT); - if (layer == ZYNQMP_DISP_LAYER_VID) { + val = zynqmp_disp_avbuf_read(disp, ZYNQMP_DISP_AV_BUF_OUTPUT); + if (zynqmp_disp_layer_is_video(layer)) { val &= ~ZYNQMP_DISP_AV_BUF_OUTPUT_VID1_MASK; if (mode == ZYNQMP_DISP_LAYER_NONLIVE) val |= ZYNQMP_DISP_AV_BUF_OUTPUT_VID1_MEM; @@ -599,52 +590,52 @@ static void zynqmp_disp_avbuf_enable_video(struct zynqmp_disp_avbuf *avbuf, else val |= ZYNQMP_DISP_AV_BUF_OUTPUT_VID2_LIVE; } - zynqmp_disp_avbuf_write(avbuf, ZYNQMP_DISP_AV_BUF_OUTPUT, val); + zynqmp_disp_avbuf_write(disp, ZYNQMP_DISP_AV_BUF_OUTPUT, val); } /** * zynqmp_disp_avbuf_disable_video - Disable a video layer - * @avbuf: Audio/video buffer manager - * @layer: The layer ID + * @disp: Display controller + * @layer: The layer * * Disable the video/graphics buffer for @layer. 
*/ -static void zynqmp_disp_avbuf_disable_video(struct zynqmp_disp_avbuf *avbuf, - enum zynqmp_disp_layer_id layer) +static void zynqmp_disp_avbuf_disable_video(struct zynqmp_disp *disp, + struct zynqmp_disp_layer *layer) { u32 val; - val = zynqmp_disp_avbuf_read(avbuf, ZYNQMP_DISP_AV_BUF_OUTPUT); - if (layer == ZYNQMP_DISP_LAYER_VID) { + val = zynqmp_disp_avbuf_read(disp, ZYNQMP_DISP_AV_BUF_OUTPUT); + if (zynqmp_disp_layer_is_video(layer)) { val &= ~ZYNQMP_DISP_AV_BUF_OUTPUT_VID1_MASK; val |= ZYNQMP_DISP_AV_BUF_OUTPUT_VID1_NONE; } else { val &= ~ZYNQMP_DISP_AV_BUF_OUTPUT_VID2_MASK; val |= ZYNQMP_DISP_AV_BUF_OUTPUT_VID2_DISABLE; } - zynqmp_disp_avbuf_write(avbuf, ZYNQMP_DISP_AV_BUF_OUTPUT, val); + zynqmp_disp_avbuf_write(disp, ZYNQMP_DISP_AV_BUF_OUTPUT, val); } /** * zynqmp_disp_avbuf_enable - Enable the video pipe - * @avbuf: Audio/video buffer manager + * @disp: Display controller * * De-assert the video pipe reset. */ -static void zynqmp_disp_avbuf_enable(struct zynqmp_disp_avbuf *avbuf) +static void zynqmp_disp_avbuf_enable(struct zynqmp_disp *disp) { - zynqmp_disp_avbuf_write(avbuf, ZYNQMP_DISP_AV_BUF_SRST_REG, 0); + zynqmp_disp_avbuf_write(disp, ZYNQMP_DISP_AV_BUF_SRST_REG, 0); } /** * zynqmp_disp_avbuf_disable - Disable the video pipe - * @avbuf: Audio/video buffer manager + * @disp: Display controller * * Assert the video pipe reset. */ -static void zynqmp_disp_avbuf_disable(struct zynqmp_disp_avbuf *avbuf) +static void zynqmp_disp_avbuf_disable(struct zynqmp_disp *disp) { - zynqmp_disp_avbuf_write(avbuf, ZYNQMP_DISP_AV_BUF_SRST_REG, + zynqmp_disp_avbuf_write(disp, ZYNQMP_DISP_AV_BUF_SRST_REG, ZYNQMP_DISP_AV_BUF_SRST_REG_VID_RST); } @@ -652,10 +643,9 @@ static void zynqmp_disp_avbuf_disable(struct zynqmp_disp_avbuf *avbuf) * Blender (Video Pipeline) */ -static void zynqmp_disp_blend_write(struct zynqmp_disp_blend *blend, - int reg, u32 val) +static void zynqmp_disp_blend_write(struct zynqmp_disp *disp, int reg, u32 val) { - writel(val, blend->base + reg); + writel(val, disp->blend.base + reg); } /* @@ -701,12 +691,12 @@ static const u32 csc_sdtv_to_rgb_offsets[] = { /** * zynqmp_disp_blend_set_output_format - Set the output format of the blender - * @blend: Blender object + * @disp: Display controller * @format: Output format * * Set the output format of the blender to @format. 
*/ -static void zynqmp_disp_blend_set_output_format(struct zynqmp_disp_blend *blend, +static void zynqmp_disp_blend_set_output_format(struct zynqmp_disp *disp, enum zynqmp_dpsub_format format) { static const unsigned int blend_output_fmts[] = { @@ -722,7 +712,7 @@ static void zynqmp_disp_blend_set_output_format(struct zynqmp_disp_blend *blend, const u32 *offsets; unsigned int i; - zynqmp_disp_blend_write(blend, ZYNQMP_DISP_V_BLEND_OUTPUT_VID_FMT, fmt); + zynqmp_disp_blend_write(disp, ZYNQMP_DISP_V_BLEND_OUTPUT_VID_FMT, fmt); if (fmt == ZYNQMP_DISP_V_BLEND_OUTPUT_VID_FMT_RGB) { coeffs = csc_identity_matrix; offsets = csc_zero_offsets; @@ -732,19 +722,19 @@ static void zynqmp_disp_blend_set_output_format(struct zynqmp_disp_blend *blend, } for (i = 0; i < ZYNQMP_DISP_V_BLEND_NUM_COEFF; i++) - zynqmp_disp_blend_write(blend, + zynqmp_disp_blend_write(disp, ZYNQMP_DISP_V_BLEND_RGB2YCBCR_COEFF(i), coeffs[i]); for (i = 0; i < ZYNQMP_DISP_V_BLEND_NUM_OFFSET; i++) - zynqmp_disp_blend_write(blend, + zynqmp_disp_blend_write(disp, ZYNQMP_DISP_V_BLEND_OUTCSC_OFFSET(i), offsets[i]); } /** * zynqmp_disp_blend_set_bg_color - Set the background color - * @blend: Blender object + * @disp: Display controller * @rcr: Red/Cr color component * @gy: Green/Y color component * @bcb: Blue/Cb color component @@ -753,31 +743,31 @@ static void zynqmp_disp_blend_set_output_format(struct zynqmp_disp_blend *blend, * B or Cr, Y and Cb components respectively depending on the selected output * format. */ -static void zynqmp_disp_blend_set_bg_color(struct zynqmp_disp_blend *blend, +static void zynqmp_disp_blend_set_bg_color(struct zynqmp_disp *disp, u32 rcr, u32 gy, u32 bcb) { - zynqmp_disp_blend_write(blend, ZYNQMP_DISP_V_BLEND_BG_CLR_0, rcr); - zynqmp_disp_blend_write(blend, ZYNQMP_DISP_V_BLEND_BG_CLR_1, gy); - zynqmp_disp_blend_write(blend, ZYNQMP_DISP_V_BLEND_BG_CLR_2, bcb); + zynqmp_disp_blend_write(disp, ZYNQMP_DISP_V_BLEND_BG_CLR_0, rcr); + zynqmp_disp_blend_write(disp, ZYNQMP_DISP_V_BLEND_BG_CLR_1, gy); + zynqmp_disp_blend_write(disp, ZYNQMP_DISP_V_BLEND_BG_CLR_2, bcb); } /** * zynqmp_disp_blend_set_global_alpha - Configure global alpha blending - * @blend: Blender object + * @disp: Display controller * @enable: True to enable global alpha blending * @alpha: Global alpha value (ignored if @enabled is false) */ -static void zynqmp_disp_blend_set_global_alpha(struct zynqmp_disp_blend *blend, +static void zynqmp_disp_blend_set_global_alpha(struct zynqmp_disp *disp, bool enable, u32 alpha) { - zynqmp_disp_blend_write(blend, ZYNQMP_DISP_V_BLEND_SET_GLOBAL_ALPHA, + zynqmp_disp_blend_write(disp, ZYNQMP_DISP_V_BLEND_SET_GLOBAL_ALPHA, ZYNQMP_DISP_V_BLEND_SET_GLOBAL_ALPHA_VALUE(alpha) | (enable ? ZYNQMP_DISP_V_BLEND_SET_GLOBAL_ALPHA_EN : 0)); } /** * zynqmp_disp_blend_layer_set_csc - Configure colorspace conversion for layer - * @blend: Blender object + * @disp: Display controller * @layer: The layer * @coeffs: Colorspace conversion matrix * @offsets: Colorspace conversion offsets @@ -786,7 +776,7 @@ static void zynqmp_disp_blend_set_global_alpha(struct zynqmp_disp_blend *blend, * Columns of the matrix are automatically swapped based on the input format to * handle RGB and YCrCb components permutations. 
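The coefficient and offset registers programmed by these helpers implement an affine colorspace conversion, out = M * in + offset, applied per pixel; the column swap described above only reorders which input component each matrix column multiplies. For illustration, here is the conversion in plain C using textbook full-range BT.601 RGB to YCbCr values (an approximation for clarity, not the fixed-point register encoding the hardware uses, and clamping is omitted):

    #include <stdio.h>

    /* out = M * in + offset, one pixel, floating point for clarity. */
    static void csc_apply(const double m[3][3], const double off[3],
                          const double in[3], double out[3])
    {
        for (int r = 0; r < 3; r++)
            out[r] = m[r][0] * in[0] + m[r][1] * in[1] + m[r][2] * in[2] + off[r];
    }

    int main(void)
    {
        /* Approximate full-range BT.601 RGB -> YCbCr coefficients. */
        const double rgb2ycbcr[3][3] = {
            {  0.299,     0.587,     0.114    },
            { -0.168736, -0.331264,  0.5      },
            {  0.5,      -0.418688, -0.081312 },
        };
        const double offsets[3] = { 0.0, 128.0, 128.0 };
        const double rgb[3] = { 255.0, 0.0, 0.0 };     /* pure red */
        double ycbcr[3];

        csc_apply(rgb2ycbcr, offsets, rgb, ycbcr);
        printf("Y=%.1f Cb=%.1f Cr=%.1f\n", ycbcr[0], ycbcr[1], ycbcr[2]);
        return 0;
    }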
*/ -static void zynqmp_disp_blend_layer_set_csc(struct zynqmp_disp_blend *blend, +static void zynqmp_disp_blend_layer_set_csc(struct zynqmp_disp *disp, struct zynqmp_disp_layer *layer, const u16 *coeffs, const u32 *offsets) @@ -807,32 +797,32 @@ static void zynqmp_disp_blend_layer_set_csc(struct zynqmp_disp_blend *blend, } } - if (layer->id == ZYNQMP_DISP_LAYER_VID) + if (zynqmp_disp_layer_is_video(layer)) reg = ZYNQMP_DISP_V_BLEND_IN1CSC_COEFF(0); else reg = ZYNQMP_DISP_V_BLEND_IN2CSC_COEFF(0); for (i = 0; i < ZYNQMP_DISP_V_BLEND_NUM_COEFF; i += 3, reg += 12) { - zynqmp_disp_blend_write(blend, reg + 0, coeffs[i + swap[0]]); - zynqmp_disp_blend_write(blend, reg + 4, coeffs[i + swap[1]]); - zynqmp_disp_blend_write(blend, reg + 8, coeffs[i + swap[2]]); + zynqmp_disp_blend_write(disp, reg + 0, coeffs[i + swap[0]]); + zynqmp_disp_blend_write(disp, reg + 4, coeffs[i + swap[1]]); + zynqmp_disp_blend_write(disp, reg + 8, coeffs[i + swap[2]]); } - if (layer->id == ZYNQMP_DISP_LAYER_VID) + if (zynqmp_disp_layer_is_video(layer)) reg = ZYNQMP_DISP_V_BLEND_IN1CSC_OFFSET(0); else reg = ZYNQMP_DISP_V_BLEND_IN2CSC_OFFSET(0); for (i = 0; i < ZYNQMP_DISP_V_BLEND_NUM_OFFSET; i++) - zynqmp_disp_blend_write(blend, reg + i * 4, offsets[i]); + zynqmp_disp_blend_write(disp, reg + i * 4, offsets[i]); } /** * zynqmp_disp_blend_layer_enable - Enable a layer - * @blend: Blender object + * @disp: Display controller * @layer: The layer */ -static void zynqmp_disp_blend_layer_enable(struct zynqmp_disp_blend *blend, +static void zynqmp_disp_blend_layer_enable(struct zynqmp_disp *disp, struct zynqmp_disp_layer *layer) { const u16 *coeffs; @@ -844,7 +834,7 @@ static void zynqmp_disp_blend_layer_enable(struct zynqmp_disp_blend *blend, (layer->drm_fmt->hsub > 1 ? ZYNQMP_DISP_V_BLEND_LAYER_CONTROL_EN_US : 0); - zynqmp_disp_blend_write(blend, + zynqmp_disp_blend_write(disp, ZYNQMP_DISP_V_BLEND_LAYER_CONTROL(layer->id), val); @@ -856,22 +846,22 @@ static void zynqmp_disp_blend_layer_enable(struct zynqmp_disp_blend *blend, offsets = csc_zero_offsets; } - zynqmp_disp_blend_layer_set_csc(blend, layer, coeffs, offsets); + zynqmp_disp_blend_layer_set_csc(disp, layer, coeffs, offsets); } /** * zynqmp_disp_blend_layer_disable - Disable a layer - * @blend: Blender object + * @disp: Display controller * @layer: The layer */ -static void zynqmp_disp_blend_layer_disable(struct zynqmp_disp_blend *blend, +static void zynqmp_disp_blend_layer_disable(struct zynqmp_disp *disp, struct zynqmp_disp_layer *layer) { - zynqmp_disp_blend_write(blend, + zynqmp_disp_blend_write(disp, ZYNQMP_DISP_V_BLEND_LAYER_CONTROL(layer->id), 0); - zynqmp_disp_blend_layer_set_csc(blend, layer, csc_zero_matrix, + zynqmp_disp_blend_layer_set_csc(disp, layer, csc_zero_matrix, csc_zero_offsets); } @@ -879,57 +869,55 @@ static void zynqmp_disp_blend_layer_disable(struct zynqmp_disp_blend *blend, * Audio Mixer */ -static void zynqmp_disp_audio_write(struct zynqmp_disp_audio *audio, - int reg, u32 val) +static void zynqmp_disp_audio_write(struct zynqmp_disp *disp, int reg, u32 val) { - writel(val, audio->base + reg); + writel(val, disp->audio.base + reg); } /** * zynqmp_disp_audio_enable - Enable the audio mixer - * @audio: Audio mixer + * @disp: Display controller * * Enable the audio mixer by de-asserting the soft reset. The audio state is set to * default values by the reset, set the default mixer volume explicitly. 
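Note that zynqmp_disp_blend_set_global_alpha() above takes an 8-bit alpha, while the DRM per-plane alpha handed to the plane atomic hooks further below is 16-bit (0 to 0xffff), hence the plane->state->alpha >> 8 scaling there. A tiny sketch of the scaling and of the usual convex-combination blend it feeds (the exact rounding done by the blender hardware is an assumption here):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint16_t drm_alpha = 0xffff;            /* DRM_BLEND_ALPHA_OPAQUE */
        uint8_t  alpha     = drm_alpha >> 8;    /* value the driver programs */

        uint8_t gfx = 200, vid = 50;            /* one component of each layer */
        uint8_t out = (alpha * gfx + (255 - alpha) * vid) / 255;

        printf("alpha=%u out=%u\n", alpha, out);
        return 0;
    }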
*/ -static void zynqmp_disp_audio_enable(struct zynqmp_disp_audio *audio) +static void zynqmp_disp_audio_enable(struct zynqmp_disp *disp) { /* Clear the audio soft reset register as it's an non-reset flop. */ - zynqmp_disp_audio_write(audio, ZYNQMP_DISP_AUD_SOFT_RESET, 0); - zynqmp_disp_audio_write(audio, ZYNQMP_DISP_AUD_MIXER_VOLUME, + zynqmp_disp_audio_write(disp, ZYNQMP_DISP_AUD_SOFT_RESET, 0); + zynqmp_disp_audio_write(disp, ZYNQMP_DISP_AUD_MIXER_VOLUME, ZYNQMP_DISP_AUD_MIXER_VOLUME_NO_SCALE); } /** * zynqmp_disp_audio_disable - Disable the audio mixer - * @audio: Audio mixer + * @disp: Display controller * * Disable the audio mixer by asserting its soft reset. */ -static void zynqmp_disp_audio_disable(struct zynqmp_disp_audio *audio) +static void zynqmp_disp_audio_disable(struct zynqmp_disp *disp) { - zynqmp_disp_audio_write(audio, ZYNQMP_DISP_AUD_SOFT_RESET, + zynqmp_disp_audio_write(disp, ZYNQMP_DISP_AUD_SOFT_RESET, ZYNQMP_DISP_AUD_SOFT_RESET_AUD_SRST); } -static void zynqmp_disp_audio_init(struct device *dev, - struct zynqmp_disp_audio *audio) +static void zynqmp_disp_audio_init(struct zynqmp_disp *disp) { /* Try the live PL audio clock. */ - audio->clk = devm_clk_get(dev, "dp_live_audio_aclk"); - if (!IS_ERR(audio->clk)) { - audio->clk_from_ps = false; + disp->audio.clk = devm_clk_get(disp->dev, "dp_live_audio_aclk"); + if (!IS_ERR(disp->audio.clk)) { + disp->audio.clk_from_ps = false; return; } /* If the live PL audio clock is not valid, fall back to PS clock. */ - audio->clk = devm_clk_get(dev, "dp_aud_clk"); - if (!IS_ERR(audio->clk)) { - audio->clk_from_ps = true; + disp->audio.clk = devm_clk_get(disp->dev, "dp_aud_clk"); + if (!IS_ERR(disp->audio.clk)) { + disp->audio.clk_from_ps = true; return; } - dev_err(dev, "audio disabled due to missing clock\n"); + dev_err(disp->dev, "audio disabled due to missing clock\n"); } /* ----------------------------------------------------------------------------- @@ -1025,9 +1013,9 @@ zynqmp_disp_layer_find_format(struct zynqmp_disp_layer *layer, */ static void zynqmp_disp_layer_enable(struct zynqmp_disp_layer *layer) { - zynqmp_disp_avbuf_enable_video(&layer->disp->avbuf, layer->id, + zynqmp_disp_avbuf_enable_video(layer->disp, layer, ZYNQMP_DISP_LAYER_NONLIVE); - zynqmp_disp_blend_layer_enable(&layer->disp->blend, layer); + zynqmp_disp_blend_layer_enable(layer->disp, layer); layer->mode = ZYNQMP_DISP_LAYER_NONLIVE; } @@ -1046,8 +1034,8 @@ static void zynqmp_disp_layer_disable(struct zynqmp_disp_layer *layer) for (i = 0; i < layer->drm_fmt->num_planes; i++) dmaengine_terminate_sync(layer->dmas[i].chan); - zynqmp_disp_avbuf_disable_video(&layer->disp->avbuf, layer->id); - zynqmp_disp_blend_layer_disable(&layer->disp->blend, layer); + zynqmp_disp_avbuf_disable_video(layer->disp, layer); + zynqmp_disp_blend_layer_disable(layer->disp, layer); } /** @@ -1067,8 +1055,7 @@ static void zynqmp_disp_layer_set_format(struct zynqmp_disp_layer *layer, layer->disp_fmt = zynqmp_disp_layer_find_format(layer, info->format); layer->drm_fmt = info; - zynqmp_disp_avbuf_set_format(&layer->disp->avbuf, layer->id, - layer->disp_fmt); + zynqmp_disp_avbuf_set_format(layer->disp, layer, layer->disp_fmt); /* * Set slave_id for each DMA channel to indicate they're part of a @@ -1175,6 +1162,10 @@ zynqmp_disp_plane_atomic_disable(struct drm_plane *plane, return; zynqmp_disp_layer_disable(layer); + + if (zynqmp_disp_layer_is_gfx(layer)) + zynqmp_disp_blend_set_global_alpha(layer->disp, false, + plane->state->alpha >> 8); } static void @@ -1204,6 +1195,10 @@ 
zynqmp_disp_plane_atomic_update(struct drm_plane *plane, zynqmp_disp_layer_update(layer, new_state); + if (zynqmp_disp_layer_is_gfx(layer)) + zynqmp_disp_blend_set_global_alpha(layer->disp, true, + plane->state->alpha >> 8); + /* Enable or re-enable the plane is the format has changed. */ if (format_changed) zynqmp_disp_layer_enable(layer); @@ -1244,8 +1239,8 @@ static int zynqmp_disp_create_planes(struct zynqmp_disp *disp) drm_formats[j] = layer->info->formats[j].drm_fmt; /* Graphics layer is primary, and video layer is overlay. */ - type = i == ZYNQMP_DISP_LAYER_GFX - ? DRM_PLANE_TYPE_PRIMARY : DRM_PLANE_TYPE_OVERLAY; + type = zynqmp_disp_layer_is_video(layer) + ? DRM_PLANE_TYPE_OVERLAY : DRM_PLANE_TYPE_PRIMARY; ret = drm_universal_plane_init(disp->drm, &layer->plane, 0, &zynqmp_disp_plane_funcs, drm_formats, @@ -1256,6 +1251,10 @@ static int zynqmp_disp_create_planes(struct zynqmp_disp *disp) drm_plane_helper_add(&layer->plane, &zynqmp_disp_plane_helper_funcs); + + drm_plane_create_zpos_immutable_property(&layer->plane, i); + if (zynqmp_disp_layer_is_gfx(layer)) + drm_plane_create_alpha_property(&layer->plane); } return 0; @@ -1387,14 +1386,14 @@ err: */ static void zynqmp_disp_enable(struct zynqmp_disp *disp) { - zynqmp_disp_avbuf_enable(&disp->avbuf); + zynqmp_disp_avbuf_enable(disp); /* Choose clock source based on the DT clock handle. */ - zynqmp_disp_avbuf_set_clocks_sources(&disp->avbuf, disp->pclk_from_ps, + zynqmp_disp_avbuf_set_clocks_sources(disp, disp->pclk_from_ps, disp->audio.clk_from_ps, true); - zynqmp_disp_avbuf_enable_channels(&disp->avbuf); - zynqmp_disp_avbuf_enable_audio(&disp->avbuf); + zynqmp_disp_avbuf_enable_channels(disp); + zynqmp_disp_avbuf_enable_audio(disp); - zynqmp_disp_audio_enable(&disp->audio); + zynqmp_disp_audio_enable(disp); } /** @@ -1403,11 +1402,11 @@ static void zynqmp_disp_enable(struct zynqmp_disp *disp) */ static void zynqmp_disp_disable(struct zynqmp_disp *disp) { - zynqmp_disp_audio_disable(&disp->audio); + zynqmp_disp_audio_disable(disp); - zynqmp_disp_avbuf_disable_audio(&disp->avbuf); - zynqmp_disp_avbuf_disable_channels(&disp->avbuf); - zynqmp_disp_avbuf_disable(&disp->avbuf); + zynqmp_disp_avbuf_disable_audio(disp); + zynqmp_disp_avbuf_disable_channels(disp); + zynqmp_disp_avbuf_disable(disp); } static inline struct zynqmp_disp *crtc_to_disp(struct drm_crtc *crtc) @@ -1452,9 +1451,10 @@ zynqmp_disp_crtc_atomic_enable(struct drm_crtc *crtc, struct drm_display_mode *adjusted_mode = &crtc->state->adjusted_mode; int ret, vrefresh; + pm_runtime_get_sync(disp->dev); + zynqmp_disp_crtc_setup_clock(crtc, adjusted_mode); - pm_runtime_get_sync(disp->dev); ret = clk_prepare_enable(disp->pclk); if (ret) { dev_err(disp->dev, "failed to enable a pixel clock\n"); @@ -1462,10 +1462,8 @@ zynqmp_disp_crtc_atomic_enable(struct drm_crtc *crtc, return; } - zynqmp_disp_blend_set_output_format(&disp->blend, - ZYNQMP_DPSUB_FORMAT_RGB); - zynqmp_disp_blend_set_bg_color(&disp->blend, 0, 0, 0); - zynqmp_disp_blend_set_global_alpha(&disp->blend, false, 0); + zynqmp_disp_blend_set_output_format(disp, ZYNQMP_DPSUB_FORMAT_RGB); + zynqmp_disp_blend_set_bg_color(disp, 0, 0, 0); zynqmp_disp_enable(disp); @@ -1674,7 +1672,7 @@ int zynqmp_disp_probe(struct zynqmp_dpsub *dpsub, struct drm_device *drm) disp->pclk_from_ps = true; } - zynqmp_disp_audio_init(disp->dev, &disp->audio); + zynqmp_disp_audio_init(disp); ret = zynqmp_disp_create_layers(disp); if (ret) diff --git a/drivers/gpu/drm/xlnx/zynqmp_dp.c b/drivers/gpu/drm/xlnx/zynqmp_dp.c index 82430ca9b913..6f588dc09ba6 
100644 --- a/drivers/gpu/drm/xlnx/zynqmp_dp.c +++ b/drivers/gpu/drm/xlnx/zynqmp_dp.c @@ -402,10 +402,6 @@ static int zynqmp_dp_phy_init(struct zynqmp_dp *dp) } } - ret = zynqmp_dp_reset(dp, false); - if (ret < 0) - return ret; - zynqmp_dp_clr(dp, ZYNQMP_DP_PHY_RESET, ZYNQMP_DP_PHY_RESET_ALL_RESET); /* @@ -441,8 +437,6 @@ static void zynqmp_dp_phy_exit(struct zynqmp_dp *dp) ret); } - zynqmp_dp_reset(dp, true); - for (i = 0; i < dp->num_lanes; i++) { ret = phy_exit(dp->phy[i]); if (ret) @@ -1683,9 +1677,13 @@ int zynqmp_dp_probe(struct zynqmp_dpsub *dpsub, struct drm_device *drm) return PTR_ERR(dp->reset); } + ret = zynqmp_dp_reset(dp, false); + if (ret < 0) + return ret; + ret = zynqmp_dp_phy_probe(dp); if (ret) - return ret; + goto err_reset; /* Initialize the hardware. */ zynqmp_dp_write(dp, ZYNQMP_DP_TX_PHY_POWER_DOWN, @@ -1697,7 +1695,7 @@ int zynqmp_dp_probe(struct zynqmp_dpsub *dpsub, struct drm_device *drm) ret = zynqmp_dp_phy_init(dp); if (ret) - return ret; + goto err_reset; zynqmp_dp_write(dp, ZYNQMP_DP_TRANSMITTER_ENABLE, 1); @@ -1709,15 +1707,18 @@ int zynqmp_dp_probe(struct zynqmp_dpsub *dpsub, struct drm_device *drm) zynqmp_dp_irq_handler, IRQF_ONESHOT, dev_name(dp->dev), dp); if (ret < 0) - goto error; + goto err_phy_exit; dev_dbg(dp->dev, "ZynqMP DisplayPort Tx probed with %u lanes\n", dp->num_lanes); return 0; -error: +err_phy_exit: zynqmp_dp_phy_exit(dp); +err_reset: + zynqmp_dp_reset(dp, true); + return ret; } @@ -1735,4 +1736,5 @@ void zynqmp_dp_remove(struct zynqmp_dpsub *dpsub) zynqmp_dp_write(dp, ZYNQMP_DP_INT_DS, 0xffffffff); zynqmp_dp_phy_exit(dp); + zynqmp_dp_reset(dp, true); } diff --git a/drivers/gpu/drm/xlnx/zynqmp_dpsub.c b/drivers/gpu/drm/xlnx/zynqmp_dpsub.c index 0c1c50271a88..ac37053412a1 100644 --- a/drivers/gpu/drm/xlnx/zynqmp_dpsub.c +++ b/drivers/gpu/drm/xlnx/zynqmp_dpsub.c @@ -111,8 +111,6 @@ static int zynqmp_dpsub_drm_init(struct zynqmp_dpsub *dpsub) if (ret) return ret; - drm->irq_enabled = 1; - drm_kms_helper_poll_init(drm); /* diff --git a/drivers/gpu/drm/zte/zx_drm_drv.c b/drivers/gpu/drm/zte/zx_drm_drv.c index 5506336594e2..064056503ebb 100644 --- a/drivers/gpu/drm/zte/zx_drm_drv.c +++ b/drivers/gpu/drm/zte/zx_drm_drv.c @@ -75,12 +75,6 @@ static int zx_drm_bind(struct device *dev) goto out_unbind; } - /* - * We will manage irq handler on our own. In this case, irq_enabled - * need to be true for using vblank core support. 
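The zynqmp_dp_probe() rework above deasserts the reset at the start of probe, so every later failure must re-assert it; the error labels are therefore ordered so that jumping to an earlier point in the unwind sequence undoes everything acquired after it (err_phy_exit falls through into err_reset). The general shape of that kernel idiom as a standalone sketch, with dummy acquire/release pairs standing in for the reset, PHY and IRQ steps:

    #include <stdio.h>

    static int take_reset(void)       { puts("deassert reset"); return 0; }
    static void give_reset(void)      { puts("assert reset"); }
    static int phy_init(void)         { puts("phy init"); return 0; }
    static void phy_exit(void)        { puts("phy exit"); }
    static int request_irq_stub(void) { puts("request irq"); return -1; /* forced failure */ }

    static int probe(void)
    {
        int ret;

        ret = take_reset();
        if (ret)
            return ret;

        ret = phy_init();
        if (ret)
            goto err_reset;

        ret = request_irq_stub();
        if (ret)
            goto err_phy_exit;

        return 0;

    err_phy_exit:
        phy_exit();        /* undo in reverse order of acquisition */
    err_reset:
        give_reset();
        return ret;
    }

    int main(void) { return probe() ? 1 : 0; }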
- */ - drm->irq_enabled = true; - drm_mode_config_reset(drm); drm_kms_helper_poll_init(drm); diff --git a/drivers/gpu/host1x/Makefile b/drivers/gpu/host1x/Makefile index 096017b8789d..d2b6f7de0498 100644 --- a/drivers/gpu/host1x/Makefile +++ b/drivers/gpu/host1x/Makefile @@ -9,6 +9,7 @@ host1x-y = \ job.o \ debug.o \ mipi.o \ + fence.o \ hw/host1x01.o \ hw/host1x02.o \ hw/host1x04.o \ diff --git a/drivers/gpu/host1x/cdma.c b/drivers/gpu/host1x/cdma.c index 6e6ca774f68d..765e5aa64eb6 100644 --- a/drivers/gpu/host1x/cdma.c +++ b/drivers/gpu/host1x/cdma.c @@ -312,10 +312,6 @@ static void update_cdma_locked(struct host1x_cdma *cdma) bool signal = false; struct host1x_job *job, *n; - /* If CDMA is stopped, queue is cleared and we can return */ - if (!cdma->running) - return; - /* * Walk the sync queue, reading the sync point registers as necessary, * to consume as many sync queue entries as possible without blocking @@ -324,7 +320,8 @@ static void update_cdma_locked(struct host1x_cdma *cdma) struct host1x_syncpt *sp = job->syncpt; /* Check whether this syncpt has completed, and bail if not */ - if (!host1x_syncpt_is_expired(sp, job->syncpt_end)) { + if (!host1x_syncpt_is_expired(sp, job->syncpt_end) && + !job->cancelled) { /* Start timer on next pending syncpt */ if (job->timeout) cdma_start_timer_locked(cdma, job); @@ -413,8 +410,11 @@ syncpt_incr: else restart_addr = cdma->last_pos; + if (!job) + goto resume; + /* do CPU increments for the remaining syncpts */ - if (job) { + if (job->syncpt_recovery) { dev_dbg(dev, "%s: perform CPU incr on pending buffers\n", __func__); @@ -433,8 +433,44 @@ syncpt_incr: dev_dbg(dev, "%s: finished sync_queue modification\n", __func__); + } else { + struct host1x_job *failed_job = job; + + host1x_job_dump(dev, job); + + host1x_syncpt_set_locked(job->syncpt); + failed_job->cancelled = true; + + list_for_each_entry_continue(job, &cdma->sync_queue, list) { + unsigned int i; + + if (job->syncpt != failed_job->syncpt) + continue; + + for (i = 0; i < job->num_slots; i++) { + unsigned int slot = (job->first_get/8 + i) % + HOST1X_PUSHBUFFER_SLOTS; + u32 *mapped = cdma->push_buffer.mapped; + + /* + * Overwrite opcodes with 0 word writes + * to offset 0xbad. This does nothing but + * has a easily detected signature in debug + * traces. + */ + mapped[2*slot+0] = 0x1bad0000; + mapped[2*slot+1] = 0x1bad0000; + } + + job->cancelled = true; + } + + wmb(); + + update_cdma_locked(cdma); } +resume: /* roll back DMAGET and start up channel again */ host1x_hw_cdma_resume(host1x, cdma, restart_addr); } @@ -490,6 +526,16 @@ int host1x_cdma_begin(struct host1x_cdma *cdma, struct host1x_job *job) mutex_lock(&cdma->lock); + /* + * Check if syncpoint was locked due to previous job timeout. + * This needs to be done within the cdma lock to avoid a race + * with the timeout handler. + */ + if (job->syncpt->locked) { + mutex_unlock(&cdma->lock); + return -EPERM; + } + if (job->timeout) { /* init state on first submit with timeout value */ if (!cdma->timeout.initialized) { diff --git a/drivers/gpu/host1x/fence.c b/drivers/gpu/host1x/fence.c new file mode 100644 index 000000000000..6941add95d0f --- /dev/null +++ b/drivers/gpu/host1x/fence.c @@ -0,0 +1,168 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Syncpoint dma_fence implementation + * + * Copyright (c) 2020, NVIDIA Corporation. 
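In the update_cdma_locked() hunk above, cancelled jobs have their pushbuffer contents stamped out rather than replayed: DMAGET positions are byte offsets, each pushbuffer slot holds two 32-bit opcodes (8 bytes), so job->first_get / 8 gives the first slot index and both words of each slot are overwritten with the 0x1bad0000 signature. A small sketch of that indexing with the ring size chosen for illustration (the real HOST1X_PUSHBUFFER_SLOTS value is defined elsewhere in the driver):

    #include <stdint.h>
    #include <stdio.h>

    #define PUSHBUFFER_SLOTS 512          /* illustrative ring size */

    static uint32_t pushbuf[2 * PUSHBUFFER_SLOTS];

    static void poison_job(uint32_t first_get_bytes, unsigned int num_slots)
    {
        for (unsigned int i = 0; i < num_slots; i++) {
            unsigned int slot = (first_get_bytes / 8 + i) % PUSHBUFFER_SLOTS;

            /* Harmless opcodes with an easily recognised signature. */
            pushbuf[2 * slot + 0] = 0x1bad0000;
            pushbuf[2 * slot + 1] = 0x1bad0000;
        }
    }

    int main(void)
    {
        poison_job(4088, 3);              /* wraps around the end of the ring */
        printf("slot 0 word 0 = %#x\n", pushbuf[0]);
        return 0;
    }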
+ */ + +#include <linux/dma-fence.h> +#include <linux/file.h> +#include <linux/fs.h> +#include <linux/slab.h> +#include <linux/sync_file.h> + +#include "fence.h" +#include "intr.h" +#include "syncpt.h" + +DEFINE_SPINLOCK(lock); + +struct host1x_syncpt_fence { + struct dma_fence base; + + atomic_t signaling; + + struct host1x_syncpt *sp; + u32 threshold; + + struct host1x_waitlist *waiter; + void *waiter_ref; + + struct delayed_work timeout_work; +}; + +static const char *host1x_syncpt_fence_get_driver_name(struct dma_fence *f) +{ + return "host1x"; +} + +static const char *host1x_syncpt_fence_get_timeline_name(struct dma_fence *f) +{ + return "syncpoint"; +} + +static struct host1x_syncpt_fence *to_host1x_fence(struct dma_fence *f) +{ + return container_of(f, struct host1x_syncpt_fence, base); +} + +static bool host1x_syncpt_fence_enable_signaling(struct dma_fence *f) +{ + struct host1x_syncpt_fence *sf = to_host1x_fence(f); + int err; + + if (host1x_syncpt_is_expired(sf->sp, sf->threshold)) + return false; + + dma_fence_get(f); + + /* + * The dma_fence framework requires the fence driver to keep a + * reference to any fences for which 'enable_signaling' has been + * called (and that have not been signalled). + * + * We provide a userspace API to create arbitrary syncpoint fences, + * so we cannot normally guarantee that all fences get signalled. + * As such, setup a timeout, so that long-lasting fences will get + * reaped eventually. + */ + schedule_delayed_work(&sf->timeout_work, msecs_to_jiffies(30000)); + + err = host1x_intr_add_action(sf->sp->host, sf->sp, sf->threshold, + HOST1X_INTR_ACTION_SIGNAL_FENCE, f, + sf->waiter, &sf->waiter_ref); + if (err) { + cancel_delayed_work_sync(&sf->timeout_work); + dma_fence_put(f); + return false; + } + + /* intr framework takes ownership of waiter */ + sf->waiter = NULL; + + /* + * The fence may get signalled at any time after the above call, + * so we need to initialize all state used by signalling + * before it. + */ + + return true; +} + +static void host1x_syncpt_fence_release(struct dma_fence *f) +{ + struct host1x_syncpt_fence *sf = to_host1x_fence(f); + + if (sf->waiter) + kfree(sf->waiter); + + dma_fence_free(f); +} + +const struct dma_fence_ops host1x_syncpt_fence_ops = { + .get_driver_name = host1x_syncpt_fence_get_driver_name, + .get_timeline_name = host1x_syncpt_fence_get_timeline_name, + .enable_signaling = host1x_syncpt_fence_enable_signaling, + .release = host1x_syncpt_fence_release, +}; + +void host1x_fence_signal(struct host1x_syncpt_fence *f) +{ + if (atomic_xchg(&f->signaling, 1)) + return; + + /* + * Cancel pending timeout work - if it races, it will + * not get 'f->signaling' and return. + */ + cancel_delayed_work_sync(&f->timeout_work); + + host1x_intr_put_ref(f->sp->host, f->sp->id, f->waiter_ref, false); + + dma_fence_signal(&f->base); + dma_fence_put(&f->base); +} + +static void do_fence_timeout(struct work_struct *work) +{ + struct delayed_work *dwork = (struct delayed_work *)work; + struct host1x_syncpt_fence *f = + container_of(dwork, struct host1x_syncpt_fence, timeout_work); + + if (atomic_xchg(&f->signaling, 1)) + return; + + /* + * Cancel pending timeout work - if it races, it will + * not get 'f->signaling' and return. 
+ */ + host1x_intr_put_ref(f->sp->host, f->sp->id, f->waiter_ref, true); + + dma_fence_set_error(&f->base, -ETIMEDOUT); + dma_fence_signal(&f->base); + dma_fence_put(&f->base); +} + +struct dma_fence *host1x_fence_create(struct host1x_syncpt *sp, u32 threshold) +{ + struct host1x_syncpt_fence *fence; + + fence = kzalloc(sizeof(*fence), GFP_KERNEL); + if (!fence) + return ERR_PTR(-ENOMEM); + + fence->waiter = kzalloc(sizeof(*fence->waiter), GFP_KERNEL); + if (!fence->waiter) + return ERR_PTR(-ENOMEM); + + fence->sp = sp; + fence->threshold = threshold; + + dma_fence_init(&fence->base, &host1x_syncpt_fence_ops, &lock, + dma_fence_context_alloc(1), 0); + + INIT_DELAYED_WORK(&fence->timeout_work, do_fence_timeout); + + return &fence->base; +} +EXPORT_SYMBOL(host1x_fence_create); diff --git a/drivers/gpu/host1x/fence.h b/drivers/gpu/host1x/fence.h new file mode 100644 index 000000000000..70c91de82f14 --- /dev/null +++ b/drivers/gpu/host1x/fence.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2020, NVIDIA Corporation. + */ + +#ifndef HOST1X_FENCE_H +#define HOST1X_FENCE_H + +struct host1x_syncpt_fence; + +void host1x_fence_signal(struct host1x_syncpt_fence *fence); + +#endif diff --git a/drivers/gpu/host1x/hw/channel_hw.c b/drivers/gpu/host1x/hw/channel_hw.c index d4c28faf27d1..1999780a7203 100644 --- a/drivers/gpu/host1x/hw/channel_hw.c +++ b/drivers/gpu/host1x/hw/channel_hw.c @@ -47,39 +47,84 @@ static void trace_write_gather(struct host1x_cdma *cdma, struct host1x_bo *bo, } } -static void submit_gathers(struct host1x_job *job) +static void submit_wait(struct host1x_cdma *cdma, u32 id, u32 threshold, + u32 next_class) +{ +#if HOST1X_HW >= 2 + host1x_cdma_push_wide(cdma, + host1x_opcode_setclass( + HOST1X_CLASS_HOST1X, + HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32, + /* WAIT_SYNCPT_32 is at SYNCPT_PAYLOAD_32+2 */ + BIT(0) | BIT(2) + ), + threshold, + id, + host1x_opcode_setclass(next_class, 0, 0) + ); +#else + /* TODO add waitchk or use waitbases or other mitigation */ + host1x_cdma_push(cdma, + host1x_opcode_setclass( + HOST1X_CLASS_HOST1X, + host1x_uclass_wait_syncpt_r(), + BIT(0) + ), + host1x_class_host_wait_syncpt(id, threshold) + ); + host1x_cdma_push(cdma, + host1x_opcode_setclass(next_class, 0, 0), + HOST1X_OPCODE_NOP + ); +#endif +} + +static void submit_gathers(struct host1x_job *job, u32 job_syncpt_base) { struct host1x_cdma *cdma = &job->channel->cdma; #if HOST1X_HW < 6 struct device *dev = job->channel->dev; #endif unsigned int i; + u32 threshold; - for (i = 0; i < job->num_gathers; i++) { - struct host1x_job_gather *g = &job->gathers[i]; - dma_addr_t addr = g->base + g->offset; - u32 op2, op3; + for (i = 0; i < job->num_cmds; i++) { + struct host1x_job_cmd *cmd = &job->cmds[i]; - op2 = lower_32_bits(addr); - op3 = upper_32_bits(addr); + if (cmd->is_wait) { + if (cmd->wait.relative) + threshold = job_syncpt_base + cmd->wait.threshold; + else + threshold = cmd->wait.threshold; - trace_write_gather(cdma, g->bo, g->offset, g->words); + submit_wait(cdma, cmd->wait.id, threshold, cmd->wait.next_class); + } else { + struct host1x_job_gather *g = &cmd->gather; + + dma_addr_t addr = g->base + g->offset; + u32 op2, op3; + + op2 = lower_32_bits(addr); + op3 = upper_32_bits(addr); - if (op3 != 0) { + trace_write_gather(cdma, g->bo, g->offset, g->words); + + if (op3 != 0) { #if HOST1X_HW >= 6 - u32 op1 = host1x_opcode_gather_wide(g->words); - u32 op4 = HOST1X_OPCODE_NOP; + u32 op1 = host1x_opcode_gather_wide(g->words); + u32 op4 = HOST1X_OPCODE_NOP; - 
host1x_cdma_push_wide(cdma, op1, op2, op3, op4); + host1x_cdma_push_wide(cdma, op1, op2, op3, op4); #else - dev_err(dev, "invalid gather for push buffer %pad\n", - &addr); - continue; + dev_err(dev, "invalid gather for push buffer %pad\n", + &addr); + continue; #endif - } else { - u32 op1 = host1x_opcode_gather(g->words); + } else { + u32 op1 = host1x_opcode_gather(g->words); - host1x_cdma_push(cdma, op1, op2); + host1x_cdma_push(cdma, op1, op2); + } } } } @@ -126,7 +171,7 @@ static int channel_submit(struct host1x_job *job) struct host1x *host = dev_get_drvdata(ch->dev->parent); trace_host1x_channel_submit(dev_name(ch->dev), - job->num_gathers, job->num_relocs, + job->num_cmds, job->num_relocs, job->syncpt->id, job->syncpt_incrs); /* before error checks, return current max */ @@ -181,7 +226,7 @@ static int channel_submit(struct host1x_job *job) host1x_opcode_setclass(job->class, 0, 0), HOST1X_OPCODE_NOP); - submit_gathers(job); + submit_gathers(job, syncval - user_syncpt_incrs); /* end CDMA submit & stash pinned hMems into sync queue */ host1x_cdma_end(&ch->cdma, job); @@ -191,7 +236,7 @@ static int channel_submit(struct host1x_job *job) /* schedule a submit complete interrupt */ err = host1x_intr_add_action(host, sp, syncval, HOST1X_INTR_ACTION_SUBMIT_COMPLETE, ch, - completed_waiter, NULL); + completed_waiter, &job->waiter); completed_waiter = NULL; WARN(err, "Failed to set submit complete interrupt"); diff --git a/drivers/gpu/host1x/hw/debug_hw.c b/drivers/gpu/host1x/hw/debug_hw.c index ceb48229d14b..54e31d81517b 100644 --- a/drivers/gpu/host1x/hw/debug_hw.c +++ b/drivers/gpu/host1x/hw/debug_hw.c @@ -156,9 +156,9 @@ static unsigned int show_channel_command(struct output *o, u32 val, } } -static void show_gather(struct output *o, phys_addr_t phys_addr, +static void show_gather(struct output *o, dma_addr_t phys_addr, unsigned int words, struct host1x_cdma *cdma, - phys_addr_t pin_addr, u32 *map_addr) + dma_addr_t pin_addr, u32 *map_addr) { /* Map dmaget cursor to corresponding mem handle */ u32 offset = phys_addr - pin_addr; @@ -176,11 +176,11 @@ static void show_gather(struct output *o, phys_addr_t phys_addr, } for (i = 0; i < words; i++) { - u32 addr = phys_addr + i * 4; + dma_addr_t addr = phys_addr + i * 4; u32 val = *(map_addr + offset / 4 + i); if (!data_count) { - host1x_debug_output(o, "%08x: %08x: ", addr, val); + host1x_debug_output(o, " %pad: %08x: ", &addr, val); data_count = show_channel_command(o, val, &payload); } else { host1x_debug_cont(o, "%08x%s", val, @@ -195,23 +195,25 @@ static void show_channel_gathers(struct output *o, struct host1x_cdma *cdma) struct push_buffer *pb = &cdma->push_buffer; struct host1x_job *job; - host1x_debug_output(o, "PUSHBUF at %pad, %u words\n", - &pb->dma, pb->size / 4); - - show_gather(o, pb->dma, pb->size / 4, cdma, pb->dma, pb->mapped); - list_for_each_entry(job, &cdma->sync_queue, list) { unsigned int i; - host1x_debug_output(o, "\n%p: JOB, syncpt_id=%d, syncpt_val=%d, first_get=%08x, timeout=%d num_slots=%d, num_handles=%d\n", - job, job->syncpt->id, job->syncpt_end, - job->first_get, job->timeout, + host1x_debug_output(o, "JOB, syncpt %u: %u timeout: %u num_slots: %u num_handles: %u\n", + job->syncpt->id, job->syncpt_end, job->timeout, job->num_slots, job->num_unpins); - for (i = 0; i < job->num_gathers; i++) { - struct host1x_job_gather *g = &job->gathers[i]; + show_gather(o, pb->dma + job->first_get, job->num_slots * 2, cdma, + pb->dma + job->first_get, pb->mapped + job->first_get); + + for (i = 0; i < job->num_cmds; i++) { + struct 
host1x_job_gather *g; u32 *mapped; + if (job->cmds[i].is_wait) + continue; + + g = &job->cmds[i].gather; + if (job->gather_copy_mapped) mapped = (u32 *)job->gather_copy_mapped; else @@ -222,7 +224,7 @@ static void show_channel_gathers(struct output *o, struct host1x_cdma *cdma) continue; } - host1x_debug_output(o, " GATHER at %pad+%#x, %d words\n", + host1x_debug_output(o, " GATHER at %pad+%#x, %d words\n", &g->base, g->offset, g->words); show_gather(o, g->base + g->offset, g->words, cdma, diff --git a/drivers/gpu/host1x/hw/debug_hw_1x01.c b/drivers/gpu/host1x/hw/debug_hw_1x01.c index 02a93305ac7b..85242a59fa6a 100644 --- a/drivers/gpu/host1x/hw/debug_hw_1x01.c +++ b/drivers/gpu/host1x/hw/debug_hw_1x01.c @@ -16,10 +16,13 @@ static void host1x_debug_show_channel_cdma(struct host1x *host, struct output *o) { struct host1x_cdma *cdma = &ch->cdma; + dma_addr_t dmastart, dmaend; u32 dmaput, dmaget, dmactrl; u32 cbstat, cbread; u32 val, base, baseval; + dmastart = host1x_ch_readl(ch, HOST1X_CHANNEL_DMASTART); + dmaend = host1x_ch_readl(ch, HOST1X_CHANNEL_DMAEND); dmaput = host1x_ch_readl(ch, HOST1X_CHANNEL_DMAPUT); dmaget = host1x_ch_readl(ch, HOST1X_CHANNEL_DMAGET); dmactrl = host1x_ch_readl(ch, HOST1X_CHANNEL_DMACTRL); @@ -56,9 +59,10 @@ static void host1x_debug_show_channel_cdma(struct host1x *host, HOST1X_SYNC_CBSTAT_CBOFFSET_V(cbstat), cbread); - host1x_debug_output(o, "DMAPUT %08x, DMAGET %08x, DMACTL %08x\n", + host1x_debug_output(o, "DMASTART %pad, DMAEND %pad\n", &dmastart, &dmaend); + host1x_debug_output(o, "DMAPUT %08x DMAGET %08x DMACTL %08x\n", dmaput, dmaget, dmactrl); - host1x_debug_output(o, "CBREAD %08x, CBSTAT %08x\n", cbread, cbstat); + host1x_debug_output(o, "CBREAD %08x CBSTAT %08x\n", cbread, cbstat); show_channel_gathers(o, cdma); host1x_debug_output(o, "\n"); diff --git a/drivers/gpu/host1x/hw/debug_hw_1x06.c b/drivers/gpu/host1x/hw/debug_hw_1x06.c index 6d1b583aa90f..9d0667879a19 100644 --- a/drivers/gpu/host1x/hw/debug_hw_1x06.c +++ b/drivers/gpu/host1x/hw/debug_hw_1x06.c @@ -16,10 +16,23 @@ static void host1x_debug_show_channel_cdma(struct host1x *host, struct output *o) { struct host1x_cdma *cdma = &ch->cdma; + dma_addr_t dmastart = 0, dmaend = 0; u32 dmaput, dmaget, dmactrl; u32 offset, class; u32 ch_stat; +#if defined(CONFIG_ARCH_DMA_ADDR_T_64BIT) && HOST1X_HW >= 6 + dmastart = host1x_ch_readl(ch, HOST1X_CHANNEL_DMASTART_HI); + dmastart <<= 32; +#endif + dmastart |= host1x_ch_readl(ch, HOST1X_CHANNEL_DMASTART); + +#if defined(CONFIG_ARCH_DMA_ADDR_T_64BIT) && HOST1X_HW >= 6 + dmaend = host1x_ch_readl(ch, HOST1X_CHANNEL_DMAEND_HI); + dmaend <<= 32; +#endif + dmaend |= host1x_ch_readl(ch, HOST1X_CHANNEL_DMAEND); + dmaput = host1x_ch_readl(ch, HOST1X_CHANNEL_DMAPUT); dmaget = host1x_ch_readl(ch, HOST1X_CHANNEL_DMAGET); dmactrl = host1x_ch_readl(ch, HOST1X_CHANNEL_DMACTRL); @@ -41,7 +54,8 @@ static void host1x_debug_show_channel_cdma(struct host1x *host, host1x_debug_output(o, "active class %02x, offset %04x\n", class, offset); - host1x_debug_output(o, "DMAPUT %08x, DMAGET %08x, DMACTL %08x\n", + host1x_debug_output(o, "DMASTART %pad, DMAEND %pad\n", &dmastart, &dmaend); + host1x_debug_output(o, "DMAPUT %08x DMAGET %08x DMACTL %08x\n", dmaput, dmaget, dmactrl); host1x_debug_output(o, "CHANNELSTAT %02x\n", ch_stat); diff --git a/drivers/gpu/host1x/hw/hw_host1x02_uclass.h b/drivers/gpu/host1x/hw/hw_host1x02_uclass.h index 4fc51f70496b..0a2ab8f1da6f 100644 --- a/drivers/gpu/host1x/hw/hw_host1x02_uclass.h +++ b/drivers/gpu/host1x/hw/hw_host1x02_uclass.h @@ -165,5 +165,17 @@ 
static inline u32 host1x_uclass_indoff_rwn_read_v(void) } #define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \ host1x_uclass_indoff_indroffset_f(v) +static inline u32 host1x_uclass_load_syncpt_payload_32_r(void) +{ + return 0x4e; +} +#define HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32 \ + host1x_uclass_load_syncpt_payload_32_r() +static inline u32 host1x_uclass_wait_syncpt_32_r(void) +{ + return 0x50; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_32 \ + host1x_uclass_wait_syncpt_32_r() #endif diff --git a/drivers/gpu/host1x/hw/hw_host1x04_uclass.h b/drivers/gpu/host1x/hw/hw_host1x04_uclass.h index 9e84a4adca9f..60c692b92955 100644 --- a/drivers/gpu/host1x/hw/hw_host1x04_uclass.h +++ b/drivers/gpu/host1x/hw/hw_host1x04_uclass.h @@ -165,5 +165,17 @@ static inline u32 host1x_uclass_indoff_rwn_read_v(void) } #define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \ host1x_uclass_indoff_indroffset_f(v) +static inline u32 host1x_uclass_load_syncpt_payload_32_r(void) +{ + return 0x4e; +} +#define HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32 \ + host1x_uclass_load_syncpt_payload_32_r() +static inline u32 host1x_uclass_wait_syncpt_32_r(void) +{ + return 0x50; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_32 \ + host1x_uclass_wait_syncpt_32_r() #endif diff --git a/drivers/gpu/host1x/hw/hw_host1x05_uclass.h b/drivers/gpu/host1x/hw/hw_host1x05_uclass.h index aee5a4e32877..2fcc9a2ad3ef 100644 --- a/drivers/gpu/host1x/hw/hw_host1x05_uclass.h +++ b/drivers/gpu/host1x/hw/hw_host1x05_uclass.h @@ -165,5 +165,17 @@ static inline u32 host1x_uclass_indoff_rwn_read_v(void) } #define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \ host1x_uclass_indoff_indroffset_f(v) +static inline u32 host1x_uclass_load_syncpt_payload_32_r(void) +{ + return 0x4e; +} +#define HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32 \ + host1x_uclass_load_syncpt_payload_32_r() +static inline u32 host1x_uclass_wait_syncpt_32_r(void) +{ + return 0x50; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_32 \ + host1x_uclass_wait_syncpt_32_r() #endif diff --git a/drivers/gpu/host1x/hw/hw_host1x06_uclass.h b/drivers/gpu/host1x/hw/hw_host1x06_uclass.h index c4bacdb7155f..5f831438d19b 100644 --- a/drivers/gpu/host1x/hw/hw_host1x06_uclass.h +++ b/drivers/gpu/host1x/hw/hw_host1x06_uclass.h @@ -165,5 +165,17 @@ static inline u32 host1x_uclass_indoff_rwn_read_v(void) } #define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \ host1x_uclass_indoff_indroffset_f(v) +static inline u32 host1x_uclass_load_syncpt_payload_32_r(void) +{ + return 0x4e; +} +#define HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32 \ + host1x_uclass_load_syncpt_payload_32_r() +static inline u32 host1x_uclass_wait_syncpt_32_r(void) +{ + return 0x50; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_32 \ + host1x_uclass_wait_syncpt_32_r() #endif diff --git a/drivers/gpu/host1x/hw/hw_host1x07_uclass.h b/drivers/gpu/host1x/hw/hw_host1x07_uclass.h index c74070f3f203..8cd2ef087d5d 100644 --- a/drivers/gpu/host1x/hw/hw_host1x07_uclass.h +++ b/drivers/gpu/host1x/hw/hw_host1x07_uclass.h @@ -165,5 +165,17 @@ static inline u32 host1x_uclass_indoff_rwn_read_v(void) } #define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \ host1x_uclass_indoff_indroffset_f(v) +static inline u32 host1x_uclass_load_syncpt_payload_32_r(void) +{ + return 0x4e; +} +#define HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32 \ + host1x_uclass_load_syncpt_payload_32_r() +static inline u32 host1x_uclass_wait_syncpt_32_r(void) +{ + return 0x50; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_32 \ + host1x_uclass_wait_syncpt_32_r() #endif diff --git a/drivers/gpu/host1x/intr.c b/drivers/gpu/host1x/intr.c index 6d1f3c0fdbe7..45b6be927ec4 100644 --- 
a/drivers/gpu/host1x/intr.c +++ b/drivers/gpu/host1x/intr.c @@ -13,6 +13,7 @@ #include <trace/events/host1x.h> #include "channel.h" #include "dev.h" +#include "fence.h" #include "intr.h" /* Wait list management */ @@ -121,12 +122,20 @@ static void action_wakeup_interruptible(struct host1x_waitlist *waiter) wake_up_interruptible(wq); } +static void action_signal_fence(struct host1x_waitlist *waiter) +{ + struct host1x_syncpt_fence *f = waiter->data; + + host1x_fence_signal(f); +} + typedef void (*action_handler)(struct host1x_waitlist *waiter); static const action_handler action_handlers[HOST1X_INTR_ACTION_COUNT] = { action_submit_complete, action_wakeup, action_wakeup_interruptible, + action_signal_fence, }; static void run_handlers(struct list_head completed[HOST1X_INTR_ACTION_COUNT]) diff --git a/drivers/gpu/host1x/intr.h b/drivers/gpu/host1x/intr.h index 6ea55e615e3a..e4c346099273 100644 --- a/drivers/gpu/host1x/intr.h +++ b/drivers/gpu/host1x/intr.h @@ -33,6 +33,8 @@ enum host1x_intr_action { */ HOST1X_INTR_ACTION_WAKEUP_INTERRUPTIBLE, + HOST1X_INTR_ACTION_SIGNAL_FENCE, + HOST1X_INTR_ACTION_COUNT }; diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c index adbdc225de8d..0eef6df7c89e 100644 --- a/drivers/gpu/host1x/job.c +++ b/drivers/gpu/host1x/job.c @@ -24,21 +24,25 @@ #define HOST1X_WAIT_SYNCPT_OFFSET 0x8 struct host1x_job *host1x_job_alloc(struct host1x_channel *ch, - u32 num_cmdbufs, u32 num_relocs) + u32 num_cmdbufs, u32 num_relocs, + bool skip_firewall) { struct host1x_job *job = NULL; unsigned int num_unpins = num_relocs; + bool enable_firewall; u64 total; void *mem; - if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) + enable_firewall = IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) && !skip_firewall; + + if (!enable_firewall) num_unpins += num_cmdbufs; /* Check that we're not going to overflow */ total = sizeof(struct host1x_job) + (u64)num_relocs * sizeof(struct host1x_reloc) + (u64)num_unpins * sizeof(struct host1x_job_unpin_data) + - (u64)num_cmdbufs * sizeof(struct host1x_job_gather) + + (u64)num_cmdbufs * sizeof(struct host1x_job_cmd) + (u64)num_unpins * sizeof(dma_addr_t) + (u64)num_unpins * sizeof(u32 *); if (total > ULONG_MAX) @@ -48,6 +52,8 @@ struct host1x_job *host1x_job_alloc(struct host1x_channel *ch, if (!job) return NULL; + job->enable_firewall = enable_firewall; + kref_init(&job->ref); job->channel = ch; @@ -57,8 +63,8 @@ struct host1x_job *host1x_job_alloc(struct host1x_channel *ch, mem += num_relocs * sizeof(struct host1x_reloc); job->unpins = num_unpins ? mem : NULL; mem += num_unpins * sizeof(struct host1x_job_unpin_data); - job->gathers = num_cmdbufs ? mem : NULL; - mem += num_cmdbufs * sizeof(struct host1x_job_gather); + job->cmds = num_cmdbufs ? mem : NULL; + mem += num_cmdbufs * sizeof(struct host1x_job_cmd); job->addr_phys = num_unpins ? 
mem : NULL; job->reloc_addr_phys = job->addr_phys; @@ -79,6 +85,13 @@ static void job_free(struct kref *ref) { struct host1x_job *job = container_of(ref, struct host1x_job, ref); + if (job->release) + job->release(job); + + if (job->waiter) + host1x_intr_put_ref(job->syncpt->host, job->syncpt->id, + job->waiter, false); + if (job->syncpt) host1x_syncpt_put(job->syncpt); @@ -94,22 +107,38 @@ EXPORT_SYMBOL(host1x_job_put); void host1x_job_add_gather(struct host1x_job *job, struct host1x_bo *bo, unsigned int words, unsigned int offset) { - struct host1x_job_gather *gather = &job->gathers[job->num_gathers]; + struct host1x_job_gather *gather = &job->cmds[job->num_cmds].gather; gather->words = words; gather->bo = bo; gather->offset = offset; - job->num_gathers++; + job->num_cmds++; } EXPORT_SYMBOL(host1x_job_add_gather); +void host1x_job_add_wait(struct host1x_job *job, u32 id, u32 thresh, + bool relative, u32 next_class) +{ + struct host1x_job_cmd *cmd = &job->cmds[job->num_cmds]; + + cmd->is_wait = true; + cmd->wait.id = id; + cmd->wait.threshold = thresh; + cmd->wait.next_class = next_class; + cmd->wait.relative = relative; + + job->num_cmds++; +} +EXPORT_SYMBOL(host1x_job_add_wait); + static unsigned int pin_job(struct host1x *host, struct host1x_job *job) { struct host1x_client *client = job->client; struct device *dev = client->dev; struct host1x_job_gather *g; struct iommu_domain *domain; + struct sg_table *sgt; unsigned int i; int err; @@ -119,7 +148,6 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job) for (i = 0; i < job->num_relocs; i++) { struct host1x_reloc *reloc = &job->relocs[i]; dma_addr_t phys_addr, *phys; - struct sg_table *sgt; reloc->target.bo = host1x_bo_get(reloc->target.bo); if (!reloc->target.bo) { @@ -192,20 +220,23 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job) * We will copy gathers BO content later, so there is no need to * hold and pin them. 
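
Editor's aside, not part of the patch: the job.c/job.h changes above replace the flat gather array with a command list that can interleave syncpoint waits between gathers. A minimal sketch of how a host1x client might drive the new entry points; "channel", "bo", "prefence_id", "prefence_thresh", "num_words" and "engine_class" are hypothetical driver state and error handling is elided:

	struct host1x_job *job;

	/* Two command slots (one wait + one gather), no relocations,
	 * and do not ask to skip the firewall. */
	job = host1x_job_alloc(channel, 2, 0, false);
	if (!job)
		return -ENOMEM;

	/* Stall the channel until the prefence syncpoint passes the given
	 * absolute threshold, then switch back to the engine class. */
	host1x_job_add_wait(job, prefence_id, prefence_thresh, false,
			    engine_class);

	/* Followed by the actual command gather. */
	host1x_job_add_gather(job, bo, num_words, 0);
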
*/ - if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) + if (job->enable_firewall) return 0; - for (i = 0; i < job->num_gathers; i++) { + for (i = 0; i < job->num_cmds; i++) { size_t gather_size = 0; struct scatterlist *sg; - struct sg_table *sgt; dma_addr_t phys_addr; unsigned long shift; struct iova *alloc; dma_addr_t *phys; unsigned int j; - g = &job->gathers[i]; + if (job->cmds[i].is_wait) + continue; + + g = &job->cmds[i].gather; + g->bo = host1x_bo_get(g->bo); if (!g->bo) { err = -EINVAL; @@ -296,7 +327,7 @@ static int do_relocs(struct host1x_job *job, struct host1x_job_gather *g) if (cmdbuf != reloc->cmdbuf.bo) continue; - if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) { + if (job->enable_firewall) { target = (u32 *)job->gather_copy_mapped + reloc->cmdbuf.offset / sizeof(u32) + g->offset / sizeof(u32); @@ -538,8 +569,13 @@ static inline int copy_gathers(struct device *host, struct host1x_job *job, fw.num_relocs = job->num_relocs; fw.class = job->class; - for (i = 0; i < job->num_gathers; i++) { - struct host1x_job_gather *g = &job->gathers[i]; + for (i = 0; i < job->num_cmds; i++) { + struct host1x_job_gather *g; + + if (job->cmds[i].is_wait) + continue; + + g = &job->cmds[i].gather; size += g->words * sizeof(u32); } @@ -561,10 +597,14 @@ static inline int copy_gathers(struct device *host, struct host1x_job *job, job->gather_copy_size = size; - for (i = 0; i < job->num_gathers; i++) { - struct host1x_job_gather *g = &job->gathers[i]; + for (i = 0; i < job->num_cmds; i++) { + struct host1x_job_gather *g; void *gather; + if (job->cmds[i].is_wait) + continue; + g = &job->cmds[i].gather; + /* Copy the gather */ gather = host1x_bo_mmap(g->bo); memcpy(job->gather_copy_mapped + offset, gather + g->offset, @@ -600,28 +640,33 @@ int host1x_job_pin(struct host1x_job *job, struct device *dev) if (err) goto out; - if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) { + if (job->enable_firewall) { err = copy_gathers(host->dev, job, dev); if (err) goto out; } /* patch gathers */ - for (i = 0; i < job->num_gathers; i++) { - struct host1x_job_gather *g = &job->gathers[i]; + for (i = 0; i < job->num_cmds; i++) { + struct host1x_job_gather *g; + + if (job->cmds[i].is_wait) + continue; + g = &job->cmds[i].gather; /* process each gather mem only once */ if (g->handled) continue; /* copy_gathers() sets gathers base if firewall is enabled */ - if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) + if (!job->enable_firewall) g->base = job->gather_addr_phys[i]; - for (j = i + 1; j < job->num_gathers; j++) { - if (job->gathers[j].bo == g->bo) { - job->gathers[j].handled = true; - job->gathers[j].base = g->base; + for (j = i + 1; j < job->num_cmds; j++) { + if (!job->cmds[j].is_wait && + job->cmds[j].gather.bo == g->bo) { + job->cmds[j].gather.handled = true; + job->cmds[j].gather.base = g->base; } } @@ -649,8 +694,7 @@ void host1x_job_unpin(struct host1x_job *job) struct device *dev = unpin->dev ?: host->dev; struct sg_table *sgt = unpin->sgt; - if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) && - unpin->size && host->domain) { + if (!job->enable_firewall && unpin->size && host->domain) { iommu_unmap(host->domain, job->addr_phys[i], unpin->size); free_iova(&host->iova, diff --git a/drivers/gpu/host1x/job.h b/drivers/gpu/host1x/job.h index 94bc2e4ae241..b4428c5495c9 100644 --- a/drivers/gpu/host1x/job.h +++ b/drivers/gpu/host1x/job.h @@ -18,6 +18,22 @@ struct host1x_job_gather { bool handled; }; +struct host1x_job_wait { + u32 id; + u32 threshold; + u32 next_class; + bool relative; +}; + +struct host1x_job_cmd { + bool 
is_wait; + + union { + struct host1x_job_gather gather; + struct host1x_job_wait wait; + }; +}; + struct host1x_job_unpin_data { struct host1x_bo *bo; struct sg_table *sgt; diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c index e648ebbb2027..d198a10848c6 100644 --- a/drivers/gpu/host1x/syncpt.c +++ b/drivers/gpu/host1x/syncpt.c @@ -407,6 +407,8 @@ static void syncpt_release(struct kref *ref) atomic_set(&sp->max_val, host1x_syncpt_read(sp)); + sp->locked = false; + mutex_lock(&sp->host->syncpt_mutex); host1x_syncpt_base_free(sp->base); diff --git a/drivers/gpu/host1x/syncpt.h b/drivers/gpu/host1x/syncpt.h index a6766f8d55ee..95cd29b79d6d 100644 --- a/drivers/gpu/host1x/syncpt.h +++ b/drivers/gpu/host1x/syncpt.h @@ -40,6 +40,13 @@ struct host1x_syncpt { /* interrupt data */ struct host1x_syncpt_intr intr; + + /* + * If a submission incrementing this syncpoint fails, lock it so that + * further submission cannot be made until application has handled the + * failure. + */ + bool locked; }; /* Initialize sync point array */ @@ -115,4 +122,9 @@ static inline int host1x_syncpt_is_valid(struct host1x_syncpt *sp) return sp->id < host1x_syncpt_nb_pts(sp->host); } +static inline void host1x_syncpt_set_locked(struct host1x_syncpt *sp) +{ + sp->locked = true; +} + #endif diff --git a/drivers/gpu/vga/vgaarb.c b/drivers/gpu/vga/vgaarb.c index 949fde433ea2..569930552957 100644 --- a/drivers/gpu/vga/vgaarb.c +++ b/drivers/gpu/vga/vgaarb.c @@ -72,10 +72,7 @@ struct vga_device { unsigned int io_norm_cnt; /* normal IO count */ unsigned int mem_norm_cnt; /* normal MEM count */ bool bridge_has_one_vga; - /* allow IRQ enable/disable hook */ - void *cookie; - void (*irq_set_state)(void *cookie, bool enable); - unsigned int (*set_vga_decode)(void *cookie, bool decode); + unsigned int (*set_decode)(struct pci_dev *pdev, bool decode); }; static LIST_HEAD(vga_list); @@ -218,13 +215,6 @@ int vga_remove_vgacon(struct pci_dev *pdev) #endif EXPORT_SYMBOL(vga_remove_vgacon); -static inline void vga_irq_set_state(struct vga_device *vgadev, bool state) -{ - if (vgadev->irq_set_state) - vgadev->irq_set_state(vgadev->cookie, state); -} - - /* If we don't ever use VGA arb we should avoid turning off anything anywhere due to old X servers getting confused about the boot device not being VGA */ @@ -284,12 +274,6 @@ static struct vga_device *__vga_tryget(struct vga_device *vgadev, if (vgadev == conflict) continue; - /* Check if the architecture allows a conflict between those - * 2 devices or if they are on separate domains - */ - if (!vga_conflicts(vgadev->pdev, conflict->pdev)) - continue; - /* We have a possible conflict. before we go further, we must * check if we sit on the same bus as the conflicting device. 
* if we don't, then we must tie both IO and MEM resources @@ -331,10 +315,8 @@ static struct vga_device *__vga_tryget(struct vga_device *vgadev, if ((match & conflict->decodes) & VGA_RSRC_LEGACY_IO) pci_bits |= PCI_COMMAND_IO; - if (pci_bits) { - vga_irq_set_state(conflict, false); + if (pci_bits) flags |= PCI_VGA_STATE_CHANGE_DECODES; - } } if (change_bridge) @@ -371,9 +353,6 @@ enable_them: pci_set_vga_state(vgadev->pdev, true, pci_bits, flags); - if (!vgadev->bridge_has_one_vga) - vga_irq_set_state(vgadev, true); - vgadev->owns |= wants; lock_them: vgadev->locks |= (rsrc & VGA_RSRC_LEGACY_MASK); @@ -826,7 +805,7 @@ static void __vga_set_legacy_decoding(struct pci_dev *pdev, goto bail; /* don't let userspace futz with kernel driver decodes */ - if (userspace && vgadev->set_vga_decode) + if (userspace && vgadev->set_decode) goto bail; /* update the device decodes + counter */ @@ -840,6 +819,17 @@ bail: spin_unlock_irqrestore(&vga_lock, flags); } +/** + * vga_set_legacy_decoding + * @pdev: pci device of the VGA card + * @decodes: bit mask of what legacy regions the card decodes + * + * Indicates to the arbiter if the card decodes legacy VGA IOs, legacy VGA + * Memory, both, or none. All cards default to both, the card driver (fbdev for + * example) should tell the arbiter if it has disabled legacy decoding, so the + * card can be left out of the arbitration process (and can be safe to take + * interrupts at any time. + */ void vga_set_legacy_decoding(struct pci_dev *pdev, unsigned int decodes) { __vga_set_legacy_decoding(pdev, decodes, false); @@ -849,17 +839,11 @@ EXPORT_SYMBOL(vga_set_legacy_decoding); /** * vga_client_register - register or unregister a VGA arbitration client * @pdev: pci device of the VGA client - * @cookie: client cookie to be used in callbacks - * @irq_set_state: irq state change callback - * @set_vga_decode: vga decode change callback + * @set_decode: vga decode change callback * * Clients have two callback mechanisms they can use. * - * @irq_set_state callback: If a client can't disable its GPUs VGA - * resources, then we need to be able to ask it to turn off its irqs when we - * turn off its mem and io decoding. - * - * @set_vga_decode callback: If a client can disable its GPU VGA resource, it + * @set_decode callback: If a client can disable its GPU VGA resource, it * will get a callback from this to set the encode/decode state. * * Rationale: we cannot disable VGA decode resources unconditionally some single @@ -872,15 +856,12 @@ EXPORT_SYMBOL(vga_set_legacy_decoding); * This function does not check whether a client for @pdev has been registered * already. * - * To unregister just call this function with @irq_set_state and @set_vga_decode - * both set to NULL for the same @pdev as originally used to register them. + * To unregister just call vga_client_unregister(). 
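
Editor's aside, not part of the patch: with the reworked arbiter interface a GPU driver registers only its pci_dev plus a decode callback, and tears down with vga_client_unregister(). A minimal sketch; the callback name and the exact resource mask it returns are illustrative only:

	static unsigned int mydrv_vga_set_decode(struct pci_dev *pdev, bool decode)
	{
		/* Report which legacy VGA ranges this GPU still decodes. */
		if (decode)
			return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
			       VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;

		return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
	}

	/* In probe: */
	err = vga_client_register(pdev, mydrv_vga_set_decode);

	/* In remove: */
	vga_client_unregister(pdev);
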
* * Returns: 0 on success, -1 on failure */ -int vga_client_register(struct pci_dev *pdev, void *cookie, - void (*irq_set_state)(void *cookie, bool state), - unsigned int (*set_vga_decode)(void *cookie, - bool decode)) +int vga_client_register(struct pci_dev *pdev, + unsigned int (*set_decode)(struct pci_dev *pdev, bool decode)) { int ret = -ENODEV; struct vga_device *vgadev; @@ -891,9 +872,7 @@ int vga_client_register(struct pci_dev *pdev, void *cookie, if (!vgadev) goto bail; - vgadev->irq_set_state = irq_set_state; - vgadev->set_vga_decode = set_vga_decode; - vgadev->cookie = cookie; + vgadev->set_decode = set_decode; ret = 0; bail: @@ -1403,9 +1382,9 @@ static void vga_arbiter_notify_clients(void) new_state = false; else new_state = true; - if (vgadev->set_vga_decode) { - new_decodes = vgadev->set_vga_decode(vgadev->cookie, - new_state); + if (vgadev->set_decode) { + new_decodes = vgadev->set_decode(vgadev->pdev, + new_state); vga_update_device_decodes(vgadev, new_decodes); } } diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 318864d52837..cf27df8048db 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -119,10 +119,9 @@ static bool vfio_pci_is_denylisted(struct pci_dev *pdev) * has no way to get to it and routing can be disabled externally at the * bridge. */ -static unsigned int vfio_pci_set_vga_decode(void *opaque, bool single_vga) +static unsigned int vfio_pci_set_decode(struct pci_dev *pdev, bool single_vga) { - struct vfio_pci_device *vdev = opaque; - struct pci_dev *tmp = NULL, *pdev = vdev->pdev; + struct pci_dev *tmp = NULL; unsigned char max_busnr; unsigned int decodes; @@ -1954,10 +1953,10 @@ static int vfio_pci_vga_init(struct vfio_pci_device *vdev) if (!vfio_pci_is_vga(pdev)) return 0; - ret = vga_client_register(pdev, vdev, NULL, vfio_pci_set_vga_decode); + ret = vga_client_register(pdev, vfio_pci_set_decode); if (ret) return ret; - vga_set_legacy_decoding(pdev, vfio_pci_set_vga_decode(vdev, false)); + vga_set_legacy_decoding(pdev, vfio_pci_set_decode(pdev, false)); return 0; } @@ -1967,7 +1966,7 @@ static void vfio_pci_vga_uninit(struct vfio_pci_device *vdev) if (!vfio_pci_is_vga(pdev)) return; - vga_client_register(pdev, NULL, NULL, NULL); + vga_client_unregister(pdev); vga_set_legacy_decoding(pdev, VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM | VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM); diff --git a/drivers/video/fbdev/arcfb.c b/drivers/video/fbdev/arcfb.c index 1447324ed0b6..45e64016db32 100644 --- a/drivers/video/fbdev/arcfb.c +++ b/drivers/video/fbdev/arcfb.c @@ -446,7 +446,7 @@ static ssize_t arcfb_write(struct fb_info *info, const char __user *buf, /* modded from epson 1355 */ unsigned long p; - int err=-EINVAL; + int err; unsigned int fbmemlength,x,y,w,h, bitppos, startpos, endpos, bitcount; struct arcfb_par *par; unsigned int xres; diff --git a/drivers/video/fbdev/asiliantfb.c b/drivers/video/fbdev/asiliantfb.c index 3e006da47752..84c56f525889 100644 --- a/drivers/video/fbdev/asiliantfb.c +++ b/drivers/video/fbdev/asiliantfb.c @@ -227,6 +227,9 @@ static int asiliantfb_check_var(struct fb_var_screeninfo *var, { unsigned long Ftarget, ratio, remainder; + if (!var->pixclock) + return -EINVAL; + ratio = 1000000 / var->pixclock; remainder = 1000000 % var->pixclock; Ftarget = 1000000 * ratio + (1000000 * remainder) / var->pixclock; diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c index 1c855145711b..71fb710f1ce3 100644 --- a/drivers/video/fbdev/core/fbmem.c +++ 
b/drivers/video/fbdev/core/fbmem.c @@ -67,7 +67,7 @@ static struct fb_info *get_fb_info(unsigned int idx) mutex_lock(®istration_lock); fb_info = registered_fb[idx]; if (fb_info) - atomic_inc(&fb_info->count); + refcount_inc(&fb_info->count); mutex_unlock(®istration_lock); return fb_info; @@ -75,7 +75,7 @@ static struct fb_info *get_fb_info(unsigned int idx) static void put_fb_info(struct fb_info *fb_info) { - if (!atomic_dec_and_test(&fb_info->count)) + if (!refcount_dec_and_test(&fb_info->count)) return; if (fb_info->fbops->fb_destroy) fb_info->fbops->fb_destroy(fb_info); @@ -1592,7 +1592,7 @@ static int do_register_framebuffer(struct fb_info *fb_info) if (!registered_fb[i]) break; fb_info->node = i; - atomic_set(&fb_info->count, 1); + refcount_set(&fb_info->count, 1); mutex_init(&fb_info->lock); mutex_init(&fb_info->mm_lock); diff --git a/drivers/video/fbdev/ep93xx-fb.c b/drivers/video/fbdev/ep93xx-fb.c index ba33b4dce0df..2398b3d48fed 100644 --- a/drivers/video/fbdev/ep93xx-fb.c +++ b/drivers/video/fbdev/ep93xx-fb.c @@ -548,7 +548,7 @@ static int ep93xxfb_probe(struct platform_device *pdev) } ep93xxfb_set_par(info); - clk_enable(fbi->clk); + clk_prepare_enable(fbi->clk); err = register_framebuffer(info); if (err) @@ -577,7 +577,7 @@ static int ep93xxfb_remove(struct platform_device *pdev) struct ep93xx_fbi *fbi = info->par; unregister_framebuffer(info); - clk_disable(fbi->clk); + clk_disable_unprepare(fbi->clk); ep93xxfb_dealloc_videomem(info); fb_dealloc_cmap(&info->cmap); diff --git a/drivers/video/fbdev/kyro/fbdev.c b/drivers/video/fbdev/kyro/fbdev.c index 8fbde92ae8b9..25801e8e3f74 100644 --- a/drivers/video/fbdev/kyro/fbdev.c +++ b/drivers/video/fbdev/kyro/fbdev.c @@ -372,6 +372,11 @@ static int kyro_dev_overlay_viewport_set(u32 x, u32 y, u32 ulWidth, u32 ulHeight /* probably haven't called CreateOverlay yet */ return -EINVAL; + if (ulWidth == 0 || ulWidth == 0xffffffff || + ulHeight == 0 || ulHeight == 0xffffffff || + (x < 2 && ulWidth + 2 == 0)) + return -EINVAL; + /* Stop Ramdac Output */ DisableRamdacOutput(deviceInfo.pSTGReg); @@ -394,6 +399,9 @@ static int kyrofb_check_var(struct fb_var_screeninfo *var, struct fb_info *info) { struct kyrofb_info *par = info->par; + if (!var->pixclock) + return -EINVAL; + if (var->bits_per_pixel != 16 && var->bits_per_pixel != 32) { printk(KERN_WARNING "kyrofb: depth not supported: %u\n", var->bits_per_pixel); return -EINVAL; diff --git a/drivers/video/fbdev/neofb.c b/drivers/video/fbdev/neofb.c index c0f4f402da3f..966df2a07360 100644 --- a/drivers/video/fbdev/neofb.c +++ b/drivers/video/fbdev/neofb.c @@ -585,7 +585,7 @@ neofb_check_var(struct fb_var_screeninfo *var, struct fb_info *info) DBG("neofb_check_var"); - if (PICOS2KHZ(var->pixclock) > par->maxClock) + if (var->pixclock && PICOS2KHZ(var->pixclock) > par->maxClock) return -EINVAL; /* Is the mode larger than the LCD panel? 
*/ diff --git a/drivers/video/fbdev/omap2/omapfb/dss/apply.c b/drivers/video/fbdev/omap2/omapfb/dss/apply.c index c71021091828..acca991c7540 100644 --- a/drivers/video/fbdev/omap2/omapfb/dss/apply.c +++ b/drivers/video/fbdev/omap2/omapfb/dss/apply.c @@ -108,7 +108,7 @@ static struct { } dss_data; /* protects dss_data */ -static spinlock_t data_lock; +static DEFINE_SPINLOCK(data_lock); /* lock for blocking functions */ static DEFINE_MUTEX(apply_lock); static DECLARE_COMPLETION(extra_updated_completion); @@ -131,8 +131,6 @@ static void apply_init_priv(void) struct mgr_priv_data *mp; int i; - spin_lock_init(&data_lock); - for (i = 0; i < num_ovls; ++i) { struct ovl_priv_data *op; diff --git a/drivers/video/fbdev/riva/fbdev.c b/drivers/video/fbdev/riva/fbdev.c index 55554b0433cb..84d5e23ad7d3 100644 --- a/drivers/video/fbdev/riva/fbdev.c +++ b/drivers/video/fbdev/riva/fbdev.c @@ -1084,6 +1084,9 @@ static int rivafb_check_var(struct fb_var_screeninfo *var, struct fb_info *info) int mode_valid = 0; NVTRACE_ENTER(); + if (!var->pixclock) + return -EINVAL; + switch (var->bits_per_pixel) { case 1 ... 8: var->red.offset = var->green.offset = var->blue.offset = 0; diff --git a/drivers/video/fbdev/ssd1307fb.c b/drivers/video/fbdev/ssd1307fb.c index eda448b7a0c9..1e2f71c2f8a8 100644 --- a/drivers/video/fbdev/ssd1307fb.c +++ b/drivers/video/fbdev/ssd1307fb.c @@ -82,6 +82,11 @@ struct ssd1307fb_par { struct regulator *vbat_reg; u32 vcomh; u32 width; + /* Cached address ranges */ + u8 col_start; + u8 col_end; + u8 page_start; + u8 page_end; }; struct ssd1307fb_array { @@ -152,17 +157,72 @@ static inline int ssd1307fb_write_cmd(struct i2c_client *client, u8 cmd) return ret; } -static void ssd1307fb_update_display(struct ssd1307fb_par *par) +static int ssd1307fb_set_col_range(struct ssd1307fb_par *par, u8 col_start, + u8 cols) +{ + u8 col_end = col_start + cols - 1; + int ret; + + if (col_start == par->col_start && col_end == par->col_end) + return 0; + + ret = ssd1307fb_write_cmd(par->client, SSD1307FB_SET_COL_RANGE); + if (ret < 0) + return ret; + + ret = ssd1307fb_write_cmd(par->client, col_start); + if (ret < 0) + return ret; + + ret = ssd1307fb_write_cmd(par->client, col_end); + if (ret < 0) + return ret; + + par->col_start = col_start; + par->col_end = col_end; + return 0; +} + +static int ssd1307fb_set_page_range(struct ssd1307fb_par *par, u8 page_start, + u8 pages) +{ + u8 page_end = page_start + pages - 1; + int ret; + + if (page_start == par->page_start && page_end == par->page_end) + return 0; + + ret = ssd1307fb_write_cmd(par->client, SSD1307FB_SET_PAGE_RANGE); + if (ret < 0) + return ret; + + ret = ssd1307fb_write_cmd(par->client, page_start); + if (ret < 0) + return ret; + + ret = ssd1307fb_write_cmd(par->client, page_end); + if (ret < 0) + return ret; + + par->page_start = page_start; + par->page_end = page_end; + return 0; +} + +static int ssd1307fb_update_rect(struct ssd1307fb_par *par, unsigned int x, + unsigned int y, unsigned int width, + unsigned int height) { struct ssd1307fb_array *array; u8 *vmem = par->info->screen_buffer; unsigned int line_length = par->info->fix.line_length; - unsigned int pages = DIV_ROUND_UP(par->height, 8); - int i, j, k; + unsigned int pages = DIV_ROUND_UP(y % 8 + height, 8); + u32 array_idx = 0; + int ret, i, j, k; - array = ssd1307fb_alloc_array(par->width * pages, SSD1307FB_DATA); + array = ssd1307fb_alloc_array(width * pages, SSD1307FB_DATA); if (!array) - return; + return -ENOMEM; /* * The screen is divided in pages, each having a height of 8 @@ -193,27 
+253,44 @@ static void ssd1307fb_update_display(struct ssd1307fb_par *par) * (5) A4 B4 C4 D4 E4 F4 G4 H4 */ - for (i = 0; i < pages; i++) { - for (j = 0; j < par->width; j++) { - int m = 8; - u32 array_idx = i * par->width + j; - array->data[array_idx] = 0; - /* Last page may be partial */ - if (i + 1 == pages && par->height % 8) - m = par->height % 8; + ret = ssd1307fb_set_col_range(par, par->col_offset + x, width); + if (ret < 0) + goto out_free; + + ret = ssd1307fb_set_page_range(par, par->page_offset + y / 8, pages); + if (ret < 0) + goto out_free; + + for (i = y / 8; i < y / 8 + pages; i++) { + int m = 8; + + /* Last page may be partial */ + if (8 * (i + 1) > par->height) + m = par->height % 8; + for (j = x; j < x + width; j++) { + u8 data = 0; + for (k = 0; k < m; k++) { u8 byte = vmem[(8 * i + k) * line_length + j / 8]; u8 bit = (byte >> (j % 8)) & 1; - array->data[array_idx] |= bit << k; + data |= bit << k; } + array->data[array_idx++] = data; } } - ssd1307fb_write_array(par->client, array, par->width * pages); + ret = ssd1307fb_write_array(par->client, array, width * pages); + +out_free: kfree(array); + return ret; } +static int ssd1307fb_update_display(struct ssd1307fb_par *par) +{ + return ssd1307fb_update_rect(par, 0, 0, par->width, par->height); +} static ssize_t ssd1307fb_write(struct fb_info *info, const char __user *buf, size_t count, loff_t *ppos) @@ -222,6 +299,7 @@ static ssize_t ssd1307fb_write(struct fb_info *info, const char __user *buf, unsigned long total_size; unsigned long p = *ppos; void *dst; + int ret; total_size = info->fix.smem_len; @@ -239,7 +317,9 @@ static ssize_t ssd1307fb_write(struct fb_info *info, const char __user *buf, if (copy_from_user(dst, buf, count)) return -EFAULT; - ssd1307fb_update_display(par); + ret = ssd1307fb_update_display(par); + if (ret < 0) + return ret; *ppos += count; @@ -260,21 +340,24 @@ static void ssd1307fb_fillrect(struct fb_info *info, const struct fb_fillrect *r { struct ssd1307fb_par *par = info->par; sys_fillrect(info, rect); - ssd1307fb_update_display(par); + ssd1307fb_update_rect(par, rect->dx, rect->dy, rect->width, + rect->height); } static void ssd1307fb_copyarea(struct fb_info *info, const struct fb_copyarea *area) { struct ssd1307fb_par *par = info->par; sys_copyarea(info, area); - ssd1307fb_update_display(par); + ssd1307fb_update_rect(par, area->dx, area->dy, area->width, + area->height); } static void ssd1307fb_imageblit(struct fb_info *info, const struct fb_image *image) { struct ssd1307fb_par *par = info->par; sys_imageblit(info, image); - ssd1307fb_update_display(par); + ssd1307fb_update_rect(par, image->dx, image->dy, image->width, + image->height); } static const struct fb_ops ssd1307fb_ops = { @@ -454,37 +537,11 @@ static int ssd1307fb_init(struct ssd1307fb_par *par) if (ret < 0) return ret; - /* Set column range */ - ret = ssd1307fb_write_cmd(par->client, SSD1307FB_SET_COL_RANGE); - if (ret < 0) - return ret; - - ret = ssd1307fb_write_cmd(par->client, par->col_offset); - if (ret < 0) - return ret; - - ret = ssd1307fb_write_cmd(par->client, par->col_offset + par->width - 1); - if (ret < 0) - return ret; - - /* Set page range */ - ret = ssd1307fb_write_cmd(par->client, SSD1307FB_SET_PAGE_RANGE); - if (ret < 0) - return ret; - - ret = ssd1307fb_write_cmd(par->client, par->page_offset); - if (ret < 0) - return ret; - - ret = ssd1307fb_write_cmd(par->client, - par->page_offset + - DIV_ROUND_UP(par->height, 8) - 1); + /* Clear the screen */ + ret = ssd1307fb_update_display(par); if (ret < 0) return ret; - /* Clear 
the screen */ - ssd1307fb_update_display(par); - /* Turn on the display */ ret = ssd1307fb_write_cmd(par->client, SSD1307FB_DISPLAY_ON); if (ret < 0) diff --git a/include/drm/amd_asic_type.h b/include/drm/amd_asic_type.h index 0bf0ad869eb9..0f66a0d9f06d 100644 --- a/include/drm/amd_asic_type.h +++ b/include/drm/amd_asic_type.h @@ -53,14 +53,15 @@ enum amd_asic_type { CHIP_RENOIR, /* 24 */ CHIP_ALDEBARAN, /* 25 */ CHIP_NAVI10, /* 26 */ - CHIP_NAVI14, /* 27 */ - CHIP_NAVI12, /* 28 */ - CHIP_SIENNA_CICHLID, /* 29 */ - CHIP_NAVY_FLOUNDER, /* 30 */ - CHIP_VANGOGH, /* 31 */ - CHIP_DIMGREY_CAVEFISH, /* 32 */ - CHIP_BEIGE_GOBY, /* 33 */ - CHIP_YELLOW_CARP, /* 34 */ + CHIP_CYAN_SKILLFISH, /* 27 */ + CHIP_NAVI14, /* 28 */ + CHIP_NAVI12, /* 29 */ + CHIP_SIENNA_CICHLID, /* 30 */ + CHIP_NAVY_FLOUNDER, /* 31 */ + CHIP_VANGOGH, /* 32 */ + CHIP_DIMGREY_CAVEFISH, /* 33 */ + CHIP_BEIGE_GOBY, /* 34 */ + CHIP_YELLOW_CARP, /* 35 */ CHIP_LAST, }; diff --git a/include/drm/bridge/dw_hdmi.h b/include/drm/bridge/dw_hdmi.h index 6a5716655619..2a1f85f9a8a3 100644 --- a/include/drm/bridge/dw_hdmi.h +++ b/include/drm/bridge/dw_hdmi.h @@ -126,6 +126,8 @@ struct dw_hdmi_phy_ops { struct dw_hdmi_plat_data { struct regmap *regm; + unsigned int output_port; + unsigned long input_bus_encoding; bool use_drm_infoframe; bool ycbcr_420_allowed; diff --git a/include/drm/drm_aperture.h b/include/drm/drm_aperture.h index 6c148078780c..7096703c3949 100644 --- a/include/drm/drm_aperture.h +++ b/include/drm/drm_aperture.h @@ -6,20 +6,22 @@ #include <linux/types.h> struct drm_device; +struct drm_driver; struct pci_dev; int devm_aperture_acquire_from_firmware(struct drm_device *dev, resource_size_t base, resource_size_t size); int drm_aperture_remove_conflicting_framebuffers(resource_size_t base, resource_size_t size, - bool primary, const char *name); + bool primary, const struct drm_driver *req_driver); -int drm_aperture_remove_conflicting_pci_framebuffers(struct pci_dev *pdev, const char *name); +int drm_aperture_remove_conflicting_pci_framebuffers(struct pci_dev *pdev, + const struct drm_driver *req_driver); /** * drm_aperture_remove_framebuffers - remove all existing framebuffers * @primary: also kick vga16fb if present - * @name: requesting driver name + * @req_driver: requesting DRM driver * * This function removes all graphics device drivers. Use this function on systems * that can have their framebuffer located anywhere in memory. @@ -27,9 +29,11 @@ int drm_aperture_remove_conflicting_pci_framebuffers(struct pci_dev *pdev, const * Returns: * 0 on success, or a negative errno code otherwise */ -static inline int drm_aperture_remove_framebuffers(bool primary, const char *name) +static inline int +drm_aperture_remove_framebuffers(bool primary, const struct drm_driver *req_driver) { - return drm_aperture_remove_conflicting_framebuffers(0, (resource_size_t)-1, primary, name); + return drm_aperture_remove_conflicting_framebuffers(0, (resource_size_t)-1, primary, + req_driver); } #endif diff --git a/include/drm/drm_auth.h b/include/drm/drm_auth.h index 6bf8b2b78991..ba248ca8866f 100644 --- a/include/drm/drm_auth.h +++ b/include/drm/drm_auth.h @@ -58,12 +58,6 @@ struct drm_lock_data { * @refcount: Refcount for this master object. * @dev: Link back to the DRM device * @driver_priv: Pointer to driver-private information. - * @lessor: Lease holder - * @lessee_id: id for lessees. 
Owners always have id 0 - * @lessee_list: other lessees of the same master - * @lessees: drm_masters leasing from this one - * @leases: Objects leased to this drm_master. - * @lessee_idr: All lessees under this owner (only used where lessor == NULL) * * Note that master structures are only relevant for the legacy/primary device * nodes, hence there can only be one per device, not one per drm_minor. @@ -88,17 +82,68 @@ struct drm_master { struct idr magic_map; void *driver_priv; - /* Tree of display resource leases, each of which is a drm_master struct - * All of these get activated simultaneously, so drm_device master points - * at the top of the tree (for which lessor is NULL). Protected by - * &drm_device.mode_config.idr_mutex. + /** + * @lessor: + * + * Lease grantor, only set if this &struct drm_master represents a + * lessee holding a lease of objects from @lessor. Full owners of the + * device have this set to NULL. + * + * The lessor does not change once it's set in drm_lease_create(), and + * each lessee holds a reference to its lessor that it releases upon + * being destroyed in drm_lease_destroy(). + * + * See also the :ref:`section on display resource leasing + * <drm_leasing>`. */ - struct drm_master *lessor; + + /** + * @lessee_id: + * + * ID for lessees. Owners (i.e. @lessor is NULL) always have ID 0. + * Protected by &drm_device.mode_config's &drm_mode_config.idr_mutex. + */ int lessee_id; + + /** + * @lessee_list: + * + * List entry of lessees of @lessor, where they are linked to @lessees. + * Not used for owners. Protected by &drm_device.mode_config's + * &drm_mode_config.idr_mutex. + */ struct list_head lessee_list; + + /** + * @lessees: + * + * List of drm_masters leasing from this one. Protected by + * &drm_device.mode_config's &drm_mode_config.idr_mutex. + * + * This list is empty if no leases have been granted, or if all lessees + * have been destroyed. Since lessors are referenced by all their + * lessees, this master cannot be destroyed unless the list is empty. + */ struct list_head lessees; + + /** + * @leases: + * + * Objects leased to this drm_master. Protected by + * &drm_device.mode_config's &drm_mode_config.idr_mutex. + * + * Objects are leased all together in drm_lease_create(), and are + * removed all together when the lease is revoked. + */ struct idr leases; + + /** + * @lessee_idr: + * + * All lessees under this owner (only used where @lessor is NULL). + * Protected by &drm_device.mode_config's &drm_mode_config.idr_mutex. + */ struct idr lessee_idr; /* private: */ #if IS_ENABLED(CONFIG_DRM_LEGACY) @@ -107,6 +152,7 @@ struct drm_master { }; struct drm_master *drm_master_get(struct drm_master *master); +struct drm_master *drm_file_get_master(struct drm_file *file_priv); void drm_master_put(struct drm_master **master); bool drm_is_current_master(struct drm_file *fpriv); diff --git a/include/drm/drm_bridge.h b/include/drm/drm_bridge.h index 2195daa289d2..46bdfa48c413 100644 --- a/include/drm/drm_bridge.h +++ b/include/drm/drm_bridge.h @@ -171,6 +171,11 @@ struct drm_bridge_funcs { * signals) feeding it is still running when this callback is called. * * The @disable callback is optional. + * + * NOTE: + * + * This is deprecated, do not use! + * New drivers shall use &drm_bridge_funcs.atomic_disable. */ void (*disable)(struct drm_bridge *bridge); @@ -190,6 +195,11 @@ struct drm_bridge_funcs { * called. * * The @post_disable callback is optional. + * + * NOTE: + * + * This is deprecated, do not use! 
+ * New drivers shall use &drm_bridge_funcs.atomic_post_disable. */ void (*post_disable)(struct drm_bridge *bridge); @@ -215,9 +225,9 @@ struct drm_bridge_funcs { * * NOTE: * - * If a need arises to store and access modes adjusted for other - * locations than the connection between the CRTC and the first bridge, - * the DRM framework will have to be extended with DRM bridge states. + * This is deprecated, do not use! + * New drivers shall set their mode in the + * &drm_bridge_funcs.atomic_enable operation. */ void (*mode_set)(struct drm_bridge *bridge, const struct drm_display_mode *mode, @@ -239,6 +249,11 @@ struct drm_bridge_funcs { * there is one) when this callback is called. * * The @pre_enable callback is optional. + * + * NOTE: + * + * This is deprecated, do not use! + * New drivers shall use &drm_bridge_funcs.atomic_pre_enable. */ void (*pre_enable)(struct drm_bridge *bridge); @@ -259,6 +274,11 @@ struct drm_bridge_funcs { * chain if there is one. * * The @enable callback is optional. + * + * NOTE: + * + * This is deprecated, do not use! + * New drivers shall use &drm_bridge_funcs.atomic_enable. */ void (*enable)(struct drm_bridge *bridge); diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h index 714d1a01c065..1647960c9e50 100644 --- a/include/drm/drm_connector.h +++ b/include/drm/drm_connector.h @@ -848,6 +848,11 @@ struct drm_connector_funcs { * locks to avoid races with concurrent modeset changes need to use * &drm_connector_helper_funcs.detect_ctx instead. * + * Also note that this callback can be called no matter the + * state the connector is in. Drivers that need the underlying + * device to be powered to perform the detection will first need + * to make sure it's been properly enabled. + * * RETURNS: * * drm_connector_status indicating the connector's status. @@ -1735,6 +1740,11 @@ void drm_mode_put_tile_group(struct drm_device *dev, * drm_connector_list_iter_begin(), drm_connector_list_iter_end() and * drm_connector_list_iter_next() respectively the convenience macro * drm_for_each_connector_iter(). + * + * Note that the return value of drm_connector_list_iter_next() is only valid + * up to the next drm_connector_list_iter_next() or + * drm_connector_list_iter_end() call. If you want to use the connector later, + * then you need to grab your own reference first using drm_connector_get(). */ struct drm_connector_list_iter { /* private: */ diff --git a/include/drm/drm_damage_helper.h b/include/drm/drm_damage_helper.h index 40c34a5bf149..effda42cce31 100644 --- a/include/drm/drm_damage_helper.h +++ b/include/drm/drm_damage_helper.h @@ -64,7 +64,6 @@ struct drm_atomic_helper_damage_iter { bool full_update; }; -void drm_plane_enable_fb_damage_clips(struct drm_plane *plane); void drm_atomic_helper_check_plane_damage(struct drm_atomic_state *state, struct drm_plane_state *plane_state); int drm_atomic_helper_dirtyfb(struct drm_framebuffer *fb, @@ -82,21 +81,4 @@ bool drm_atomic_helper_damage_merged(const struct drm_plane_state *old_state, struct drm_plane_state *state, struct drm_rect *rect); -/** - * drm_helper_get_plane_damage_clips - Returns damage clips in &drm_rect. - * @state: Plane state. - * - * Returns plane damage rectangles in internal &drm_rect. Currently &drm_rect - * can be obtained by simply typecasting &drm_mode_rect. This is because both - * are signed 32 and during drm_atomic_check_only() it is verified that damage - * clips are inside fb. - * - * Return: Clips in plane fb_damage_clips blob property. 
- */ -static inline struct drm_rect * -drm_helper_get_plane_damage_clips(const struct drm_plane_state *state) -{ - return (struct drm_rect *)drm_plane_get_damage_clips(state); -} - #endif diff --git a/include/drm/drm_device.h b/include/drm/drm_device.h index f588f967bb14..604b1d1b2d72 100644 --- a/include/drm/drm_device.h +++ b/include/drm/drm_device.h @@ -192,20 +192,6 @@ struct drm_device { struct list_head clientlist; /** - * @irq_enabled: - * - * Indicates that interrupt handling is enabled, specifically vblank - * handling. Drivers which don't use drm_irq_install() need to set this - * to true manually. - */ - bool irq_enabled; - - /** - * @irq: Used by the drm_irq_install() and drm_irq_unistall() helpers. - */ - int irq; - - /** * @vblank_disable_immediate: * * If true, vblank interrupt will be disabled immediately when the @@ -372,6 +358,10 @@ struct drm_device { /* Scatter gather memory */ struct drm_sg_mem *sg; + + /* IRQs */ + bool irq_enabled; + int irq; #endif }; diff --git a/include/drm/drm_dp_aux_bus.h b/include/drm/drm_dp_aux_bus.h new file mode 100644 index 000000000000..4f19b20b1dd6 --- /dev/null +++ b/include/drm/drm_dp_aux_bus.h @@ -0,0 +1,57 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright 2021 Google Inc. + * + * The DP AUX bus is used for devices that are connected over a DisplayPort + * AUX bus. The devices on the far side of the bus are referred to as + * endpoints in this code. + */ + +#ifndef _DP_AUX_BUS_H_ +#define _DP_AUX_BUS_H_ + +#include <linux/device.h> +#include <linux/mod_devicetable.h> + +/** + * struct dp_aux_ep_device - Main dev structure for DP AUX endpoints + * + * This is used to instantiate devices that are connected via a DP AUX + * bus. Usually the device is a panel, but conceivable other devices could + * be hooked up there. + */ +struct dp_aux_ep_device { + /** @dev: The normal dev pointer */ + struct device dev; + /** @aux: Pointer to the aux bus */ + struct drm_dp_aux *aux; +}; + +struct dp_aux_ep_driver { + int (*probe)(struct dp_aux_ep_device *aux_ep); + void (*remove)(struct dp_aux_ep_device *aux_ep); + void (*shutdown)(struct dp_aux_ep_device *aux_ep); + struct device_driver driver; +}; + +static inline struct dp_aux_ep_device *to_dp_aux_ep_dev(struct device *dev) +{ + return container_of(dev, struct dp_aux_ep_device, dev); +} + +static inline struct dp_aux_ep_driver *to_dp_aux_ep_drv(struct device_driver *drv) +{ + return container_of(drv, struct dp_aux_ep_driver, driver); +} + +int of_dp_aux_populate_ep_devices(struct drm_dp_aux *aux); +void of_dp_aux_depopulate_ep_devices(struct drm_dp_aux *aux); +int devm_of_dp_aux_populate_ep_devices(struct drm_dp_aux *aux); + +#define dp_aux_dp_driver_register(aux_ep_drv) \ + __dp_aux_dp_driver_register(aux_ep_drv, THIS_MODULE) +int __dp_aux_dp_driver_register(struct dp_aux_ep_driver *aux_ep_drv, + struct module *owner); +void dp_aux_dp_driver_unregister(struct dp_aux_ep_driver *aux_ep_drv); + +#endif /* _DP_AUX_BUS_H_ */ diff --git a/include/drm/drm_dp_helper.h b/include/drm/drm_dp_helper.h index 3f2715eb965f..1d5b3dbb6e56 100644 --- a/include/drm/drm_dp_helper.h +++ b/include/drm/drm_dp_helper.h @@ -30,6 +30,7 @@ struct drm_device; struct drm_dp_aux; +struct drm_panel; /* * Unless otherwise noted, all values are from the DP 1.1a spec. 
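
Editor's aside, not part of the patch: a device sitting on the new DP AUX bus (typically an eDP panel) is bound by a dp_aux_ep_driver. A rough sketch of an endpoint driver against the drm_dp_aux_bus.h header added above; the driver name and probe body are placeholders:

	#include <drm/drm_dp_aux_bus.h>

	static int example_aux_ep_probe(struct dp_aux_ep_device *aux_ep)
	{
		struct drm_dp_aux *aux = aux_ep->aux; /* controller's AUX channel */

		/* Talk to the sink over aux, register a drm_panel, etc. */
		return 0;
	}

	static struct dp_aux_ep_driver example_aux_ep_driver = {
		.probe = example_aux_ep_probe,
		.driver = {
			.name = "example-aux-ep",
		},
	};

	/* Registration/unregistration from module init/exit: */
	dp_aux_dp_driver_register(&example_aux_ep_driver);
	dp_aux_dp_driver_unregister(&example_aux_ep_driver);
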
Note that @@ -1818,6 +1819,24 @@ drm_dp_sink_can_do_video_without_timing_msa(const u8 dpcd[DP_RECEIVER_CAP_SIZE]) DP_MSA_TIMING_PAR_IGNORED; } +/** + * drm_edp_backlight_supported() - Check an eDP DPCD for VESA backlight support + * @edp_dpcd: The DPCD to check + * + * Note that currently this function will return %false for panels which support various DPCD + * backlight features but which require the brightness be set through PWM, and don't support setting + * the brightness level via the DPCD. This is a TODO. + * + * Returns: %True if @edp_dpcd indicates that VESA backlight controls are supported, %false + * otherwise + */ +static inline bool +drm_edp_backlight_supported(const u8 edp_dpcd[EDP_DISPLAY_CTL_CAP_SIZE]) +{ + return (edp_dpcd[1] & DP_EDP_TCON_BACKLIGHT_ADJUSTMENT_CAP) && + (edp_dpcd[2] & DP_EDP_BACKLIGHT_BRIGHTNESS_AUX_SET_CAP); +} + /* * DisplayPort AUX channel */ @@ -1858,35 +1877,6 @@ struct drm_dp_aux_cec { /** * struct drm_dp_aux - DisplayPort AUX channel - * @name: user-visible name of this AUX channel and the I2C-over-AUX adapter - * @ddc: I2C adapter that can be used for I2C-over-AUX communication - * @dev: pointer to struct device that is the parent for this AUX channel - * @drm_dev: pointer to the &drm_device that owns this AUX channel. Beware, this - * may be %NULL before drm_dp_aux_register() has been called. - * @crtc: backpointer to the crtc that is currently using this AUX channel - * @hw_mutex: internal mutex used for locking transfers - * @crc_work: worker that captures CRCs for each frame - * @crc_count: counter of captured frame CRCs - * @transfer: transfers a message representing a single AUX transaction - * - * The @dev field should be set to a pointer to the device that implements the - * AUX channel. As well, the @drm_dev field should be set to the &drm_device - * that will be using this AUX channel as early as possible. For many graphics - * drivers this should happen before drm_dp_aux_init(), however it's perfectly - * fine to set this field later so long as it's assigned before calling - * drm_dp_aux_register(). - * - * The @name field may be used to specify the name of the I2C adapter. If set to - * %NULL, dev_name() of @dev will be used. - * - * Drivers provide a hardware-specific implementation of how transactions are - * executed via the @transfer() function. A pointer to a &drm_dp_aux_msg - * structure describing the transaction is passed into this function. Upon - * success, the implementation should return the number of payload bytes that - * were transferred, or a negative error-code on failure. Helpers propagate - * errors from the @transfer() function, with the exception of the %-EBUSY - * error, which causes a transaction to be retried. On a short, helpers will - * return %-EPROTO to make it simpler to check for failure. * * An AUX channel can also be used to transport I2C messages to a sink. A * typical application of that is to access an EDID that's present in the sink @@ -1897,22 +1887,96 @@ struct drm_dp_aux_cec { * transfers by default; if a partial response is received, the adapter will * drop down to the size given by the partial response for this transaction * only. - * - * Note that the aux helper code assumes that the @transfer() function only - * modifies the reply field of the &drm_dp_aux_msg structure. The retry logic - * and i2c helpers assume this is the case. */ struct drm_dp_aux { + /** + * @name: user-visible name of this AUX channel and the + * I2C-over-AUX adapter. 
+ * + * It's also used to specify the name of the I2C adapter. If set + * to %NULL, dev_name() of @dev will be used. + */ const char *name; + + /** + * @ddc: I2C adapter that can be used for I2C-over-AUX + * communication + */ struct i2c_adapter ddc; + + /** + * @dev: pointer to struct device that is the parent for this + * AUX channel. + */ struct device *dev; + + /** + * @drm_dev: pointer to the &drm_device that owns this AUX channel. + * Beware, this may be %NULL before drm_dp_aux_register() has been + * called. + * + * It should be set to the &drm_device that will be using this AUX + * channel as early as possible. For many graphics drivers this should + * happen before drm_dp_aux_init(), however it's perfectly fine to set + * this field later so long as it's assigned before calling + * drm_dp_aux_register(). + */ struct drm_device *drm_dev; + + /** + * @crtc: backpointer to the crtc that is currently using this + * AUX channel + */ struct drm_crtc *crtc; + + /** + * @hw_mutex: internal mutex used for locking transfers. + * + * Note that if the underlying hardware is shared among multiple + * channels, the driver needs to do additional locking to + * prevent concurrent access. + */ struct mutex hw_mutex; + + /** + * @crc_work: worker that captures CRCs for each frame + */ struct work_struct crc_work; + + /** + * @crc_count: counter of captured frame CRCs + */ u8 crc_count; + + /** + * @transfer: transfers a message representing a single AUX + * transaction. + * + * This is a hardware-specific implementation of how + * transactions are executed that the drivers must provide. + * + * A pointer to a &drm_dp_aux_msg structure describing the + * transaction is passed into this function. Upon success, the + * implementation should return the number of payload bytes that + * were transferred, or a negative error-code on failure. + * + * Helpers will propagate these errors, with the exception of + * the %-EBUSY error, which causes a transaction to be retried. + * On a short, helpers will return %-EPROTO to make it simpler + * to check for failure. + * + * The @transfer() function must only modify the reply field of + * the &drm_dp_aux_msg structure. The retry logic and i2c + * helpers assume this is the case. + * + * Also note that this callback can be called no matter the + * state @dev is in. Drivers that need that device to be powered + * to perform this operation will first need to make sure it's + * been properly enabled. + */ ssize_t (*transfer)(struct drm_dp_aux *aux, struct drm_dp_aux_msg *msg); + /** * @i2c_nack_count: Counts I2C NACKs, used for DP validation. */ @@ -2124,6 +2188,51 @@ drm_dp_has_quirk(const struct drm_dp_desc *desc, enum drm_dp_quirk quirk) return desc->quirks & BIT(quirk); } +/** + * struct drm_edp_backlight_info - Probed eDP backlight info struct + * @pwmgen_bit_count: The pwmgen bit count + * @pwm_freq_pre_divider: The PWM frequency pre-divider value being used for this backlight, if any + * @max: The maximum backlight level that may be set + * @lsb_reg_used: Do we also write values to the DP_EDP_BACKLIGHT_BRIGHTNESS_LSB register? + * @aux_enable: Does the panel support the AUX enable cap? + * + * This structure contains various data about an eDP backlight, which can be populated by using + * drm_edp_backlight_init(). 
+ */ +struct drm_edp_backlight_info { + u8 pwmgen_bit_count; + u8 pwm_freq_pre_divider; + u16 max; + + bool lsb_reg_used : 1; + bool aux_enable : 1; +}; + +int +drm_edp_backlight_init(struct drm_dp_aux *aux, struct drm_edp_backlight_info *bl, + u16 driver_pwm_freq_hz, const u8 edp_dpcd[EDP_DISPLAY_CTL_CAP_SIZE], + u16 *current_level, u8 *current_mode); +int drm_edp_backlight_set_level(struct drm_dp_aux *aux, const struct drm_edp_backlight_info *bl, + u16 level); +int drm_edp_backlight_enable(struct drm_dp_aux *aux, const struct drm_edp_backlight_info *bl, + u16 level); +int drm_edp_backlight_disable(struct drm_dp_aux *aux, const struct drm_edp_backlight_info *bl); + +#if IS_ENABLED(CONFIG_DRM_KMS_HELPER) && (IS_BUILTIN(CONFIG_BACKLIGHT_CLASS_DEVICE) || \ + (IS_MODULE(CONFIG_DRM_KMS_HELPER) && IS_MODULE(CONFIG_BACKLIGHT_CLASS_DEVICE))) + +int drm_panel_dp_aux_backlight(struct drm_panel *panel, struct drm_dp_aux *aux); + +#else + +static inline int drm_panel_dp_aux_backlight(struct drm_panel *panel, + struct drm_dp_aux *aux) +{ + return 0; +} + +#endif + #ifdef CONFIG_DRM_DP_CEC void drm_dp_cec_irq(struct drm_dp_aux *aux); void drm_dp_cec_register_connector(struct drm_dp_aux *aux, diff --git a/include/drm/drm_drv.h b/include/drm/drm_drv.h index b439ae1921b8..0cd95953cdf5 100644 --- a/include/drm/drm_drv.h +++ b/include/drm/drm_drv.h @@ -137,10 +137,6 @@ enum drm_driver_feature { * @DRIVER_HAVE_IRQ: * * Legacy irq support. Only for legacy drivers. Do not use. - * - * New drivers can either use the drm_irq_install() and - * drm_irq_uninstall() helper functions, or roll their own irq support - * code by calling request_irq() directly. */ DRIVER_HAVE_IRQ = BIT(30), /** @@ -272,42 +268,6 @@ struct drm_driver { void (*release) (struct drm_device *); /** - * @irq_handler: - * - * Interrupt handler called when using drm_irq_install(). Not used by - * drivers which implement their own interrupt handling. - */ - irqreturn_t(*irq_handler) (int irq, void *arg); - - /** - * @irq_preinstall: - * - * Optional callback used by drm_irq_install() which is called before - * the interrupt handler is registered. This should be used to clear out - * any pending interrupts (from e.g. firmware based drives) and reset - * the interrupt handling registers. - */ - void (*irq_preinstall) (struct drm_device *dev); - - /** - * @irq_postinstall: - * - * Optional callback used by drm_irq_install() which is called after - * the interrupt handler is registered. This should be used to enable - * interrupt generation in the hardware. - */ - int (*irq_postinstall) (struct drm_device *dev); - - /** - * @irq_uninstall: - * - * Optional callback used by drm_irq_uninstall() which is called before - * the interrupt handler is unregistered. This should be used to disable - * interrupt generation in the hardware. - */ - void (*irq_uninstall) (struct drm_device *dev); - - /** * @master_set: * * Called whenever the minor master is set. Only used by vmwgfx. 
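Putting the new eDP backlight helpers together, a panel or connector driver could probe and enable the DPCD backlight roughly as follows; this is a hedged sketch, the function name and error handling are assumptions, and a real driver would keep the drm_edp_backlight_info around for later drm_edp_backlight_set_level() calls.

static int hypothetical_edp_backlight_setup(struct drm_dp_aux *aux)
{
        u8 edp_dpcd[EDP_DISPLAY_CTL_CAP_SIZE];
        struct drm_edp_backlight_info bl;
        u16 current_level;
        u8 current_mode;
        int ret;

        if (drm_dp_dpcd_read(aux, DP_EDP_DPCD_REV, edp_dpcd,
                             EDP_DISPLAY_CTL_CAP_SIZE) != EDP_DISPLAY_CTL_CAP_SIZE)
                return -EIO;

        if (!drm_edp_backlight_supported(edp_dpcd))
                return -ENODEV;

        /* Passing 0 skips programming a driver-chosen PWM frequency. */
        ret = drm_edp_backlight_init(aux, &bl, 0, edp_dpcd,
                                     &current_level, &current_mode);
        if (ret < 0)
                return ret;

        return drm_edp_backlight_enable(aux, &bl, current_level);
}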
@@ -504,6 +464,10 @@ struct drm_driver { int (*dma_ioctl) (struct drm_device *dev, void *data, struct drm_file *file_priv); int (*dma_quiescent) (struct drm_device *); int (*context_dtor) (struct drm_device *dev, int context); + irqreturn_t (*irq_handler)(int irq, void *arg); + void (*irq_preinstall)(struct drm_device *dev); + int (*irq_postinstall)(struct drm_device *dev); + void (*irq_uninstall)(struct drm_device *dev); u32 (*get_vblank_counter)(struct drm_device *dev, unsigned int pipe); int (*enable_vblank)(struct drm_device *dev, unsigned int pipe); void (*disable_vblank)(struct drm_device *dev, unsigned int pipe); diff --git a/include/drm/drm_edid.h b/include/drm/drm_edid.h index 759328a5eeb2..deccfd39e6db 100644 --- a/include/drm/drm_edid.h +++ b/include/drm/drm_edid.h @@ -336,7 +336,7 @@ struct edid { u8 features; /* Color characteristics */ u8 red_green_lo; - u8 black_white_lo; + u8 blue_white_lo; u8 red_x; u8 red_y; u8 green_x; diff --git a/include/drm/drm_file.h b/include/drm/drm_file.h index b81b3bfb08c8..a3acb7ac3550 100644 --- a/include/drm/drm_file.h +++ b/include/drm/drm_file.h @@ -226,15 +226,31 @@ struct drm_file { /** * @master: * - * Master this node is currently associated with. Only relevant if - * drm_is_primary_client() returns true. Note that this only - * matches &drm_device.master if the master is the currently active one. + * Master this node is currently associated with. Protected by struct + * &drm_device.master_mutex, and serialized by @master_lookup_lock. + * + * Only relevant if drm_is_primary_client() returns true. Note that + * this only matches &drm_device.master if the master is the currently + * active one. + * + * To update @master, both &drm_device.master_mutex and + * @master_lookup_lock need to be held, therefore holding either of + * them is safe and enough for the read side. + * + * When dereferencing this pointer, either hold struct + * &drm_device.master_mutex for the duration of the pointer's use, or + * use drm_file_get_master() if struct &drm_device.master_mutex is not + * currently held and there is no other need to hold it. This prevents + * @master from being freed during use. * * See also @authentication and @is_master and the :ref:`section on * primary nodes and authentication <drm_primary_node>`. */ struct drm_master *master; + /** @master_lock: Serializes @master. */ + spinlock_t master_lookup_lock; + /** @pid: Process that opened this file. */ struct pid *pid; diff --git a/include/drm/drm_fourcc.h b/include/drm/drm_fourcc.h index 3b138d4ae67e..22aa64d07c79 100644 --- a/include/drm/drm_fourcc.h +++ b/include/drm/drm_fourcc.h @@ -25,6 +25,11 @@ #include <linux/types.h> #include <uapi/drm/drm_fourcc.h> +/** + * DRM_FORMAT_MAX_PLANES - maximum number of planes a DRM format can have + */ +#define DRM_FORMAT_MAX_PLANES 4u + /* * DRM formats are little endian. Define host endian variants for the * most common formats here, to reduce the #ifdefs needed in drivers. @@ -78,7 +83,7 @@ struct drm_format_info { * triplet @char_per_block, @block_w, @block_h for better * describing the pixel format. */ - u8 cpp[4]; + u8 cpp[DRM_FORMAT_MAX_PLANES]; /** * @char_per_block: @@ -104,7 +109,7 @@ struct drm_format_info { * information from their drm_mode_config.get_format_info hook * if they want the core to be validating the pitch. 
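For the drm_file.master locking rules above, a hedged sketch of the safe dereference pattern when &drm_device.master_mutex is not held; everything except drm_file_get_master() and drm_master_put() is hypothetical.

static int hypothetical_inspect_master(struct drm_file *file_priv)
{
        struct drm_master *master = drm_file_get_master(file_priv);

        if (!master)
                return -ENODEV;

        /* ... safely inspect master->lessor, master->lessee_id, ... */

        drm_master_put(&master); /* drop the reference taken above */
        return 0;
}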
*/ - u8 char_per_block[4]; + u8 char_per_block[DRM_FORMAT_MAX_PLANES]; }; /** @@ -113,7 +118,7 @@ struct drm_format_info { * Block width in pixels, this is intended to be accessed through * drm_format_info_block_width() */ - u8 block_w[4]; + u8 block_w[DRM_FORMAT_MAX_PLANES]; /** * @block_h: @@ -121,7 +126,7 @@ struct drm_format_info { * Block height in pixels, this is intended to be accessed through * drm_format_info_block_height() */ - u8 block_h[4]; + u8 block_h[DRM_FORMAT_MAX_PLANES]; /** @hsub: Horizontal chroma subsampling factor */ u8 hsub; diff --git a/include/drm/drm_framebuffer.h b/include/drm/drm_framebuffer.h index be658ebbec72..f67c5b7bcb68 100644 --- a/include/drm/drm_framebuffer.h +++ b/include/drm/drm_framebuffer.h @@ -27,12 +27,12 @@ #include <linux/list.h> #include <linux/sched.h> +#include <drm/drm_fourcc.h> #include <drm/drm_mode_object.h> struct drm_clip_rect; struct drm_device; struct drm_file; -struct drm_format_info; struct drm_framebuffer; struct drm_gem_object; @@ -147,7 +147,7 @@ struct drm_framebuffer { * @pitches: Line stride per buffer. For userspace created object this * is copied from drm_mode_fb_cmd2. */ - unsigned int pitches[4]; + unsigned int pitches[DRM_FORMAT_MAX_PLANES]; /** * @offsets: Offset from buffer start to the actual pixel data in bytes, * per buffer. For userspace created object this is copied from @@ -165,7 +165,7 @@ struct drm_framebuffer { * data (even for linear buffers). Specifying an x/y pixel offset is * instead done through the source rectangle in &struct drm_plane_state. */ - unsigned int offsets[4]; + unsigned int offsets[DRM_FORMAT_MAX_PLANES]; /** * @modifier: Data layout modifier. This is used to describe * tiling, or also special layouts (like compression) of auxiliary @@ -210,7 +210,7 @@ struct drm_framebuffer { * This is used by the GEM framebuffer helpers, see e.g. * drm_gem_fb_create(). */ - struct drm_gem_object *obj[4]; + struct drm_gem_object *obj[DRM_FORMAT_MAX_PLANES]; }; #define obj_to_fb(x) container_of(x, struct drm_framebuffer, base) diff --git a/include/drm/drm_gem.h b/include/drm/drm_gem.h index 240049566592..35e7f44c2a75 100644 --- a/include/drm/drm_gem.h +++ b/include/drm/drm_gem.h @@ -384,8 +384,6 @@ drm_gem_object_put(struct drm_gem_object *obj) __drm_gem_object_put(obj); } -void drm_gem_object_put_locked(struct drm_gem_object *obj); - int drm_gem_handle_create(struct drm_file *file_priv, struct drm_gem_object *obj, u32 *handlep); diff --git a/include/drm/drm_gem_atomic_helper.h b/include/drm/drm_gem_atomic_helper.h index cfc5adee3d13..48222a107873 100644 --- a/include/drm/drm_gem_atomic_helper.h +++ b/include/drm/drm_gem_atomic_helper.h @@ -5,6 +5,7 @@ #include <linux/dma-buf-map.h> +#include <drm/drm_fourcc.h> #include <drm/drm_plane.h> struct drm_simple_display_pipe; @@ -40,7 +41,15 @@ struct drm_shadow_plane_state { * The memory mappings stored in map should be established in the plane's * prepare_fb callback and removed in the cleanup_fb callback. */ - struct dma_buf_map map[4]; + struct dma_buf_map map[DRM_FORMAT_MAX_PLANES]; + + /** + * @data: Address of each framebuffer BO's data + * + * The address of the data stored in each mapping. This is different + * for framebuffers with non-zero offset fields. 
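With all of these arrays now sized by DRM_FORMAT_MAX_PLANES, per-plane framebuffer walks can be bounded by the same constant; a small hedged sketch (the function itself is hypothetical):

static void hypothetical_dump_fb_layout(struct drm_framebuffer *fb)
{
        unsigned int i;

        /* fb->format->num_planes never exceeds DRM_FORMAT_MAX_PLANES. */
        for (i = 0; i < fb->format->num_planes; i++)
                drm_dbg_kms(fb->dev, "plane %u: pitch %u, offset %u\n",
                            i, fb->pitches[i], fb->offsets[i]);
}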
+ */ + struct dma_buf_map data[DRM_FORMAT_MAX_PLANES]; }; /** @@ -53,6 +62,12 @@ to_drm_shadow_plane_state(struct drm_plane_state *state) return container_of(state, struct drm_shadow_plane_state, base); } +void __drm_gem_duplicate_shadow_plane_state(struct drm_plane *plane, + struct drm_shadow_plane_state *new_shadow_plane_state); +void __drm_gem_destroy_shadow_plane_state(struct drm_shadow_plane_state *shadow_plane_state); +void __drm_gem_reset_shadow_plane(struct drm_plane *plane, + struct drm_shadow_plane_state *shadow_plane_state); + void drm_gem_reset_shadow_plane(struct drm_plane *plane); struct drm_plane_state *drm_gem_duplicate_shadow_plane_state(struct drm_plane *plane); void drm_gem_destroy_shadow_plane_state(struct drm_plane *plane, diff --git a/include/drm/drm_gem_framebuffer_helper.h b/include/drm/drm_gem_framebuffer_helper.h index 6bdffc7aa124..905727719ead 100644 --- a/include/drm/drm_gem_framebuffer_helper.h +++ b/include/drm/drm_gem_framebuffer_helper.h @@ -1,6 +1,11 @@ #ifndef __DRM_GEM_FB_HELPER_H__ #define __DRM_GEM_FB_HELPER_H__ +#include <linux/dma-buf.h> +#include <linux/dma-buf-map.h> + +#include <drm/drm_fourcc.h> + struct drm_afbc_framebuffer; struct drm_device; struct drm_fb_helper_surface_size; @@ -34,6 +39,14 @@ struct drm_framebuffer * drm_gem_fb_create_with_dirty(struct drm_device *dev, struct drm_file *file, const struct drm_mode_fb_cmd2 *mode_cmd); +int drm_gem_fb_vmap(struct drm_framebuffer *fb, + struct dma_buf_map map[static DRM_FORMAT_MAX_PLANES], + struct dma_buf_map data[DRM_FORMAT_MAX_PLANES]); +void drm_gem_fb_vunmap(struct drm_framebuffer *fb, + struct dma_buf_map map[static DRM_FORMAT_MAX_PLANES]); +int drm_gem_fb_begin_cpu_access(struct drm_framebuffer *fb, enum dma_data_direction dir); +void drm_gem_fb_end_cpu_access(struct drm_framebuffer *fb, enum dma_data_direction dir); + #define drm_is_afbc(modifier) \ (((modifier) & AFBC_VENDOR_AND_TYPE_MASK) == DRM_FORMAT_MOD_ARM_AFBC(0)) diff --git a/include/drm/drm_gem_vram_helper.h b/include/drm/drm_gem_vram_helper.h index 27ed7e9243b9..d3cf06c9af65 100644 --- a/include/drm/drm_gem_vram_helper.h +++ b/include/drm/drm_gem_vram_helper.h @@ -124,6 +124,18 @@ void drm_gem_vram_plane_helper_cleanup_fb(struct drm_plane *plane, struct drm_plane_state *old_state); +/** + * DRM_GEM_VRAM_PLANE_HELPER_FUNCS - + * Initializes struct drm_plane_helper_funcs for VRAM handling + * + * Drivers may use GEM BOs as VRAM helpers for the framebuffer memory. This + * macro initializes struct drm_plane_helper_funcs to use the respective helper + * functions. 
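A hedged sketch of how the new drm_gem_fb_vmap()/drm_gem_fb_vunmap() pair and the CPU-access bracket might be used on a framebuffer's first plane; the consumer function is hypothetical and system memory (not I/O memory) is assumed.

static int hypothetical_read_fb(struct drm_framebuffer *fb)
{
        struct dma_buf_map map[DRM_FORMAT_MAX_PLANES];
        struct dma_buf_map data[DRM_FORMAT_MAX_PLANES];
        int ret;

        ret = drm_gem_fb_vmap(fb, map, data);
        if (ret)
                return ret;

        ret = drm_gem_fb_begin_cpu_access(fb, DMA_FROM_DEVICE);
        if (!ret) {
                /* data[] already accounts for fb->offsets[], unlike map[]. */
                hypothetical_parse_pixels(data[0].vaddr, fb->pitches[0]);
                drm_gem_fb_end_cpu_access(fb, DMA_FROM_DEVICE);
        }

        drm_gem_fb_vunmap(fb, map);
        return ret;
}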
+ */ +#define DRM_GEM_VRAM_PLANE_HELPER_FUNCS \ + .prepare_fb = drm_gem_vram_plane_helper_prepare_fb, \ + .cleanup_fb = drm_gem_vram_plane_helper_cleanup_fb + /* * Helpers for struct drm_simple_display_pipe_funcs */ @@ -192,10 +204,6 @@ void drm_vram_mm_debugfs_init(struct drm_minor *minor); * Helpers for integration with struct drm_device */ -struct drm_vram_mm *drm_vram_helper_alloc_mm( - struct drm_device *dev, uint64_t vram_base, size_t vram_size); -void drm_vram_helper_release_mm(struct drm_device *dev); - int drmm_vram_helper_init(struct drm_device *dev, uint64_t vram_base, size_t vram_size); diff --git a/include/drm/drm_legacy.h b/include/drm/drm_legacy.h index b17e79e12bc2..58dc8d8cc907 100644 --- a/include/drm/drm_legacy.h +++ b/include/drm/drm_legacy.h @@ -192,6 +192,9 @@ do { \ void drm_legacy_idlelock_take(struct drm_lock_data *lock); void drm_legacy_idlelock_release(struct drm_lock_data *lock); +/* drm_irq.c */ +int drm_legacy_irq_uninstall(struct drm_device *dev); + /* drm_pci.c */ #ifdef CONFIG_PCI diff --git a/include/drm/drm_mipi_dbi.h b/include/drm/drm_mipi_dbi.h index f543d6e3e822..05e194958265 100644 --- a/include/drm/drm_mipi_dbi.h +++ b/include/drm/drm_mipi_dbi.h @@ -183,7 +183,12 @@ int mipi_dbi_buf_copy(void *dst, struct drm_framebuffer *fb, #define mipi_dbi_command(dbi, cmd, seq...) \ ({ \ const u8 d[] = { seq }; \ - mipi_dbi_command_stackbuf(dbi, cmd, d, ARRAY_SIZE(d)); \ + struct device *dev = &(dbi)->spi->dev; \ + int ret; \ + ret = mipi_dbi_command_stackbuf(dbi, cmd, d, ARRAY_SIZE(d)); \ + if (ret) \ + dev_err_ratelimited(dev, "error %d when sending command %#02x\n", ret, cmd); \ + ret; \ }) #ifdef CONFIG_DEBUG_FS diff --git a/include/drm/drm_mipi_dsi.h b/include/drm/drm_mipi_dsi.h index 360e6377e84b..af7ba8071eb0 100644 --- a/include/drm/drm_mipi_dsi.h +++ b/include/drm/drm_mipi_dsi.h @@ -80,6 +80,11 @@ int mipi_dsi_create_packet(struct mipi_dsi_packet *packet, * Note that typically DSI packet transmission is atomic, so the .transfer() * function will seldomly return anything other than the number of bytes * contained in the transmit buffer on success. + * + * Also note that those callbacks can be called no matter the state the + * host is in. Drivers that need the underlying device to be powered to + * perform these operations will first need to make sure it's been + * properly enabled. 
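The DRM_GEM_VRAM_PLANE_HELPER_FUNCS macro above is meant to be dropped straight into a plane helper vtable; a hedged sketch in which the atomic_check/atomic_update callbacks are hypothetical:

static const struct drm_plane_helper_funcs hypothetical_primary_helper_funcs = {
        .atomic_check = hypothetical_primary_atomic_check,
        .atomic_update = hypothetical_primary_atomic_update,
        DRM_GEM_VRAM_PLANE_HELPER_FUNCS, /* .prepare_fb and .cleanup_fb */
};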
*/ struct mipi_dsi_host_ops { int (*attach)(struct mipi_dsi_host *host, @@ -119,15 +124,15 @@ struct mipi_dsi_host *of_find_mipi_dsi_host_by_node(struct device_node *node); /* enable hsync-end packets in vsync-pulse and v-porch area */ #define MIPI_DSI_MODE_VIDEO_HSE BIT(4) /* disable hfront-porch area */ -#define MIPI_DSI_MODE_VIDEO_HFP BIT(5) +#define MIPI_DSI_MODE_VIDEO_NO_HFP BIT(5) /* disable hback-porch area */ -#define MIPI_DSI_MODE_VIDEO_HBP BIT(6) +#define MIPI_DSI_MODE_VIDEO_NO_HBP BIT(6) /* disable hsync-active area */ -#define MIPI_DSI_MODE_VIDEO_HSA BIT(7) +#define MIPI_DSI_MODE_VIDEO_NO_HSA BIT(7) /* flush display FIFO on vsync pulse */ #define MIPI_DSI_MODE_VSYNC_FLUSH BIT(8) /* disable EoT packets in HS mode */ -#define MIPI_DSI_MODE_EOT_PACKET BIT(9) +#define MIPI_DSI_MODE_NO_EOT_PACKET BIT(9) /* device supports non-continuous clock behavior (DSI spec 5.6.1) */ #define MIPI_DSI_CLOCK_NON_CONTINUOUS BIT(10) /* transmit data in low power */ diff --git a/include/drm/drm_modeset_helper_vtables.h b/include/drm/drm_modeset_helper_vtables.h index f3a4b47b3986..fdfa9f37ce05 100644 --- a/include/drm/drm_modeset_helper_vtables.h +++ b/include/drm/drm_modeset_helper_vtables.h @@ -1178,8 +1178,11 @@ struct drm_plane_helper_funcs { * equivalent functionality should be implemented through private * members in the plane structure. * - * Drivers which always have their buffers pinned should use - * drm_gem_plane_helper_prepare_fb() for this hook. + * For GEM drivers who neither have a @prepare_fb nor @cleanup_fb hook + * set drm_gem_plane_helper_prepare_fb() is called automatically to + * implement this. Other drivers which need additional plane processing + * can call drm_gem_plane_helper_prepare_fb() from their @prepare_fb + * hook. * * The helpers will call @cleanup_fb with matching arguments for every * successful call to this hook. diff --git a/include/drm/drm_panel.h b/include/drm/drm_panel.h index 33605c3f0eba..4602f833eb51 100644 --- a/include/drm/drm_panel.h +++ b/include/drm/drm_panel.h @@ -64,8 +64,8 @@ enum drm_panel_orientation; * the panel. This is the job of the .unprepare() function. * * Backlight can be handled automatically if configured using - * drm_panel_of_backlight(). Then the driver does not need to implement the - * functionality to enable/disable backlight. + * drm_panel_of_backlight() or drm_panel_dp_aux_backlight(). Then the driver + * does not need to implement the functionality to enable/disable backlight. */ struct drm_panel_funcs { /** @@ -144,8 +144,8 @@ struct drm_panel { * Backlight device, used to turn on backlight after the call * to enable(), and to turn off backlight before the call to * disable(). - * backlight is set by drm_panel_of_backlight() and drivers - * shall not assign it. + * backlight is set by drm_panel_of_backlight() or + * drm_panel_dp_aux_backlight() and drivers shall not assign it. */ struct backlight_device *backlight; diff --git a/include/drm/drm_plane.h b/include/drm/drm_plane.h index 1294610e84f4..fed97e35626f 100644 --- a/include/drm/drm_plane.h +++ b/include/drm/drm_plane.h @@ -186,6 +186,9 @@ struct drm_plane_state { * since last plane update) as an array of &drm_mode_rect in framebuffer * coodinates of the attached framebuffer. Note that unlike plane src, * damage clips are not in 16.16 fixed point. + * + * See drm_plane_get_damage_clips() and + * drm_plane_get_damage_clips_count() for accessing these. 
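To illustrate the renamed flags (a hedged sketch, the panel itself is hypothetical), a DSI peripheral probe would now spell its mode flags like this:

static int hypothetical_dsi_panel_probe(struct mipi_dsi_device *dsi)
{
        dsi->lanes = 4;
        dsi->format = MIPI_DSI_FMT_RGB888;
        dsi->mode_flags = MIPI_DSI_MODE_VIDEO |
                          MIPI_DSI_MODE_VIDEO_BURST |
                          MIPI_DSI_MODE_NO_EOT_PACKET |
                          MIPI_DSI_MODE_VIDEO_NO_HFP;

        return mipi_dsi_attach(dsi);
}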
*/ struct drm_property_blob *fb_damage_clips; @@ -894,38 +897,12 @@ static inline struct drm_plane *drm_plane_find(struct drm_device *dev, bool drm_any_plane_has_format(struct drm_device *dev, u32 format, u64 modifier); -/** - * drm_plane_get_damage_clips_count - Returns damage clips count. - * @state: Plane state. - * - * Simple helper to get the number of &drm_mode_rect clips set by user-space - * during plane update. - * - * Return: Number of clips in plane fb_damage_clips blob property. - */ -static inline unsigned int -drm_plane_get_damage_clips_count(const struct drm_plane_state *state) -{ - return (state && state->fb_damage_clips) ? - state->fb_damage_clips->length/sizeof(struct drm_mode_rect) : 0; -} -/** - * drm_plane_get_damage_clips - Returns damage clips. - * @state: Plane state. - * - * Note that this function returns uapi type &drm_mode_rect. Drivers might - * instead be interested in internal &drm_rect which can be obtained by calling - * drm_helper_get_plane_damage_clips(). - * - * Return: Damage clips in plane fb_damage_clips blob property. - */ -static inline struct drm_mode_rect * -drm_plane_get_damage_clips(const struct drm_plane_state *state) -{ - return (struct drm_mode_rect *)((state && state->fb_damage_clips) ? - state->fb_damage_clips->data : NULL); -} +void drm_plane_enable_fb_damage_clips(struct drm_plane *plane); +unsigned int +drm_plane_get_damage_clips_count(const struct drm_plane_state *state); +struct drm_mode_rect * +drm_plane_get_damage_clips(const struct drm_plane_state *state); int drm_plane_create_scaling_filter_property(struct drm_plane *plane, unsigned int supported_filters); diff --git a/include/drm/drm_print.h b/include/drm/drm_print.h index 9b66be54dd16..15a089a87c22 100644 --- a/include/drm/drm_print.h +++ b/include/drm/drm_print.h @@ -327,7 +327,7 @@ static inline bool drm_debug_enabled(enum drm_debug_category category) /* * struct device based logging * - * Prefer drm_device based logging over device or prink based logging. + * Prefer drm_device based logging over device or printk based logging. */ __printf(3, 4) diff --git a/include/drm/drm_property.h b/include/drm/drm_property.h index bbf5c1fdd7b0..65bc9710a470 100644 --- a/include/drm/drm_property.h +++ b/include/drm/drm_property.h @@ -31,7 +31,6 @@ /** * struct drm_property_enum - symbolic values for enumerations - * @value: numeric property value for this enum entry * @head: list of enum values, linked to &drm_property.enum_list * @name: symbolic name for the enum * @@ -39,6 +38,14 @@ * decoding for each value. This is used for example for the rotation property. */ struct drm_property_enum { + /** + * @value: numeric property value for this enum entry + * + * If the property has the type &DRM_MODE_PROP_BITMASK, @value stores a + * bitshift, not a bitmask. In other words, the enum entry is enabled + * if the bit number @value is set in the property's value. This enum + * entry has the bitmask ``1 << value``. + */ uint64_t value; struct list_head head; char name[DRM_PROP_NAME_LEN]; diff --git a/include/drm/drm_rect.h b/include/drm/drm_rect.h index 39f2deee709c..6f6e19bd4dac 100644 --- a/include/drm/drm_rect.h +++ b/include/drm/drm_rect.h @@ -39,6 +39,9 @@ * @x2: horizontal ending coordinate (exclusive) * @y1: vertical starting coordinate (inclusive) * @y2: vertical ending coordinate (exclusive) + * + * Note that this must match the layout of struct drm_mode_rect or the damage + * helpers like drm_atomic_helper_damage_iter_init() break. 
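Tying the damage-clip accessors together: plane update code normally walks the damaged regions through the damage iterator rather than reading the blob directly. A hedged sketch, with the flush helper being hypothetical:

static void hypothetical_flush_damage(struct drm_plane_state *old_state,
                                      struct drm_plane_state *new_state)
{
        struct drm_atomic_helper_damage_iter iter;
        struct drm_rect clip;

        drm_atomic_helper_damage_iter_init(&iter, old_state, new_state);
        drm_atomic_for_each_plane_damage(&iter, &clip) {
                /* Only the damaged rectangle needs to be copied out. */
                hypothetical_flush_rect(new_state->fb, &clip);
        }
}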
*/ struct drm_rect { int x1, y1, x2, y2; diff --git a/include/drm/drm_simple_kms_helper.h b/include/drm/drm_simple_kms_helper.h index ef9944e9c5fc..0b3647e614dd 100644 --- a/include/drm/drm_simple_kms_helper.h +++ b/include/drm/drm_simple_kms_helper.h @@ -116,8 +116,11 @@ struct drm_simple_display_pipe_funcs { * the documentation for the &drm_plane_helper_funcs.prepare_fb hook for * more details. * - * Drivers which always have their buffers pinned should use - * drm_gem_simple_display_pipe_prepare_fb() for this hook. + * For GEM drivers who neither have a @prepare_fb nor @cleanup_fb hook + * set drm_gem_simple_display_pipe_prepare_fb() is called automatically + * to implement this. Other drivers which need additional plane + * processing can call drm_gem_simple_display_pipe_prepare_fb() from + * their @prepare_fb hook. */ int (*prepare_fb)(struct drm_simple_display_pipe *pipe, struct drm_plane_state *plane_state); @@ -151,6 +154,33 @@ struct drm_simple_display_pipe_funcs { void (*disable_vblank)(struct drm_simple_display_pipe *pipe); /** + * @reset_crtc: + * + * Optional, called by &drm_crtc_funcs.reset. Please read the + * documentation for the &drm_crtc_funcs.reset hook for more details. + */ + void (*reset_crtc)(struct drm_simple_display_pipe *pipe); + + /** + * @duplicate_crtc_state: + * + * Optional, called by &drm_crtc_funcs.atomic_duplicate_state. Please + * read the documentation for the &drm_crtc_funcs.atomic_duplicate_state + * hook for more details. + */ + struct drm_crtc_state * (*duplicate_crtc_state)(struct drm_simple_display_pipe *pipe); + + /** + * @destroy_crtc_state: + * + * Optional, called by &drm_crtc_funcs.atomic_destroy_state. Please + * read the documentation for the &drm_crtc_funcs.atomic_destroy_state + * hook for more details. + */ + void (*destroy_crtc_state)(struct drm_simple_display_pipe *pipe, + struct drm_crtc_state *crtc_state); + + /** * @reset_plane: * * Optional, called by &drm_plane_funcs.reset. Please read the diff --git a/include/drm/drm_vma_manager.h b/include/drm/drm_vma_manager.h index 76ac5e97a559..4f8c35206f7c 100644 --- a/include/drm/drm_vma_manager.h +++ b/include/drm/drm_vma_manager.h @@ -53,7 +53,7 @@ struct drm_vma_offset_node { rwlock_t vm_lock; struct drm_mm_node vm_node; struct rb_root vm_files; - bool readonly:1; + void *driver_private; }; struct drm_vma_offset_manager { diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index d18af49fd009..88ae7f331bb1 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -239,6 +239,38 @@ struct drm_sched_backend_ops { * @timedout_job: Called when a job has taken too long to execute, * to trigger GPU recovery. * + * This method is called in a workqueue context. + * + * Drivers typically issue a reset to recover from GPU hangs, and this + * procedure usually follows the following workflow: + * + * 1. Stop the scheduler using drm_sched_stop(). This will park the + * scheduler thread and cancel the timeout work, guaranteeing that + * nothing is queued while we reset the hardware queue + * 2. Try to gracefully stop non-faulty jobs (optional) + * 3. Issue a GPU reset (driver-specific) + * 4. Re-submit jobs using drm_sched_resubmit_jobs() + * 5. Restart the scheduler using drm_sched_start(). 
At that point, new + * jobs can be queued, and the scheduler thread is unblocked + * + * Note that some GPUs have distinct hardware queues but need to reset + * the GPU globally, which requires extra synchronization between the + * timeout handler of the different &drm_gpu_scheduler. One way to + * achieve this synchronization is to create an ordered workqueue + * (using alloc_ordered_workqueue()) at the driver level, and pass this + * queue to drm_sched_init(), to guarantee that timeout handlers are + * executed sequentially. The above workflow needs to be slightly + * adjusted in that case: + * + * 1. Stop all schedulers impacted by the reset using drm_sched_stop() + * 2. Try to gracefully stop non-faulty jobs on all queues impacted by + * the reset (optional) + * 3. Issue a GPU reset on all faulty queues (driver-specific) + * 4. Re-submit jobs on all schedulers impacted by the reset using + * drm_sched_resubmit_jobs() + * 5. Restart all schedulers that were stopped in step #1 using + * drm_sched_start() + * * Return DRM_GPU_SCHED_STAT_NOMINAL, when all is normal, * and the underlying driver has started or completed recovery. * @@ -269,6 +301,7 @@ struct drm_sched_backend_ops { * finished. * @hw_rq_count: the number of jobs currently in the hardware queue. * @job_id_count: used to assign unique id to the each job. + * @timeout_wq: workqueue used to queue @work_tdr * @work_tdr: schedules a delayed call to @drm_sched_job_timedout after the * timeout interval is over. * @thread: the kthread on which the scheduler which run. @@ -293,6 +326,7 @@ struct drm_gpu_scheduler { wait_queue_head_t job_scheduled; atomic_t hw_rq_count; atomic64_t job_id_count; + struct workqueue_struct *timeout_wq; struct delayed_work work_tdr; struct task_struct *thread; struct list_head pending_list; @@ -306,7 +340,8 @@ struct drm_gpu_scheduler { int drm_sched_init(struct drm_gpu_scheduler *sched, const struct drm_sched_backend_ops *ops, - uint32_t hw_submission, unsigned hang_limit, long timeout, + uint32_t hw_submission, unsigned hang_limit, + long timeout, struct workqueue_struct *timeout_wq, atomic_t *score, const char *name); void drm_sched_fini(struct drm_gpu_scheduler *sched); diff --git a/include/drm/ttm/ttm_placement.h b/include/drm/ttm/ttm_placement.h index aa6ba4d0cf78..8995c9e4ec1b 100644 --- a/include/drm/ttm/ttm_placement.h +++ b/include/drm/ttm/ttm_placement.h @@ -47,8 +47,11 @@ * top of the memory area, instead of the bottom. */ -#define TTM_PL_FLAG_CONTIGUOUS (1 << 19) -#define TTM_PL_FLAG_TOPDOWN (1 << 22) +#define TTM_PL_FLAG_CONTIGUOUS (1 << 0) +#define TTM_PL_FLAG_TOPDOWN (1 << 1) + +/* For multihop handling */ +#define TTM_PL_FLAG_TEMPORARY (1 << 2) /** * struct ttm_place diff --git a/include/dt-bindings/phy/phy.h b/include/dt-bindings/phy/phy.h index 887a31b250a8..f48c9acf251e 100644 --- a/include/dt-bindings/phy/phy.h +++ b/include/dt-bindings/phy/phy.h @@ -20,5 +20,7 @@ #define PHY_TYPE_XPCS 7 #define PHY_TYPE_SGMII 8 #define PHY_TYPE_QSGMII 9 +#define PHY_TYPE_DPHY 10 +#define PHY_TYPE_CPHY 11 #endif /* _DT_BINDINGS_PHY */ diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index efdc56b9d95f..8b32b4bdd590 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -54,7 +54,7 @@ struct dma_buf_ops { * device), and otherwise need to fail the attach operation. * * The exporter should also in general check whether the current - * allocation fullfills the DMA constraints of the new device. If this + * allocation fulfills the DMA constraints of the new device. 
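A condensed, hedged sketch of the single-queue recovery flow described in the timedout_job notes above; the reset call stands in for driver-specific hardware handling.

static enum drm_gpu_sched_stat
hypothetical_timedout_job(struct drm_sched_job *sched_job)
{
        struct drm_gpu_scheduler *sched = sched_job->sched;

        drm_sched_stop(sched, sched_job);       /* 1. park thread, cancel timeout work */
        hypothetical_gpu_reset();               /* 2./3. driver-specific reset */
        drm_sched_resubmit_jobs(sched);         /* 4. push unfinished jobs back */
        drm_sched_start(sched, true);           /* 5. unpark, accept new jobs */

        return DRM_GPU_SCHED_STAT_NOMINAL;
}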
If this * is not the case, and the allocation cannot be moved, it should also * fail the attach operation. * @@ -96,6 +96,12 @@ struct dma_buf_ops { * This is called automatically for non-dynamic importers from * dma_buf_attach(). * + * Note that similar to non-dynamic exporters in their @map_dma_buf + * callback the driver must guarantee that the memory is available for + * use and cleared of any old data by the time this function returns. + * Drivers which pipeline their buffer moves internally must wait for + * all moves and clears to complete. + * * Returns: * * 0 on success, negative error code on failure. @@ -144,9 +150,18 @@ struct dma_buf_ops { * This is always called with the dmabuf->resv object locked when * the dynamic_mapping flag is true. * + * Note that for non-dynamic exporters the driver must guarantee that + * that the memory is available for use and cleared of any old data by + * the time this function returns. Drivers which pipeline their buffer + * moves internally must wait for all moves and clears to complete. + * Dynamic exporters do not need to follow this rule: For non-dynamic + * importers the buffer is already pinned through @pin, which has the + * same requirements. Dynamic importers otoh are required to obey the + * dma_resv fences. + * * Returns: * - * A &sg_table scatter list of or the backing storage of the DMA buffer, + * A &sg_table scatter list of the backing storage of the DMA buffer, * already mapped into the device address space of the &device attached * with the provided &dma_buf_attachment. The addresses and lengths in * the scatter list are PAGE_SIZE aligned. @@ -168,7 +183,7 @@ struct dma_buf_ops { * * This is called by dma_buf_unmap_attachment() and should unmap and * release the &sg_table allocated in @map_dma_buf, and it is mandatory. - * For static dma_buf handling this might also unpins the backing + * For static dma_buf handling this might also unpin the backing * storage if this is the last mapping of the DMA buffer. */ void (*unmap_dma_buf)(struct dma_buf_attachment *, @@ -237,7 +252,7 @@ struct dma_buf_ops { * This callback is used by the dma_buf_mmap() function * * Note that the mapping needs to be incoherent, userspace is expected - * to braket CPU access using the DMA_BUF_IOCTL_SYNC interface. + * to bracket CPU access using the DMA_BUF_IOCTL_SYNC interface. * * Because dma-buf buffers have invariant size over their lifetime, the * dma-buf core checks whether a vma is too large and rejects such @@ -274,27 +289,6 @@ struct dma_buf_ops { /** * struct dma_buf - shared buffer object - * @size: size of the buffer; invariant over the lifetime of the buffer. - * @file: file pointer used for sharing buffers across, and for refcounting. - * @attachments: list of dma_buf_attachment that denotes all devices attached, - * protected by dma_resv lock. - * @ops: dma_buf_ops associated with this buffer object. - * @lock: used internally to serialize list manipulation, attach/detach and - * vmap/unmap - * @vmapping_counter: used internally to refcnt the vmaps - * @vmap_ptr: the current vmap ptr if vmapping_counter > 0 - * @exp_name: name of the exporter; useful for debugging. - * @name: userspace-provided name; useful for accounting and debugging, - * protected by @resv. - * @name_lock: spinlock to protect name access - * @owner: pointer to exporter module; used for refcounting when exporter is a - * kernel module. - * @list_node: node for dma_buf accounting and debugging. - * @priv: exporter specific private data for this buffer object. 
- * @resv: reservation object linked to this dma-buf - * @poll: for userspace poll support - * @cb_excl: for userspace poll support - * @cb_shared: for userspace poll support * * This represents a shared buffer, created by calling dma_buf_export(). The * userspace representation is a normal file descriptor, which can be created by @@ -306,30 +300,152 @@ struct dma_buf_ops { * Device DMA access is handled by the separate &struct dma_buf_attachment. */ struct dma_buf { + /** + * @size: + * + * Size of the buffer; invariant over the lifetime of the buffer. + */ size_t size; + + /** + * @file: + * + * File pointer used for sharing buffers across, and for refcounting. + * See dma_buf_get() and dma_buf_put(). + */ struct file *file; + + /** + * @attachments: + * + * List of dma_buf_attachment that denotes all devices attached, + * protected by &dma_resv lock @resv. + */ struct list_head attachments; + + /** @ops: dma_buf_ops associated with this buffer object. */ const struct dma_buf_ops *ops; + + /** + * @lock: + * + * Used internally to serialize list manipulation, attach/detach and + * vmap/unmap. Note that in many cases this is superseeded by + * dma_resv_lock() on @resv. + */ struct mutex lock; + + /** + * @vmapping_counter: + * + * Used internally to refcnt the vmaps returned by dma_buf_vmap(). + * Protected by @lock. + */ unsigned vmapping_counter; + + /** + * @vmap_ptr: + * The current vmap ptr if @vmapping_counter > 0. Protected by @lock. + */ struct dma_buf_map vmap_ptr; + + /** + * @exp_name: + * + * Name of the exporter; useful for debugging. See the + * DMA_BUF_SET_NAME IOCTL. + */ const char *exp_name; + + /** + * @name: + * + * Userspace-provided name; useful for accounting and debugging, + * protected by dma_resv_lock() on @resv and @name_lock for read access. + */ const char *name; + + /** @name_lock: Spinlock to protect name acces for read access. */ spinlock_t name_lock; + + /** + * @owner: + * + * Pointer to exporter module; used for refcounting when exporter is a + * kernel module. + */ struct module *owner; + + /** @list_node: node for dma_buf accounting and debugging. */ struct list_head list_node; + + /** @priv: exporter specific private data for this buffer object. */ void *priv; + + /** + * @resv: + * + * Reservation object linked to this dma-buf. + * + * IMPLICIT SYNCHRONIZATION RULES: + * + * Drivers which support implicit synchronization of buffer access as + * e.g. exposed in `Implicit Fence Poll Support`_ must follow the + * below rules. + * + * - Drivers must add a shared fence through dma_resv_add_shared_fence() + * for anything the userspace API considers a read access. This highly + * depends upon the API and window system. + * + * - Similarly drivers must set the exclusive fence through + * dma_resv_add_excl_fence() for anything the userspace API considers + * write access. + * + * - Drivers may just always set the exclusive fence, since that only + * causes unecessarily synchronization, but no correctness issues. + * + * - Some drivers only expose a synchronous userspace API with no + * pipelining across drivers. These do not set any fences for their + * access. An example here is v4l. + * + * DYNAMIC IMPORTER RULES: + * + * Dynamic importers, see dma_buf_attachment_is_dynamic(), have + * additional constraints on how they set up fences: + * + * - Dynamic importers must obey the exclusive fence and wait for it to + * signal before allowing access to the buffer's underlying storage + * through the device. 
+ * + * - Dynamic importers should set fences for any access that they can't + * disable immediately from their &dma_buf_attach_ops.move_notify + * callback. + */ struct dma_resv *resv; - /* poll support */ + /** @poll: for userspace poll support */ wait_queue_head_t poll; + /** @cb_excl: for userspace poll support */ + /** @cb_shared: for userspace poll support */ struct dma_buf_poll_cb_t { struct dma_fence_cb cb; wait_queue_head_t *poll; __poll_t active; } cb_excl, cb_shared; +#ifdef CONFIG_DMABUF_SYSFS_STATS + /** + * @sysfs_entry: + * + * For exposing information about this buffer in sysfs. See also + * `DMA-BUF statistics`_ for the uapi this enables. + */ + struct dma_buf_sysfs_entry { + struct kobject kobj; + struct dma_buf *dmabuf; + } *sysfs_entry; +#endif }; /** @@ -464,7 +580,7 @@ static inline bool dma_buf_is_dynamic(struct dma_buf *dmabuf) /** * dma_buf_attachment_is_dynamic - check if a DMA-buf attachment uses dynamic - * mappinsg + * mappings * @attach: the DMA-buf attachment to check * * Returns true if a DMA-buf importer wants to call the map/unmap functions with diff --git a/include/linux/dma-fence-chain.h b/include/linux/dma-fence-chain.h index 10462a029da2..54fe3443fd2c 100644 --- a/include/linux/dma-fence-chain.h +++ b/include/linux/dma-fence-chain.h @@ -12,25 +12,41 @@ #include <linux/dma-fence.h> #include <linux/irq_work.h> +#include <linux/slab.h> /** * struct dma_fence_chain - fence to represent an node of a fence chain * @base: fence base class - * @lock: spinlock for fence handling * @prev: previous fence of the chain * @prev_seqno: original previous seqno before garbage collection * @fence: encapsulated fence - * @cb: callback structure for signaling - * @work: irq work item for signaling + * @lock: spinlock for fence handling */ struct dma_fence_chain { struct dma_fence base; - spinlock_t lock; struct dma_fence __rcu *prev; u64 prev_seqno; struct dma_fence *fence; - struct dma_fence_cb cb; - struct irq_work work; + union { + /** + * @cb: callback for signaling + * + * This is used to add the callback for signaling the + * complection of the fence chain. Never used at the same time + * as the irq work. + */ + struct dma_fence_cb cb; + + /** + * @work: irq work item for signaling + * + * Irq work structure to allow us to add the callback without + * running into lock inversion. Never used at the same time as + * the callback. + */ + struct irq_work work; + }; + spinlock_t lock; }; extern const struct dma_fence_ops dma_fence_chain_ops; @@ -52,6 +68,30 @@ to_dma_fence_chain(struct dma_fence *fence) } /** + * dma_fence_chain_alloc + * + * Returns a new struct dma_fence_chain object or NULL on failure. + */ +static inline struct dma_fence_chain *dma_fence_chain_alloc(void) +{ + return kmalloc(sizeof(struct dma_fence_chain), GFP_KERNEL); +}; + +/** + * dma_fence_chain_free + * @chain: chain node to free + * + * Frees up an allocated but not used struct dma_fence_chain object. This + * doesn't need an RCU grace period since the fence was never initialized nor + * published. After dma_fence_chain_init() has been called the fence must be + * released by calling dma_fence_put(), and not through this function. 
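The intended pairing of the new chain-node helpers is pre-allocation up front, with dma_fence_chain_free() used only on the never-initialized path; a hedged sketch in which the failure check is hypothetical:

static int hypothetical_add_chain_node(struct dma_fence *prev_fence,
                                       struct dma_fence *new_fence, u64 seqno)
{
        struct dma_fence_chain *chain = dma_fence_chain_alloc();

        if (!chain)
                return -ENOMEM;

        if (hypothetical_setup_failed()) {
                /* Never initialized, so no RCU grace period is required. */
                dma_fence_chain_free(chain);
                return -EINVAL;
        }

        dma_fence_chain_init(chain, prev_fence, new_fence, seqno);
        /* From now on the node is released with dma_fence_put(&chain->base). */
        return 0;
}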
+ */ +static inline void dma_fence_chain_free(struct dma_fence_chain *chain) +{ + kfree(chain); +}; + +/** * dma_fence_chain_for_each - iterate over all fences in chain * @iter: current fence * @head: starting point diff --git a/include/linux/fb.h b/include/linux/fb.h index ecfbcc0553a5..5950f8f5dc74 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -2,6 +2,7 @@ #ifndef _LINUX_FB_H #define _LINUX_FB_H +#include <linux/refcount.h> #include <linux/kgdb.h> #include <uapi/linux/fb.h> @@ -435,7 +436,7 @@ struct fb_tile_ops { struct fb_info { - atomic_t count; + refcount_t count; int node; int flags; /* diff --git a/include/linux/host1x.h b/include/linux/host1x.h index 9b0487c88571..7bccf589aba7 100644 --- a/include/linux/host1x.h +++ b/include/linux/host1x.h @@ -170,6 +170,8 @@ u32 host1x_syncpt_base_id(struct host1x_syncpt_base *base); void host1x_syncpt_release_vblank_reservation(struct host1x_client *client, u32 syncpt_id); +struct dma_fence *host1x_fence_create(struct host1x_syncpt *sp, u32 threshold); + /* * host1x channel */ @@ -216,8 +218,8 @@ struct host1x_job { struct host1x_client *client; /* Gathers and their memory */ - struct host1x_job_gather *gathers; - unsigned int num_gathers; + struct host1x_job_cmd *cmds; + unsigned int num_cmds; /* Array of handles to be pinned & unpinned */ struct host1x_reloc *relocs; @@ -234,9 +236,15 @@ struct host1x_job { u32 syncpt_incrs; u32 syncpt_end; + /* Completion waiter ref */ + void *waiter; + /* Maximum time to wait for this job */ unsigned int timeout; + /* Job has timed out and should be released */ + bool cancelled; + /* Index and number of slots used in the push buffer */ unsigned int first_get; unsigned int num_slots; @@ -257,12 +265,25 @@ struct host1x_job { /* Add a channel wait for previous ops to complete */ bool serialize; + + /* Fast-forward syncpoint increments on job timeout */ + bool syncpt_recovery; + + /* Callback called when job is freed */ + void (*release)(struct host1x_job *job); + void *user_data; + + /* Whether host1x-side firewall should be ran for this job or not */ + bool enable_firewall; }; struct host1x_job *host1x_job_alloc(struct host1x_channel *ch, - u32 num_cmdbufs, u32 num_relocs); + u32 num_cmdbufs, u32 num_relocs, + bool skip_firewall); void host1x_job_add_gather(struct host1x_job *job, struct host1x_bo *bo, unsigned int words, unsigned int offset); +void host1x_job_add_wait(struct host1x_job *job, u32 id, u32 thresh, + bool relative, u32 next_class); struct host1x_job *host1x_job_get(struct host1x_job *job); void host1x_job_put(struct host1x_job *job); int host1x_job_pin(struct host1x_job *job, struct device *dev); diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 5cf387813754..9fe165beb0f9 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -306,31 +306,29 @@ extern void lock_unpin_lock(struct lockdep_map *lock, struct pin_cookie); #define lockdep_depth(tsk) (debug_locks ? 
(tsk)->lockdep_depth : 0) -#define lockdep_assert_held(l) do { \ - WARN_ON(debug_locks && \ - lockdep_is_held(l) == LOCK_STATE_NOT_HELD); \ - } while (0) +#define lockdep_assert(cond) \ + do { WARN_ON(debug_locks && !(cond)); } while (0) -#define lockdep_assert_not_held(l) do { \ - WARN_ON(debug_locks && \ - lockdep_is_held(l) == LOCK_STATE_HELD); \ - } while (0) +#define lockdep_assert_once(cond) \ + do { WARN_ON_ONCE(debug_locks && !(cond)); } while (0) -#define lockdep_assert_held_write(l) do { \ - WARN_ON(debug_locks && !lockdep_is_held_type(l, 0)); \ - } while (0) +#define lockdep_assert_held(l) \ + lockdep_assert(lockdep_is_held(l) != LOCK_STATE_NOT_HELD) -#define lockdep_assert_held_read(l) do { \ - WARN_ON(debug_locks && !lockdep_is_held_type(l, 1)); \ - } while (0) +#define lockdep_assert_not_held(l) \ + lockdep_assert(lockdep_is_held(l) != LOCK_STATE_HELD) -#define lockdep_assert_held_once(l) do { \ - WARN_ON_ONCE(debug_locks && !lockdep_is_held(l)); \ - } while (0) +#define lockdep_assert_held_write(l) \ + lockdep_assert(lockdep_is_held_type(l, 0)) -#define lockdep_assert_none_held_once() do { \ - WARN_ON_ONCE(debug_locks && current->lockdep_depth); \ - } while (0) +#define lockdep_assert_held_read(l) \ + lockdep_assert(lockdep_is_held_type(l, 1)) + +#define lockdep_assert_held_once(l) \ + lockdep_assert_once(lockdep_is_held(l) != LOCK_STATE_NOT_HELD) + +#define lockdep_assert_none_held_once() \ + lockdep_assert_once(!current->lockdep_depth) #define lockdep_recursing(tsk) ((tsk)->lockdep_recursion) @@ -407,6 +405,9 @@ extern int lock_is_held(const void *); extern int lockdep_is_held(const void *); #define lockdep_is_held_type(l, r) (1) +#define lockdep_assert(c) do { } while (0) +#define lockdep_assert_once(c) do { } while (0) + #define lockdep_assert_held(l) do { (void)(l); } while (0) #define lockdep_assert_not_held(l) do { (void)(l); } while (0) #define lockdep_assert_held_write(l) do { (void)(l); } while (0) diff --git a/arch/x86/include/asm/sysfb.h b/include/linux/sysfb.h index 9834eef7f034..b0dcfa26d07b 100644 --- a/arch/x86/include/asm/sysfb.h +++ b/include/linux/sysfb.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ -#ifndef _ARCH_X86_KERNEL_SYSFB_H -#define _ARCH_X86_KERNEL_SYSFB_H +#ifndef _LINUX_SYSFB_H +#define _LINUX_SYSFB_H /* * Generic System Framebuffers on x86 @@ -58,37 +58,37 @@ struct efifb_dmi_info { #ifdef CONFIG_EFI extern struct efifb_dmi_info efifb_dmi_list[]; -void sysfb_apply_efi_quirks(void); +void sysfb_apply_efi_quirks(struct platform_device *pd); #else /* CONFIG_EFI */ -static inline void sysfb_apply_efi_quirks(void) +static inline void sysfb_apply_efi_quirks(struct platform_device *pd) { } #endif /* CONFIG_EFI */ -#ifdef CONFIG_X86_SYSFB +#ifdef CONFIG_SYSFB_SIMPLEFB -bool parse_mode(const struct screen_info *si, - struct simplefb_platform_data *mode); -int create_simplefb(const struct screen_info *si, - const struct simplefb_platform_data *mode); +bool sysfb_parse_mode(const struct screen_info *si, + struct simplefb_platform_data *mode); +int sysfb_create_simplefb(const struct screen_info *si, + const struct simplefb_platform_data *mode); -#else /* CONFIG_X86_SYSFB */ +#else /* CONFIG_SYSFB_SIMPLE */ -static inline bool parse_mode(const struct screen_info *si, - struct simplefb_platform_data *mode) +static inline bool sysfb_parse_mode(const struct screen_info *si, + struct simplefb_platform_data *mode) { return false; } -static inline int create_simplefb(const struct screen_info *si, - const struct simplefb_platform_data *mode) 
+static inline int sysfb_create_simplefb(const struct screen_info *si, + const struct simplefb_platform_data *mode) { return -EINVAL; } -#endif /* CONFIG_X86_SYSFB */ +#endif /* CONFIG_SYSFB_SIMPLE */ -#endif /* _ARCH_X86_KERNEL_SYSFB_H */ +#endif /* _LINUX_SYSFB_H */ diff --git a/include/linux/vgaarb.h b/include/linux/vgaarb.h index dc6ddce92066..b4b9137f9792 100644 --- a/include/linux/vgaarb.h +++ b/include/linux/vgaarb.h @@ -33,6 +33,8 @@ #include <video/vga.h> +struct pci_dev; + /* Legacy VGA regions */ #define VGA_RSRC_NONE 0x00 #define VGA_RSRC_LEGACY_IO 0x01 @@ -42,42 +44,45 @@ #define VGA_RSRC_NORMAL_IO 0x04 #define VGA_RSRC_NORMAL_MEM 0x08 -/* Passing that instead of a pci_dev to use the system "default" - * device, that is the one used by vgacon. Archs will probably - * have to provide their own vga_default_device(); - */ -#define VGA_DEFAULT_DEVICE (NULL) - -struct pci_dev; - -/* For use by clients */ - -/** - * vga_set_legacy_decoding - * - * @pdev: pci device of the VGA card - * @decodes: bit mask of what legacy regions the card decodes - * - * Indicates to the arbiter if the card decodes legacy VGA IOs, - * legacy VGA Memory, both, or none. All cards default to both, - * the card driver (fbdev for example) should tell the arbiter - * if it has disabled legacy decoding, so the card can be left - * out of the arbitration process (and can be safe to take - * interrupts at any time. - */ -#if defined(CONFIG_VGA_ARB) -extern void vga_set_legacy_decoding(struct pci_dev *pdev, - unsigned int decodes); -#else +#ifdef CONFIG_VGA_ARB +void vga_set_legacy_decoding(struct pci_dev *pdev, unsigned int decodes); +int vga_get(struct pci_dev *pdev, unsigned int rsrc, int interruptible); +void vga_put(struct pci_dev *pdev, unsigned int rsrc); +struct pci_dev *vga_default_device(void); +void vga_set_default_device(struct pci_dev *pdev); +int vga_remove_vgacon(struct pci_dev *pdev); +int vga_client_register(struct pci_dev *pdev, + unsigned int (*set_decode)(struct pci_dev *pdev, bool state)); +#else /* CONFIG_VGA_ARB */ static inline void vga_set_legacy_decoding(struct pci_dev *pdev, - unsigned int decodes) { }; -#endif - -#if defined(CONFIG_VGA_ARB) -extern int vga_get(struct pci_dev *pdev, unsigned int rsrc, int interruptible); -#else -static inline int vga_get(struct pci_dev *pdev, unsigned int rsrc, int interruptible) { return 0; } -#endif + unsigned int decodes) +{ +}; +static inline int vga_get(struct pci_dev *pdev, unsigned int rsrc, + int interruptible) +{ + return 0; +} +static inline void vga_put(struct pci_dev *pdev, unsigned int rsrc) +{ +} +static inline struct pci_dev *vga_default_device(void) +{ + return NULL; +} +static inline void vga_set_default_device(struct pci_dev *pdev) +{ +} +static inline int vga_remove_vgacon(struct pci_dev *pdev) +{ + return 0; +} +static inline int vga_client_register(struct pci_dev *pdev, + unsigned int (*set_decode)(struct pci_dev *pdev, bool state)) +{ + return 0; +} +#endif /* CONFIG_VGA_ARB */ /** * vga_get_interruptible @@ -109,48 +114,9 @@ static inline int vga_get_uninterruptible(struct pci_dev *pdev, return vga_get(pdev, rsrc, 0); } -#if defined(CONFIG_VGA_ARB) -extern void vga_put(struct pci_dev *pdev, unsigned int rsrc); -#else -static inline void vga_put(struct pci_dev *pdev, unsigned int rsrc) +static inline void vga_client_unregister(struct pci_dev *pdev) { + vga_client_register(pdev, NULL); } -#endif - - -#ifdef CONFIG_VGA_ARB -extern struct pci_dev *vga_default_device(void); -extern void vga_set_default_device(struct pci_dev *pdev); 
-extern int vga_remove_vgacon(struct pci_dev *pdev); -#else -static inline struct pci_dev *vga_default_device(void) { return NULL; } -static inline void vga_set_default_device(struct pci_dev *pdev) { } -static inline int vga_remove_vgacon(struct pci_dev *pdev) { return 0; } -#endif - -/* - * Architectures should define this if they have several - * independent PCI domains that can afford concurrent VGA - * decoding - */ -#ifndef __ARCH_HAS_VGA_CONFLICT -static inline int vga_conflicts(struct pci_dev *p1, struct pci_dev *p2) -{ - return 1; -} -#endif - -#if defined(CONFIG_VGA_ARB) -int vga_client_register(struct pci_dev *pdev, void *cookie, - void (*irq_set_state)(void *cookie, bool state), - unsigned int (*set_vga_decode)(void *cookie, bool state)); -#else -static inline int vga_client_register(struct pci_dev *pdev, void *cookie, - void (*irq_set_state)(void *cookie, bool state), - unsigned int (*set_vga_decode)(void *cookie, bool state)) -{ - return 0; -} -#endif #endif /* LINUX_VGA_H */ diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h index d043752a74cf..3b810b53ba8b 100644 --- a/include/uapi/drm/drm.h +++ b/include/uapi/drm/drm.h @@ -635,8 +635,8 @@ struct drm_gem_open { /** * DRM_CAP_VBLANK_HIGH_CRTC * - * If set to 1, the kernel supports specifying a CRTC index in the high bits of - * &drm_wait_vblank_request.type. + * If set to 1, the kernel supports specifying a :ref:`CRTC index<crtc_index>` + * in the high bits of &drm_wait_vblank_request.type. * * Starting kernel version 2.6.39, this capability is always set to 1. */ @@ -1050,6 +1050,16 @@ extern "C" { #define DRM_IOCTL_MODE_GETPROPBLOB DRM_IOWR(0xAC, struct drm_mode_get_blob) #define DRM_IOCTL_MODE_GETFB DRM_IOWR(0xAD, struct drm_mode_fb_cmd) #define DRM_IOCTL_MODE_ADDFB DRM_IOWR(0xAE, struct drm_mode_fb_cmd) +/** + * DRM_IOCTL_MODE_RMFB - Remove a framebuffer. + * + * This removes a framebuffer previously added via ADDFB/ADDFB2. The IOCTL + * argument is a framebuffer object ID. + * + * Warning: removing a framebuffer currently in-use on an enabled plane will + * disable that plane. The CRTC the plane is linked to may also be disabled + * (depending on driver capabilities). + */ #define DRM_IOCTL_MODE_RMFB DRM_IOWR(0xAF, unsigned int) #define DRM_IOCTL_MODE_PAGE_FLIP DRM_IOWR(0xB0, struct drm_mode_crtc_page_flip) #define DRM_IOCTL_MODE_DIRTYFB DRM_IOWR(0xB1, struct drm_mode_fb_dirty_cmd) diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h index f7156322aba5..9f4bb4a6f358 100644 --- a/include/uapi/drm/drm_fourcc.h +++ b/include/uapi/drm/drm_fourcc.h @@ -900,9 +900,9 @@ drm_fourcc_canonicalize_nvidia_format_mod(__u64 modifier) /* * The top 4 bits (out of the 56 bits alloted for specifying vendor specific - * modifiers) denote the category for modifiers. Currently we have only two - * categories of modifiers ie AFBC and MISC. We can have a maximum of sixteen - * different categories. + * modifiers) denote the category for modifiers. Currently we have three + * categories of modifiers ie AFBC, MISC and AFRC. We can have a maximum of + * sixteen different categories. 
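The slimmed-down arbiter API drops the cookie and irq_set_state arguments; a hedged sketch of a GPU driver's decode callback (the callback body is an assumption):

static unsigned int hypothetical_vga_set_decode(struct pci_dev *pdev, bool state)
{
        /* Report which legacy VGA resources this device still decodes. */
        if (state)
                return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
                       VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;

        return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
}

The driver would pass this to vga_client_register(pdev, hypothetical_vga_set_decode) at probe time and call vga_client_unregister(pdev) on teardown.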
*/ #define DRM_FORMAT_MOD_ARM_CODE(__type, __val) \ fourcc_mod_code(ARM, ((__u64)(__type) << 52) | ((__val) & 0x000fffffffffffffULL)) @@ -1018,6 +1018,109 @@ drm_fourcc_canonicalize_nvidia_format_mod(__u64 modifier) #define AFBC_FORMAT_MOD_USM (1ULL << 12) /* + * Arm Fixed-Rate Compression (AFRC) modifiers + * + * AFRC is a proprietary fixed rate image compression protocol and format, + * designed to provide guaranteed bandwidth and memory footprint + * reductions in graphics and media use-cases. + * + * AFRC buffers consist of one or more planes, with the same components + * and meaning as an uncompressed buffer using the same pixel format. + * + * Within each plane, the pixel/luma/chroma values are grouped into + * "coding unit" blocks which are individually compressed to a + * fixed size (in bytes). All coding units within a given plane of a buffer + * store the same number of values, and have the same compressed size. + * + * The coding unit size is configurable, allowing different rates of compression. + * + * The start of each AFRC buffer plane must be aligned to an alignment granule which + * depends on the coding unit size. + * + * Coding Unit Size Plane Alignment + * ---------------- --------------- + * 16 bytes 1024 bytes + * 24 bytes 512 bytes + * 32 bytes 2048 bytes + * + * Coding units are grouped into paging tiles. AFRC buffer dimensions must be aligned + * to a multiple of the paging tile dimensions. + * The dimensions of each paging tile depend on whether the buffer is optimised for + * scanline (SCAN layout) or rotated (ROT layout) access. + * + * Layout Paging Tile Width Paging Tile Height + * ------ ----------------- ------------------ + * SCAN 16 coding units 4 coding units + * ROT 8 coding units 8 coding units + * + * The dimensions of each coding unit depend on the number of components + * in the compressed plane and whether the buffer is optimised for + * scanline (SCAN layout) or rotated (ROT layout) access. + * + * Number of Components in Plane Layout Coding Unit Width Coding Unit Height + * ----------------------------- --------- ----------------- ------------------ + * 1 SCAN 16 samples 4 samples + * Example: 16x4 luma samples in a 'Y' plane + * 16x4 chroma 'V' values, in the 'V' plane of a fully-planar YUV buffer + * ----------------------------- --------- ----------------- ------------------ + * 1 ROT 8 samples 8 samples + * Example: 8x8 luma samples in a 'Y' plane + * 8x8 chroma 'V' values, in the 'V' plane of a fully-planar YUV buffer + * ----------------------------- --------- ----------------- ------------------ + * 2 DONT CARE 8 samples 4 samples + * Example: 8x4 chroma pairs in the 'UV' plane of a semi-planar YUV buffer + * ----------------------------- --------- ----------------- ------------------ + * 3 DONT CARE 4 samples 4 samples + * Example: 4x4 pixels in an RGB buffer without alpha + * ----------------------------- --------- ----------------- ------------------ + * 4 DONT CARE 4 samples 4 samples + * Example: 4x4 pixels in an RGB buffer with alpha + */ + +#define DRM_FORMAT_MOD_ARM_TYPE_AFRC 0x02 + +#define DRM_FORMAT_MOD_ARM_AFRC(__afrc_mode) \ + DRM_FORMAT_MOD_ARM_CODE(DRM_FORMAT_MOD_ARM_TYPE_AFRC, __afrc_mode) + +/* + * AFRC coding unit size modifier. + * + * Indicates the number of bytes used to store each compressed coding unit for + * one or more planes in an AFRC encoded buffer. The coding unit size for chrominance + * is the same for both Cb and Cr, which may be stored in separate planes. 
+ * + * AFRC_FORMAT_MOD_CU_SIZE_P0 indicates the number of bytes used to store + * each compressed coding unit in the first plane of the buffer. For RGBA buffers + * this is the only plane, while for semi-planar and fully-planar YUV buffers, + * this corresponds to the luma plane. + * + * AFRC_FORMAT_MOD_CU_SIZE_P12 indicates the number of bytes used to store + * each compressed coding unit in the second and third planes in the buffer. + * For semi-planar and fully-planar YUV buffers, this corresponds to the chroma plane(s). + * + * For single-plane buffers, AFRC_FORMAT_MOD_CU_SIZE_P0 must be specified + * and AFRC_FORMAT_MOD_CU_SIZE_P12 must be zero. + * For semi-planar and fully-planar buffers, both AFRC_FORMAT_MOD_CU_SIZE_P0 and + * AFRC_FORMAT_MOD_CU_SIZE_P12 must be specified. + */ +#define AFRC_FORMAT_MOD_CU_SIZE_MASK 0xf +#define AFRC_FORMAT_MOD_CU_SIZE_16 (1ULL) +#define AFRC_FORMAT_MOD_CU_SIZE_24 (2ULL) +#define AFRC_FORMAT_MOD_CU_SIZE_32 (3ULL) + +#define AFRC_FORMAT_MOD_CU_SIZE_P0(__afrc_cu_size) (__afrc_cu_size) +#define AFRC_FORMAT_MOD_CU_SIZE_P12(__afrc_cu_size) ((__afrc_cu_size) << 4) + +/* + * AFRC scanline memory layout. + * + * Indicates if the buffer uses the scanline-optimised layout + * for an AFRC encoded buffer, otherwise, it uses the rotation-optimised layout. + * The memory layout is the same for all planes. + */ +#define AFRC_FORMAT_MOD_LAYOUT_SCAN (1ULL << 8) + +/* * Arm 16x16 Block U-Interleaved modifier * * This is used by Arm Mali Utgard and Midgard GPUs. It divides the image diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h index 9b6722d45f36..90c55383f1ee 100644 --- a/include/uapi/drm/drm_mode.h +++ b/include/uapi/drm/drm_mode.h @@ -312,16 +312,48 @@ struct drm_mode_set_plane { __u32 src_w; }; +/** + * struct drm_mode_get_plane - Get plane metadata. + * + * Userspace can perform a GETPLANE ioctl to retrieve information about a + * plane. + * + * To retrieve the number of formats supported, set @count_format_types to zero + * and call the ioctl. @count_format_types will be updated with the value. + * + * To retrieve these formats, allocate an array with the memory needed to store + * @count_format_types formats. Point @format_type_ptr to this array and call + * the ioctl again (with @count_format_types still set to the value returned in + * the first ioctl call). + */ struct drm_mode_get_plane { + /** + * @plane_id: Object ID of the plane whose information should be + * retrieved. Set by caller. + */ __u32 plane_id; + /** @crtc_id: Object ID of the current CRTC. */ __u32 crtc_id; + /** @fb_id: Object ID of the current fb. */ __u32 fb_id; + /** + * @possible_crtcs: Bitmask of CRTC's compatible with the plane. CRTC's + * are created and they receive an index, which corresponds to their + * position in the bitmask. Bit N corresponds to + * :ref:`CRTC index<crtc_index>` N. + */ __u32 possible_crtcs; + /** @gamma_size: Never used. */ __u32 gamma_size; + /** @count_format_types: Number of formats. */ __u32 count_format_types; + /** + * @format_type_ptr: Pointer to ``__u32`` array of formats that are + * supported by the plane. These formats do not require modifiers. + */ __u64 format_type_ptr; }; @@ -509,22 +541,74 @@ struct drm_mode_get_connector { */ #define DRM_MODE_PROP_ATOMIC 0x80000000 +/** + * struct drm_mode_property_enum - Description for an enum/bitfield entry. + * @value: numeric value for this enum entry. + * @name: symbolic name for this enum entry. + * + * See struct drm_property_enum for details. 
+ */ struct drm_mode_property_enum { __u64 value; char name[DRM_PROP_NAME_LEN]; }; +/** + * struct drm_mode_get_property - Get property metadata. + * + * User-space can perform a GETPROPERTY ioctl to retrieve information about a + * property. The same property may be attached to multiple objects, see + * "Modeset Base Object Abstraction". + * + * The meaning of the @values_ptr field changes depending on the property type. + * See &drm_property.flags for more details. + * + * The @enum_blob_ptr and @count_enum_blobs fields are only meaningful when the + * property has the type &DRM_MODE_PROP_ENUM or &DRM_MODE_PROP_BITMASK. For + * backwards compatibility, the kernel will always set @count_enum_blobs to + * zero when the property has the type &DRM_MODE_PROP_BLOB. User-space must + * ignore these two fields if the property has a different type. + * + * User-space is expected to retrieve values and enums by performing this ioctl + * at least twice: the first time to retrieve the number of elements, the + * second time to retrieve the elements themselves. + * + * To retrieve the number of elements, set @count_values and @count_enum_blobs + * to zero, then call the ioctl. @count_values will be updated with the number + * of elements. If the property has the type &DRM_MODE_PROP_ENUM or + * &DRM_MODE_PROP_BITMASK, @count_enum_blobs will be updated as well. + * + * To retrieve the elements themselves, allocate an array for @values_ptr and + * set @count_values to its capacity. If the property has the type + * &DRM_MODE_PROP_ENUM or &DRM_MODE_PROP_BITMASK, allocate an array for + * @enum_blob_ptr and set @count_enum_blobs to its capacity. Calling the ioctl + * again will fill the arrays. + */ struct drm_mode_get_property { - __u64 values_ptr; /* values and blob lengths */ - __u64 enum_blob_ptr; /* enum and blob id ptrs */ + /** @values_ptr: Pointer to a ``__u64`` array. */ + __u64 values_ptr; + /** @enum_blob_ptr: Pointer to a struct drm_mode_property_enum array. */ + __u64 enum_blob_ptr; + /** + * @prop_id: Object ID of the property which should be retrieved. Set + * by the caller. + */ __u32 prop_id; + /** + * @flags: ``DRM_MODE_PROP_*`` bitfield. See &drm_property.flags for + * a definition of the flags. + */ __u32 flags; + /** + * @name: Symbolic property name. User-space should use this field to + * recognize properties. + */ char name[DRM_PROP_NAME_LEN]; + /** @count_values: Number of elements in @values_ptr. */ __u32 count_values; - /* This is only used to count enum values, not blobs. The _blobs is - * simply because of a historical reason, i.e. backwards compat. */ + /** @count_enum_blobs: Number of elements in @enum_blob_ptr. */ __u32 count_enum_blobs; }; diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index c2c7759b7d2e..bde5860b3686 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -572,6 +572,15 @@ typedef struct drm_i915_irq_wait { #define I915_SCHEDULER_CAP_PREEMPTION (1ul << 2) #define I915_SCHEDULER_CAP_SEMAPHORES (1ul << 3) #define I915_SCHEDULER_CAP_ENGINE_BUSY_STATS (1ul << 4) +/* + * Indicates the 2k user priority levels are statically mapped into 3 buckets as + * follows: + * + * -1k to -1 Low priority + * 0 Normal priority + * 1 to 1k Highest priority + */ +#define I915_SCHEDULER_CAP_STATIC_PRIORITY_MAP (1ul << 5) #define I915_PARAM_HUC_STATUS 42 @@ -674,6 +683,9 @@ typedef struct drm_i915_irq_wait { */ #define I915_PARAM_HAS_EXEC_TIMELINE_FENCES 55 +/* Query if the kernel supports the I915_USERPTR_PROBE flag. 
*/ +#define I915_PARAM_HAS_USERPTR_PROBE 56 + /* Must be kept compact -- no holes and well documented */ typedef struct drm_i915_getparam { @@ -849,45 +861,113 @@ struct drm_i915_gem_mmap_gtt { __u64 offset; }; +/** + * struct drm_i915_gem_mmap_offset - Retrieve an offset so we can mmap this buffer object. + * + * This struct is passed as argument to the `DRM_IOCTL_I915_GEM_MMAP_OFFSET` ioctl, + * and is used to retrieve the fake offset to mmap an object specified by &handle. + * + * The legacy way of using `DRM_IOCTL_I915_GEM_MMAP` is removed on gen12+. + * `DRM_IOCTL_I915_GEM_MMAP_GTT` is an older supported alias to this struct, but will behave + * as setting the &extensions to 0, and &flags to `I915_MMAP_OFFSET_GTT`. + */ struct drm_i915_gem_mmap_offset { - /** Handle for the object being mapped. */ + /** @handle: Handle for the object being mapped. */ __u32 handle; + /** @pad: Must be zero */ __u32 pad; /** - * Fake offset to use for subsequent mmap call + * @offset: The fake offset to use for subsequent mmap call * * This is a fixed-size type for 32/64 compatibility. */ __u64 offset; /** - * Flags for extended behaviour. + * @flags: Flags for extended behaviour. + * + * It is mandatory that one of the `MMAP_OFFSET` types + * should be included: * - * It is mandatory that one of the MMAP_OFFSET types - * (GTT, WC, WB, UC, etc) should be included. + * - `I915_MMAP_OFFSET_GTT`: Use mmap with the object bound to GTT. (Write-Combined) + * - `I915_MMAP_OFFSET_WC`: Use Write-Combined caching. + * - `I915_MMAP_OFFSET_WB`: Use Write-Back caching. + * - `I915_MMAP_OFFSET_FIXED`: Use object placement to determine caching. + * + * On devices with local memory `I915_MMAP_OFFSET_FIXED` is the only valid + * type. On devices without local memory, this caching mode is invalid. + * + * As caching mode when specifying `I915_MMAP_OFFSET_FIXED`, WC or WB will + * be used, depending on the object placement on creation. WB will be used + * when the object can only exist in system memory, WC otherwise. */ __u64 flags; -#define I915_MMAP_OFFSET_GTT 0 -#define I915_MMAP_OFFSET_WC 1 -#define I915_MMAP_OFFSET_WB 2 -#define I915_MMAP_OFFSET_UC 3 - /* - * Zero-terminated chain of extensions. +#define I915_MMAP_OFFSET_GTT 0 +#define I915_MMAP_OFFSET_WC 1 +#define I915_MMAP_OFFSET_WB 2 +#define I915_MMAP_OFFSET_UC 3 +#define I915_MMAP_OFFSET_FIXED 4 + + /** + * @extensions: Zero-terminated chain of extensions. * * No current extensions defined; mbz. */ __u64 extensions; }; +/** + * struct drm_i915_gem_set_domain - Adjust the objects write or read domain, in + * preparation for accessing the pages via some CPU domain. + * + * Specifying a new write or read domain will flush the object out of the + * previous domain(if required), before then updating the objects domain + * tracking with the new domain. + * + * Note this might involve waiting for the object first if it is still active on + * the GPU. + * + * Supported values for @read_domains and @write_domain: + * + * - I915_GEM_DOMAIN_WC: Uncached write-combined domain + * - I915_GEM_DOMAIN_CPU: CPU cache domain + * - I915_GEM_DOMAIN_GTT: Mappable aperture domain + * + * All other domains are rejected. + * + * Note that for discrete, starting from DG1, this is no longer supported, and + * is instead rejected. On such platforms the CPU domain is effectively static, + * where we also only support a single &drm_i915_gem_mmap_offset cache mode, + * which can't be set explicitly and instead depends on the object placements, + * as per the below. 
+ * + * Implicit caching rules, starting from DG1: + * + * - If any of the object placements (see &drm_i915_gem_create_ext_memory_regions) + * contain I915_MEMORY_CLASS_DEVICE then the object will be allocated and + * mapped as write-combined only. + * + * - Everything else is always allocated and mapped as write-back, with the + * guarantee that everything is also coherent with the GPU. + * + * Note that this is likely to change in the future again, where we might need + * more flexibility on future devices, so making this all explicit as part of a + * new &drm_i915_gem_create_ext extension is probable. + */ struct drm_i915_gem_set_domain { - /** Handle for the object */ + /** @handle: Handle for the object. */ __u32 handle; - /** New read domains */ + /** @read_domains: New read domains. */ __u32 read_domains; - /** New write domain */ + /** + * @write_domain: New write domain. + * + * Note that having something in the write domain implies it's in the + * read domain, and only that read domain. + */ __u32 write_domain; }; @@ -1348,12 +1428,11 @@ struct drm_i915_gem_busy { * reading from the object simultaneously. * * The value of each engine class is the same as specified in the - * I915_CONTEXT_SET_ENGINES parameter and via perf, i.e. + * I915_CONTEXT_PARAM_ENGINES context parameter and via perf, i.e. * I915_ENGINE_CLASS_RENDER, I915_ENGINE_CLASS_COPY, etc. - * reported as active itself. Some hardware may have parallel - * execution engines, e.g. multiple media engines, which are - * mapped to the same class identifier and so are not separately - * reported for busyness. + * Some hardware may have parallel execution engines, e.g. multiple + * media engines, which are mapped to the same class identifier and so + * are not separately reported for busyness. * * Caveat emptor: * Only the boolean result of this query is reliable; that is whether @@ -1364,43 +1443,79 @@ struct drm_i915_gem_busy { }; /** - * I915_CACHING_NONE - * - * GPU access is not coherent with cpu caches. Default for machines without an - * LLC. - */ -#define I915_CACHING_NONE 0 -/** - * I915_CACHING_CACHED - * - * GPU access is coherent with cpu caches and furthermore the data is cached in - * last-level caches shared between cpu cores and the gpu GT. Default on - * machines with HAS_LLC. + * struct drm_i915_gem_caching - Set or get the caching for given object + * handle. + * + * Allow userspace to control the GTT caching bits for a given object when the + * object is later mapped through the ppGTT(or GGTT on older platforms lacking + * ppGTT support, or if the object is used for scanout). Note that this might + * require unbinding the object from the GTT first, if its current caching value + * doesn't match. + * + * Note that this all changes on discrete platforms, starting from DG1, the + * set/get caching is no longer supported, and is now rejected. Instead the CPU + * caching attributes(WB vs WC) will become an immutable creation time property + * for the object, along with the GTT caching level. For now we don't expose any + * new uAPI for this, instead on DG1 this is all implicit, although this largely + * shouldn't matter since DG1 is coherent by default(without any way of + * controlling it). + * + * Implicit caching rules, starting from DG1: + * + * - If any of the object placements (see &drm_i915_gem_create_ext_memory_regions) + * contain I915_MEMORY_CLASS_DEVICE then the object will be allocated and + * mapped as write-combined only. 
+ * + * - Everything else is always allocated and mapped as write-back, with the + * guarantee that everything is also coherent with the GPU. + * + * Note that this is likely to change in the future again, where we might need + * more flexibility on future devices, so making this all explicit as part of a + * new &drm_i915_gem_create_ext extension is probable. + * + * Side note: Part of the reason for this is that changing the at-allocation-time CPU + * caching attributes for the pages might be required(and is expensive) if we + * need to then CPU map the pages later with different caching attributes. This + * inconsistent caching behaviour, while supported on x86, is not universally + * supported on other architectures. So for simplicity we opt for setting + * everything at creation time, whilst also making it immutable, on discrete + * platforms. */ -#define I915_CACHING_CACHED 1 -/** - * I915_CACHING_DISPLAY - * - * Special GPU caching mode which is coherent with the scanout engines. - * Transparently falls back to I915_CACHING_NONE on platforms where no special - * cache mode (like write-through or gfdt flushing) is available. The kernel - * automatically sets this mode when using a buffer as a scanout target. - * Userspace can manually set this mode to avoid a costly stall and clflush in - * the hotpath of drawing the first frame. - */ -#define I915_CACHING_DISPLAY 2 - struct drm_i915_gem_caching { /** - * Handle of the buffer to set/get the caching level of. */ + * @handle: Handle of the buffer to set/get the caching level. + */ __u32 handle; /** - * Cacheing level to apply or return value + * @caching: The GTT caching level to apply or possible return value. + * + * The supported @caching values: * - * bits0-15 are for generic caching control (i.e. the above defined - * values). bits16-31 are reserved for platform-specific variations - * (e.g. l3$ caching on gen7). */ + * I915_CACHING_NONE: + * + * GPU access is not coherent with CPU caches. Default for machines + * without an LLC. This means manual flushing might be needed, if we + * want GPU access to be coherent. + * + * I915_CACHING_CACHED: + * + * GPU access is coherent with CPU caches and furthermore the data is + * cached in last-level caches shared between CPU cores and the GPU GT. + * + * I915_CACHING_DISPLAY: + * + * Special GPU caching mode which is coherent with the scanout engines. + * Transparently falls back to I915_CACHING_NONE on platforms where no + * special cache mode (like write-through or gfdt flushing) is + * available. The kernel automatically sets this mode when using a + * buffer as a scanout target. Userspace can manually set this mode to + * avoid a costly stall and clflush in the hotpath of drawing the first + * frame. + */ +#define I915_CACHING_NONE 0 +#define I915_CACHING_CACHED 1 +#define I915_CACHING_DISPLAY 2 __u32 caching; }; @@ -1639,6 +1754,10 @@ struct drm_i915_gem_context_param { __u32 size; __u64 param; #define I915_CONTEXT_PARAM_BAN_PERIOD 0x1 +/* I915_CONTEXT_PARAM_NO_ZEROMAP has been removed. On the off chance + * someone somewhere has attempted to use it, never re-use this context + * param number. + */ #define I915_CONTEXT_PARAM_NO_ZEROMAP 0x2 #define I915_CONTEXT_PARAM_GTT_SIZE 0x3 #define I915_CONTEXT_PARAM_NO_ERROR_CAPTURE 0x4 @@ -1723,24 +1842,8 @@ struct drm_i915_gem_context_param { */ #define I915_CONTEXT_PARAM_PERSISTENCE 0xb -/* - * I915_CONTEXT_PARAM_RINGSIZE: - * - * Sets the size of the CS ringbuffer to use for logical ring contexts. 
This - * applies a limit of how many batches can be queued to HW before the caller - * is blocked due to lack of space for more commands. - * - * Only reliably possible to be set prior to first use, i.e. during - * construction. At any later point, the current execution must be flushed as - * the ring can only be changed while the context is idle. Note, the ringsize - * can be specified as a constructor property, see - * I915_CONTEXT_CREATE_EXT_SETPARAM, but can also be set later if required. - * - * Only applies to the current set of engine and lost when those engines - * are replaced by a new mapping (see I915_CONTEXT_PARAM_ENGINES). - * - * Must be between 4 - 512 KiB, in intervals of page size [4 KiB]. - * Default is 16 KiB. +/* This API has been removed. On the off chance someone somewhere has + * attempted to use it, never re-use this context param number. */ #define I915_CONTEXT_PARAM_RINGSIZE 0xc /* Must be kept compact -- no holes and well documented */ @@ -1807,6 +1910,69 @@ struct drm_i915_gem_context_param_sseu { __u32 rsvd; }; +/** + * DOC: Virtual Engine uAPI + * + * Virtual engine is a concept where userspace is able to configure a set of + * physical engines, submit a batch buffer, and let the driver execute it on any + * engine from the set as it sees fit. + * + * This is primarily useful on parts which have multiple instances of a same + * class engine, like for example GT3+ Skylake parts with their two VCS engines. + * + * For instance userspace can enumerate all engines of a certain class using the + * previously described `Engine Discovery uAPI`_. After that userspace can + * create a GEM context with a placeholder slot for the virtual engine (using + * `I915_ENGINE_CLASS_INVALID` and `I915_ENGINE_CLASS_INVALID_NONE` for class + * and instance respectively) and finally using the + * `I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE` extension place a virtual engine in + * the same reserved slot. + * + * Example of creating a virtual engine and submitting a batch buffer to it: + * + * .. code-block:: C + * + * I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(virtual, 2) = { + * .base.name = I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE, + * .engine_index = 0, // Place this virtual engine into engine map slot 0 + * .num_siblings = 2, + * .engines = { { I915_ENGINE_CLASS_VIDEO, 0 }, + * { I915_ENGINE_CLASS_VIDEO, 1 }, }, + * }; + * I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 1) = { + * .engines = { { I915_ENGINE_CLASS_INVALID, + * I915_ENGINE_CLASS_INVALID_NONE } }, + * .extensions = to_user_pointer(&virtual), // Chains after load_balance extension + * }; + * struct drm_i915_gem_context_create_ext_setparam p_engines = { + * .base = { + * .name = I915_CONTEXT_CREATE_EXT_SETPARAM, + * }, + * .param = { + * .param = I915_CONTEXT_PARAM_ENGINES, + * .value = to_user_pointer(&engines), + * .size = sizeof(engines), + * }, + * }; + * struct drm_i915_gem_context_create_ext create = { + * .flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS, + * .extensions = to_user_pointer(&p_engines); + * }; + * + * ctx_id = gem_context_create_ext(drm_fd, &create); + * + * // Now we have created a GEM context with its engine map containing a + * // single virtual engine. Submissions to this slot can go either to + * // vcs0 or vcs1, depending on the load balancing algorithm used inside + * // the driver. The load balancing is dynamic from one batch buffer to + * // another and transparent to userspace. + * + * ... 
+ * execbuf.rsvd1 = ctx_id; + * execbuf.flags = 0; // Submits to index 0 which is the virtual engine + * gem_execbuf(drm_fd, &execbuf); + */ + /* * i915_context_engines_load_balance: * @@ -1883,6 +2049,61 @@ struct i915_context_engines_bond { struct i915_engine_class_instance engines[N__]; \ } __attribute__((packed)) name__ +/** + * DOC: Context Engine Map uAPI + * + * Context engine map is a new way of addressing engines when submitting batch- + * buffers, replacing the existing way of using identifiers like `I915_EXEC_BLT` + * inside the flags field of `struct drm_i915_gem_execbuffer2`. + * + * To use it created GEM contexts need to be configured with a list of engines + * the user is intending to submit to. This is accomplished using the + * `I915_CONTEXT_PARAM_ENGINES` parameter and `struct + * i915_context_param_engines`. + * + * For such contexts the `I915_EXEC_RING_MASK` field becomes an index into the + * configured map. + * + * Example of creating such context and submitting against it: + * + * .. code-block:: C + * + * I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 2) = { + * .engines = { { I915_ENGINE_CLASS_RENDER, 0 }, + * { I915_ENGINE_CLASS_COPY, 0 } } + * }; + * struct drm_i915_gem_context_create_ext_setparam p_engines = { + * .base = { + * .name = I915_CONTEXT_CREATE_EXT_SETPARAM, + * }, + * .param = { + * .param = I915_CONTEXT_PARAM_ENGINES, + * .value = to_user_pointer(&engines), + * .size = sizeof(engines), + * }, + * }; + * struct drm_i915_gem_context_create_ext create = { + * .flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS, + * .extensions = to_user_pointer(&p_engines); + * }; + * + * ctx_id = gem_context_create_ext(drm_fd, &create); + * + * // We have now created a GEM context with two engines in the map: + * // Index 0 points to rcs0 while index 1 points to bcs0. Other engines + * // will not be accessible from this context. + * + * ... + * execbuf.rsvd1 = ctx_id; + * execbuf.flags = 0; // Submits to index 0, which is rcs0 for this context + * gem_execbuf(drm_fd, &execbuf); + * + * ... + * execbuf.rsvd1 = ctx_id; + * execbuf.flags = 1; // Submits to index 0, which is bcs0 for this context + * gem_execbuf(drm_fd, &execbuf); + */ + struct i915_context_param_engines { __u64 extensions; /* linked chain of extension blocks, 0 terminates */ #define I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE 0 /* see i915_context_engines_load_balance */ @@ -1901,20 +2122,10 @@ struct drm_i915_gem_context_create_ext_setparam { struct drm_i915_gem_context_param param; }; -struct drm_i915_gem_context_create_ext_clone { +/* This API has been removed. On the off chance someone somewhere has + * attempted to use it, never re-use this extension number. + */ #define I915_CONTEXT_CREATE_EXT_CLONE 1 - struct i915_user_extension base; - __u32 clone_id; - __u32 flags; -#define I915_CONTEXT_CLONE_ENGINES (1u << 0) -#define I915_CONTEXT_CLONE_FLAGS (1u << 1) -#define I915_CONTEXT_CLONE_SCHEDATTR (1u << 2) -#define I915_CONTEXT_CLONE_SSEU (1u << 3) -#define I915_CONTEXT_CLONE_TIMELINE (1u << 4) -#define I915_CONTEXT_CLONE_VM (1u << 5) -#define I915_CONTEXT_CLONE_UNKNOWN -(I915_CONTEXT_CLONE_VM << 1) - __u64 rsvd; -}; struct drm_i915_gem_context_destroy { __u32 ctx_id; @@ -1986,14 +2197,69 @@ struct drm_i915_reset_stats { __u32 pad; }; +/** + * struct drm_i915_gem_userptr - Create GEM object from user allocated memory. + * + * Userptr objects have several restrictions on what ioctls can be used with the + * object handle. + */ struct drm_i915_gem_userptr { + /** + * @user_ptr: The pointer to the allocated memory. 
+ * + * Needs to be aligned to PAGE_SIZE. + */ __u64 user_ptr; + + /** + * @user_size: + * + * The size in bytes for the allocated memory. This will also become the + * object size. + * + * Needs to be aligned to PAGE_SIZE, and should be at least PAGE_SIZE, + * or larger. + */ __u64 user_size; + + /** + * @flags: + * + * Supported flags: + * + * I915_USERPTR_READ_ONLY: + * + * Mark the object as readonly, this also means GPU access can only be + * readonly. This is only supported on HW which supports readonly access + * through the GTT. If the HW can't support readonly access, an error is + * returned. + * + * I915_USERPTR_PROBE: + * + * Probe the provided @user_ptr range and validate that the @user_ptr is + * indeed pointing to normal memory and that the range is also valid. + * For example if some garbage address is given to the kernel, then this + * should complain. + * + * Returns -EFAULT if the probe failed. + * + * Note that this doesn't populate the backing pages, and also doesn't + * guarantee that the object will remain valid when the object is + * eventually used. + * + * The kernel supports this feature if I915_PARAM_HAS_USERPTR_PROBE + * returns a non-zero value. + * + * I915_USERPTR_UNSYNCHRONIZED: + * + * NOT USED. Setting this flag will result in an error. + */ __u32 flags; #define I915_USERPTR_READ_ONLY 0x1 +#define I915_USERPTR_PROBE 0x2 #define I915_USERPTR_UNSYNCHRONIZED 0x80000000 /** - * Returned handle for the object. + * @handle: Returned handle for the object. * * Object handles are nonzero. */ @@ -2377,6 +2643,76 @@ struct drm_i915_query_topology_info { }; /** + * DOC: Engine Discovery uAPI + * + * Engine discovery uAPI is a way of enumerating physical engines present in a + * GPU associated with an open i915 DRM file descriptor. This supersedes the old + * way of using `DRM_IOCTL_I915_GETPARAM` and engine identifiers like + * `I915_PARAM_HAS_BLT`. + * + * The need for this interface came starting with Icelake and newer GPUs, which + * started to establish a pattern of having multiple engines of a same class, + * where not all instances were always completely functionally equivalent. + * + * Entry point for this uapi is `DRM_IOCTL_I915_QUERY` with the + * `DRM_I915_QUERY_ENGINE_INFO` as the queried item id. + * + * Example for getting the list of engines: + * + * .. code-block:: C + * + * struct drm_i915_query_engine_info *info; + * struct drm_i915_query_item item = { + * .query_id = DRM_I915_QUERY_ENGINE_INFO, + * }; + * struct drm_i915_query query = { + * .num_items = 1, + * .items_ptr = (uintptr_t)&item, + * }; + * int err, i; + * + * // First query the size of the blob we need, this needs to be large + * // enough to hold our array of engines. The kernel will fill out the + * // item.length for us, which is the number of bytes we need. + * // + * // Alternatively a large buffer can be allocated straight away enabling + * // querying in one pass, in which case item.length should contain the + * // length of the provided buffer. + * err = ioctl(fd, DRM_IOCTL_I915_QUERY, &query); + * if (err) ... + * + * info = calloc(1, item.length); + * // Now that we allocated the required number of bytes, we call the ioctl + * // again, this time with the data_ptr pointing to our newly allocated + * // blob, which the kernel can then populate with info on all engines. + * item.data_ptr = (uintptr_t)info; + * + * err = ioctl(fd, DRM_IOCTL_I915_QUERY, &query); + * if (err) ... 
+ * + * // We can now access each engine in the array + * for (i = 0; i < info->num_engines; i++) { + * struct drm_i915_engine_info einfo = info->engines[i]; + * u16 class = einfo.engine.class; + * u16 instance = einfo.engine.instance; + * .... + * } + * + * free(info); + * + * Each of the enumerated engines, apart from being defined by its class and + * instance (see `struct i915_engine_class_instance`), also can have flags and + * capabilities defined as documented in i915_drm.h. + * + * For instance video engines which support HEVC encoding will have the + * `I915_VIDEO_CLASS_CAPABILITY_HEVC` capability bit set. + * + * Engine discovery only fully comes to its own when combined with the new way + * of addressing engines when submitting batch buffers using contexts with + * engine maps configured. + */ + +/** * struct drm_i915_engine_info * * Describes one engine and it's capabilities as known to the driver. diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h index f075851021c3..6b8fffc28a50 100644 --- a/include/uapi/drm/msm_drm.h +++ b/include/uapi/drm/msm_drm.h @@ -73,11 +73,19 @@ struct drm_msm_timespec { #define MSM_PARAM_MAX_FREQ 0x04 #define MSM_PARAM_TIMESTAMP 0x05 #define MSM_PARAM_GMEM_BASE 0x06 -#define MSM_PARAM_NR_RINGS 0x07 +#define MSM_PARAM_PRIORITIES 0x07 /* The # of priority levels */ #define MSM_PARAM_PP_PGTABLE 0x08 /* => 1 for per-process pagetables, else 0 */ #define MSM_PARAM_FAULTS 0x09 #define MSM_PARAM_SUSPENDS 0x0a +/* For backwards compat. The original support for preemption was based on + * a single ring per priority level so # of priority levels equals the # + * of rings. With drm/scheduler providing additional levels of priority, + * the number of priorities is greater than the # of rings. The param is + * renamed to better reflect this. + */ +#define MSM_PARAM_NR_RINGS MSM_PARAM_PRIORITIES + struct drm_msm_param { __u32 pipe; /* in, MSM_PIPE_x */ __u32 param; /* in, MSM_PARAM_x */ @@ -304,6 +312,10 @@ struct drm_msm_gem_madvise { #define MSM_SUBMITQUEUE_FLAGS (0) +/* + * The submitqueue priority should be between 0 and MSM_PARAM_PRIORITIES-1, + * a lower numeric value is higher priority. + */ struct drm_msm_submitqueue { __u32 flags; /* in, MSM_SUBMITQUEUE_x */ __u32 prio; /* in, Priority level */ diff --git a/include/uapi/drm/tegra_drm.h b/include/uapi/drm/tegra_drm.h index c4df3c3668b3..94cfc306d50a 100644 --- a/include/uapi/drm/tegra_drm.h +++ b/include/uapi/drm/tegra_drm.h @@ -1,24 +1,5 @@ -/* - * Copyright (c) 2012-2013, NVIDIA CORPORATION. All rights reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ +/* SPDX-License-Identifier: MIT */ +/* Copyright (c) 2012-2020 NVIDIA Corporation */ #ifndef _UAPI_TEGRA_DRM_H_ #define _UAPI_TEGRA_DRM_H_ @@ -29,6 +10,8 @@ extern "C" { #endif +/* Tegra DRM legacy UAPI. Only enabled with STAGING */ + #define DRM_TEGRA_GEM_CREATE_TILED (1 << 0) #define DRM_TEGRA_GEM_CREATE_BOTTOM_UP (1 << 1) @@ -649,8 +632,8 @@ struct drm_tegra_gem_get_flags { #define DRM_TEGRA_SYNCPT_READ 0x02 #define DRM_TEGRA_SYNCPT_INCR 0x03 #define DRM_TEGRA_SYNCPT_WAIT 0x04 -#define DRM_TEGRA_OPEN_CHANNEL 0x05 -#define DRM_TEGRA_CLOSE_CHANNEL 0x06 +#define DRM_TEGRA_OPEN_CHANNEL 0x05 +#define DRM_TEGRA_CLOSE_CHANNEL 0x06 #define DRM_TEGRA_GET_SYNCPT 0x07 #define DRM_TEGRA_SUBMIT 0x08 #define DRM_TEGRA_GET_SYNCPT_BASE 0x09 @@ -674,6 +657,402 @@ struct drm_tegra_gem_get_flags { #define DRM_IOCTL_TEGRA_GEM_SET_FLAGS DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_GEM_SET_FLAGS, struct drm_tegra_gem_set_flags) #define DRM_IOCTL_TEGRA_GEM_GET_FLAGS DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_GEM_GET_FLAGS, struct drm_tegra_gem_get_flags) +/* New Tegra DRM UAPI */ + +/* + * Reported by the driver in the `capabilities` field. + * + * DRM_TEGRA_CHANNEL_CAP_CACHE_COHERENT: If set, the engine is cache coherent + * with regard to the system memory. + */ +#define DRM_TEGRA_CHANNEL_CAP_CACHE_COHERENT (1 << 0) + +struct drm_tegra_channel_open { + /** + * @host1x_class: [in] + * + * Host1x class of the engine that will be programmed using this + * channel. + */ + __u32 host1x_class; + + /** + * @flags: [in] + * + * Flags. + */ + __u32 flags; + + /** + * @context: [out] + * + * Opaque identifier corresponding to the opened channel. + */ + __u32 context; + + /** + * @version: [out] + * + * Version of the engine hardware. This can be used by userspace + * to determine how the engine needs to be programmed. + */ + __u32 version; + + /** + * @capabilities: [out] + * + * Flags describing the hardware capabilities. + */ + __u32 capabilities; + __u32 padding; +}; + +struct drm_tegra_channel_close { + /** + * @context: [in] + * + * Identifier of the channel to close. + */ + __u32 context; + __u32 padding; +}; + +/* + * Mapping flags that can be used to influence how the mapping is created. + * + * DRM_TEGRA_CHANNEL_MAP_READ: create mapping that allows HW read access + * DRM_TEGRA_CHANNEL_MAP_WRITE: create mapping that allows HW write access + */ +#define DRM_TEGRA_CHANNEL_MAP_READ (1 << 0) +#define DRM_TEGRA_CHANNEL_MAP_WRITE (1 << 1) +#define DRM_TEGRA_CHANNEL_MAP_READ_WRITE (DRM_TEGRA_CHANNEL_MAP_READ | \ + DRM_TEGRA_CHANNEL_MAP_WRITE) + +struct drm_tegra_channel_map { + /** + * @context: [in] + * + * Identifier of the channel to which make memory available for. + */ + __u32 context; + + /** + * @handle: [in] + * + * GEM handle of the memory to map. + */ + __u32 handle; + + /** + * @flags: [in] + * + * Flags. + */ + __u32 flags; + + /** + * @mapping: [out] + * + * Identifier corresponding to the mapping, to be used for + * relocations or unmapping later. + */ + __u32 mapping; +}; + +struct drm_tegra_channel_unmap { + /** + * @context: [in] + * + * Channel identifier of the channel to unmap memory from. + */ + __u32 context; + + /** + * @mapping: [in] + * + * Mapping identifier of the memory mapping to unmap. 
+ */ + __u32 mapping; +}; + +/* Submission */ + +/** + * Specify that bit 39 of the patched-in address should be set to switch + * swizzling between Tegra and non-Tegra sector layout on systems that store + * surfaces in system memory in non-Tegra sector layout. + */ +#define DRM_TEGRA_SUBMIT_RELOC_SECTOR_LAYOUT (1 << 0) + +struct drm_tegra_submit_buf { + /** + * @mapping: [in] + * + * Identifier of the mapping to use in the submission. + */ + __u32 mapping; + + /** + * @flags: [in] + * + * Flags. + */ + __u32 flags; + + /** + * Information for relocation patching. + */ + struct { + /** + * @target_offset: [in] + * + * Offset from the start of the mapping of the data whose + * address is to be patched into the gather. + */ + __u64 target_offset; + + /** + * @gather_offset_words: [in] + * + * Offset in words from the start of the gather data to + * where the address should be patched into. + */ + __u32 gather_offset_words; + + /** + * @shift: [in] + * + * Number of bits the address should be shifted right before + * patching in. + */ + __u32 shift; + } reloc; +}; + +/** + * Execute `words` words of Host1x opcodes specified in the `gather_data_ptr` + * buffer. Each GATHER_UPTR command uses successive words from the buffer. + */ +#define DRM_TEGRA_SUBMIT_CMD_GATHER_UPTR 0 +/** + * Wait for a syncpoint to reach a value before continuing with further + * commands. + */ +#define DRM_TEGRA_SUBMIT_CMD_WAIT_SYNCPT 1 +/** + * Wait for a syncpoint to reach a value before continuing with further + * commands. The threshold is calculated relative to the start of the job. + */ +#define DRM_TEGRA_SUBMIT_CMD_WAIT_SYNCPT_RELATIVE 2 + +struct drm_tegra_submit_cmd_gather_uptr { + __u32 words; + __u32 reserved[3]; +}; + +struct drm_tegra_submit_cmd_wait_syncpt { + __u32 id; + __u32 value; + __u32 reserved[2]; +}; + +struct drm_tegra_submit_cmd { + /** + * @type: [in] + * + * Command type to execute. One of the DRM_TEGRA_SUBMIT_CMD* + * defines. + */ + __u32 type; + + /** + * @flags: [in] + * + * Flags. + */ + __u32 flags; + + union { + struct drm_tegra_submit_cmd_gather_uptr gather_uptr; + struct drm_tegra_submit_cmd_wait_syncpt wait_syncpt; + __u32 reserved[4]; + }; +}; + +struct drm_tegra_submit_syncpt { + /** + * @id: [in] + * + * ID of the syncpoint that the job will increment. + */ + __u32 id; + + /** + * @flags: [in] + * + * Flags. + */ + __u32 flags; + + /** + * @increments: [in] + * + * Number of times the job will increment this syncpoint. + */ + __u32 increments; + + /** + * @value: [out] + * + * Value the syncpoint will have once the job has completed all + * its specified syncpoint increments. + * + * Note that the kernel may increment the syncpoint before or after + * the job. These increments are not reflected in this field. + * + * If the job hangs or times out, not all of the increments may + * get executed. + */ + __u32 value; +}; + +struct drm_tegra_channel_submit { + /** + * @context: [in] + * + * Identifier of the channel to submit this job to. + */ + __u32 context; + + /** + * @num_bufs: [in] + * + * Number of elements in the `bufs_ptr` array. + */ + __u32 num_bufs; + + /** + * @num_cmds: [in] + * + * Number of elements in the `cmds_ptr` array. + */ + __u32 num_cmds; + + /** + * @gather_data_words: [in] + * + * Number of 32-bit words in the `gather_data_ptr` array. + */ + __u32 gather_data_words; + + /** + * @bufs_ptr: [in] + * + * Pointer to an array of drm_tegra_submit_buf structures. 
+ */ + __u64 bufs_ptr; + + /** + * @cmds_ptr: [in] + * + * Pointer to an array of drm_tegra_submit_cmd structures. + */ + __u64 cmds_ptr; + + /** + * @gather_data_ptr: [in] + * + * Pointer to an array of Host1x opcodes to be used by GATHER_UPTR + * commands. + */ + __u64 gather_data_ptr; + + /** + * @syncobj_in: [in] + * + * Handle for DRM syncobj that will be waited before submission. + * Ignored if zero. + */ + __u32 syncobj_in; + + /** + * @syncobj_out: [in] + * + * Handle for DRM syncobj that will have its fence replaced with + * the job's completion fence. Ignored if zero. + */ + __u32 syncobj_out; + + /** + * @syncpt_incr: [in,out] + * + * Information about the syncpoint the job will increment. + */ + struct drm_tegra_submit_syncpt syncpt; +}; + +struct drm_tegra_syncpoint_allocate { + /** + * @id: [out] + * + * ID of allocated syncpoint. + */ + __u32 id; + __u32 padding; +}; + +struct drm_tegra_syncpoint_free { + /** + * @id: [in] + * + * ID of syncpoint to free. + */ + __u32 id; + __u32 padding; +}; + +struct drm_tegra_syncpoint_wait { + /** + * @timeout: [in] + * + * Absolute timestamp at which the wait will time out. + */ + __s64 timeout_ns; + + /** + * @id: [in] + * + * ID of syncpoint to wait on. + */ + __u32 id; + + /** + * @threshold: [in] + * + * Threshold to wait for. + */ + __u32 threshold; + + /** + * @value: [out] + * + * Value of the syncpoint upon wait completion. + */ + __u32 value; + + __u32 padding; +}; + +#define DRM_IOCTL_TEGRA_CHANNEL_OPEN DRM_IOWR(DRM_COMMAND_BASE + 0x10, struct drm_tegra_channel_open) +#define DRM_IOCTL_TEGRA_CHANNEL_CLOSE DRM_IOWR(DRM_COMMAND_BASE + 0x11, struct drm_tegra_channel_close) +#define DRM_IOCTL_TEGRA_CHANNEL_MAP DRM_IOWR(DRM_COMMAND_BASE + 0x12, struct drm_tegra_channel_map) +#define DRM_IOCTL_TEGRA_CHANNEL_UNMAP DRM_IOWR(DRM_COMMAND_BASE + 0x13, struct drm_tegra_channel_unmap) +#define DRM_IOCTL_TEGRA_CHANNEL_SUBMIT DRM_IOWR(DRM_COMMAND_BASE + 0x14, struct drm_tegra_channel_submit) + +#define DRM_IOCTL_TEGRA_SYNCPOINT_ALLOCATE DRM_IOWR(DRM_COMMAND_BASE + 0x20, struct drm_tegra_syncpoint_allocate) +#define DRM_IOCTL_TEGRA_SYNCPOINT_FREE DRM_IOWR(DRM_COMMAND_BASE + 0x21, struct drm_tegra_syncpoint_free) +#define DRM_IOCTL_TEGRA_SYNCPOINT_WAIT DRM_IOWR(DRM_COMMAND_BASE + 0x22, struct drm_tegra_syncpoint_wait) + #if defined(__cplusplus) } #endif diff --git a/include/uapi/drm/v3d_drm.h b/include/uapi/drm/v3d_drm.h index 1ce746e228d9..4104f22fb3d3 100644 --- a/include/uapi/drm/v3d_drm.h +++ b/include/uapi/drm/v3d_drm.h @@ -38,6 +38,9 @@ extern "C" { #define DRM_V3D_GET_BO_OFFSET 0x05 #define DRM_V3D_SUBMIT_TFU 0x06 #define DRM_V3D_SUBMIT_CSD 0x07 +#define DRM_V3D_PERFMON_CREATE 0x08 +#define DRM_V3D_PERFMON_DESTROY 0x09 +#define DRM_V3D_PERFMON_GET_VALUES 0x0a #define DRM_IOCTL_V3D_SUBMIT_CL DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_CL, struct drm_v3d_submit_cl) #define DRM_IOCTL_V3D_WAIT_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_WAIT_BO, struct drm_v3d_wait_bo) @@ -47,6 +50,12 @@ extern "C" { #define DRM_IOCTL_V3D_GET_BO_OFFSET DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_GET_BO_OFFSET, struct drm_v3d_get_bo_offset) #define DRM_IOCTL_V3D_SUBMIT_TFU DRM_IOW(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_TFU, struct drm_v3d_submit_tfu) #define DRM_IOCTL_V3D_SUBMIT_CSD DRM_IOW(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_CSD, struct drm_v3d_submit_csd) +#define DRM_IOCTL_V3D_PERFMON_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_PERFMON_CREATE, \ + struct drm_v3d_perfmon_create) +#define DRM_IOCTL_V3D_PERFMON_DESTROY DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_PERFMON_DESTROY, \ + 
struct drm_v3d_perfmon_destroy) +#define DRM_IOCTL_V3D_PERFMON_GET_VALUES DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_PERFMON_GET_VALUES, \ + struct drm_v3d_perfmon_get_values) #define DRM_V3D_SUBMIT_CL_FLUSH_CACHE 0x01 @@ -127,6 +136,11 @@ struct drm_v3d_submit_cl { __u32 bo_handle_count; __u32 flags; + + /* ID of the perfmon to attach to this job. 0 means no perfmon. */ + __u32 perfmon_id; + + __u32 pad; }; /** @@ -195,6 +209,7 @@ enum drm_v3d_param { DRM_V3D_PARAM_SUPPORTS_TFU, DRM_V3D_PARAM_SUPPORTS_CSD, DRM_V3D_PARAM_SUPPORTS_CACHE_FLUSH, + DRM_V3D_PARAM_SUPPORTS_PERFMON, }; struct drm_v3d_get_param { @@ -258,6 +273,127 @@ struct drm_v3d_submit_csd { __u32 in_sync; /* Sync object to signal when the CSD job is done. */ __u32 out_sync; + + /* ID of the perfmon to attach to this job. 0 means no perfmon. */ + __u32 perfmon_id; +}; + +enum { + V3D_PERFCNT_FEP_VALID_PRIMTS_NO_PIXELS, + V3D_PERFCNT_FEP_VALID_PRIMS, + V3D_PERFCNT_FEP_EZ_NFCLIP_QUADS, + V3D_PERFCNT_FEP_VALID_QUADS, + V3D_PERFCNT_TLB_QUADS_STENCIL_FAIL, + V3D_PERFCNT_TLB_QUADS_STENCILZ_FAIL, + V3D_PERFCNT_TLB_QUADS_STENCILZ_PASS, + V3D_PERFCNT_TLB_QUADS_ZERO_COV, + V3D_PERFCNT_TLB_QUADS_NONZERO_COV, + V3D_PERFCNT_TLB_QUADS_WRITTEN, + V3D_PERFCNT_PTB_PRIM_VIEWPOINT_DISCARD, + V3D_PERFCNT_PTB_PRIM_CLIP, + V3D_PERFCNT_PTB_PRIM_REV, + V3D_PERFCNT_QPU_IDLE_CYCLES, + V3D_PERFCNT_QPU_ACTIVE_CYCLES_VERTEX_COORD_USER, + V3D_PERFCNT_QPU_ACTIVE_CYCLES_FRAG, + V3D_PERFCNT_QPU_CYCLES_VALID_INSTR, + V3D_PERFCNT_QPU_CYCLES_TMU_STALL, + V3D_PERFCNT_QPU_CYCLES_SCOREBOARD_STALL, + V3D_PERFCNT_QPU_CYCLES_VARYINGS_STALL, + V3D_PERFCNT_QPU_IC_HIT, + V3D_PERFCNT_QPU_IC_MISS, + V3D_PERFCNT_QPU_UC_HIT, + V3D_PERFCNT_QPU_UC_MISS, + V3D_PERFCNT_TMU_TCACHE_ACCESS, + V3D_PERFCNT_TMU_TCACHE_MISS, + V3D_PERFCNT_VPM_VDW_STALL, + V3D_PERFCNT_VPM_VCD_STALL, + V3D_PERFCNT_BIN_ACTIVE, + V3D_PERFCNT_RDR_ACTIVE, + V3D_PERFCNT_L2T_HITS, + V3D_PERFCNT_L2T_MISSES, + V3D_PERFCNT_CYCLE_COUNT, + V3D_PERFCNT_QPU_CYCLES_STALLED_VERTEX_COORD_USER, + V3D_PERFCNT_QPU_CYCLES_STALLED_FRAGMENT, + V3D_PERFCNT_PTB_PRIMS_BINNED, + V3D_PERFCNT_AXI_WRITES_WATCH_0, + V3D_PERFCNT_AXI_READS_WATCH_0, + V3D_PERFCNT_AXI_WRITE_STALLS_WATCH_0, + V3D_PERFCNT_AXI_READ_STALLS_WATCH_0, + V3D_PERFCNT_AXI_WRITE_BYTES_WATCH_0, + V3D_PERFCNT_AXI_READ_BYTES_WATCH_0, + V3D_PERFCNT_AXI_WRITES_WATCH_1, + V3D_PERFCNT_AXI_READS_WATCH_1, + V3D_PERFCNT_AXI_WRITE_STALLS_WATCH_1, + V3D_PERFCNT_AXI_READ_STALLS_WATCH_1, + V3D_PERFCNT_AXI_WRITE_BYTES_WATCH_1, + V3D_PERFCNT_AXI_READ_BYTES_WATCH_1, + V3D_PERFCNT_TLB_PARTIAL_QUADS, + V3D_PERFCNT_TMU_CONFIG_ACCESSES, + V3D_PERFCNT_L2T_NO_ID_STALL, + V3D_PERFCNT_L2T_COM_QUE_STALL, + V3D_PERFCNT_L2T_TMU_WRITES, + V3D_PERFCNT_TMU_ACTIVE_CYCLES, + V3D_PERFCNT_TMU_STALLED_CYCLES, + V3D_PERFCNT_CLE_ACTIVE, + V3D_PERFCNT_L2T_TMU_READS, + V3D_PERFCNT_L2T_CLE_READS, + V3D_PERFCNT_L2T_VCD_READS, + V3D_PERFCNT_L2T_TMUCFG_READS, + V3D_PERFCNT_L2T_SLC0_READS, + V3D_PERFCNT_L2T_SLC1_READS, + V3D_PERFCNT_L2T_SLC2_READS, + V3D_PERFCNT_L2T_TMU_W_MISSES, + V3D_PERFCNT_L2T_TMU_R_MISSES, + V3D_PERFCNT_L2T_CLE_MISSES, + V3D_PERFCNT_L2T_VCD_MISSES, + V3D_PERFCNT_L2T_TMUCFG_MISSES, + V3D_PERFCNT_L2T_SLC0_MISSES, + V3D_PERFCNT_L2T_SLC1_MISSES, + V3D_PERFCNT_L2T_SLC2_MISSES, + V3D_PERFCNT_CORE_MEM_WRITES, + V3D_PERFCNT_L2T_MEM_WRITES, + V3D_PERFCNT_PTB_MEM_WRITES, + V3D_PERFCNT_TLB_MEM_WRITES, + V3D_PERFCNT_CORE_MEM_READS, + V3D_PERFCNT_L2T_MEM_READS, + V3D_PERFCNT_PTB_MEM_READS, + V3D_PERFCNT_PSE_MEM_READS, + V3D_PERFCNT_TLB_MEM_READS, + V3D_PERFCNT_GMP_MEM_READS, + V3D_PERFCNT_PTB_W_MEM_WORDS, + 
V3D_PERFCNT_TLB_W_MEM_WORDS, + V3D_PERFCNT_PSE_R_MEM_WORDS, + V3D_PERFCNT_TLB_R_MEM_WORDS, + V3D_PERFCNT_TMU_MRU_HITS, + V3D_PERFCNT_COMPUTE_ACTIVE, + V3D_PERFCNT_NUM, +}; + +#define DRM_V3D_MAX_PERF_COUNTERS 32 + +struct drm_v3d_perfmon_create { + __u32 id; + __u32 ncounters; + __u8 counters[DRM_V3D_MAX_PERF_COUNTERS]; +}; + +struct drm_v3d_perfmon_destroy { + __u32 id; +}; + +/* + * Returns the values of the performance counters tracked by this + * perfmon (as an array of ncounters u64 values). + * + * No implicit synchronization is performed, so the user has to + * guarantee that any jobs using this perfmon have already been + * completed (probably by blocking on the seqno returned by the + * last exec that used the perfmon). + */ +struct drm_v3d_perfmon_get_values { + __u32 id; + __u32 pad; + __u64 values_ptr; }; #if defined(__cplusplus) diff --git a/include/uapi/drm/vmwgfx_drm.h b/include/uapi/drm/vmwgfx_drm.h index 02e917507479..9078775feb51 100644 --- a/include/uapi/drm/vmwgfx_drm.h +++ b/include/uapi/drm/vmwgfx_drm.h @@ -72,6 +72,9 @@ extern "C" { #define DRM_VMW_GB_SURFACE_CREATE_EXT 27 #define DRM_VMW_GB_SURFACE_REF_EXT 28 #define DRM_VMW_MSG 29 +#define DRM_VMW_MKSSTAT_RESET 30 +#define DRM_VMW_MKSSTAT_ADD 31 +#define DRM_VMW_MKSSTAT_REMOVE 32 /*************************************************************************/ /** @@ -1236,6 +1239,44 @@ struct drm_vmw_msg_arg { __u32 receive_len; }; +/** + * struct drm_vmw_mksstat_add_arg + * + * @stat: Pointer to user-space stat-counters array, page-aligned. + * @info: Pointer to user-space counter-infos array, page-aligned. + * @strs: Pointer to user-space stat strings, page-aligned. + * @stat_len: Length in bytes of stat-counters array. + * @info_len: Length in bytes of counter-infos array. + * @strs_len: Length in bytes of the stat strings, terminators included. + * @description: Pointer to instance descriptor string; will be truncated + * to MKS_GUEST_STAT_INSTANCE_DESC_LENGTH chars. + * @id: Output identifier of the produced record; -1 if error. + * + * Argument to the DRM_VMW_MKSSTAT_ADD ioctl. + */ +struct drm_vmw_mksstat_add_arg { + __u64 stat; + __u64 info; + __u64 strs; + __u64 stat_len; + __u64 info_len; + __u64 strs_len; + __u64 description; + __u64 id; +}; + +/** + * struct drm_vmw_mksstat_remove_arg + * + * @id: Identifier of the record being disposed, originally obtained through + * DRM_VMW_MKSSTAT_ADD ioctl. + * + * Argument to the DRM_VMW_MKSSTAT_REMOVE ioctl. + */ +struct drm_vmw_mksstat_remove_arg { + __u64 id; +}; + #if defined(__cplusplus) } #endif diff --git a/include/uapi/linux/dma-buf.h b/include/uapi/linux/dma-buf.h index 7f30393b92c3..8e4a2ca0bcbf 100644 --- a/include/uapi/linux/dma-buf.h +++ b/include/uapi/linux/dma-buf.h @@ -22,8 +22,56 @@ #include <linux/types.h> -/* begin/end dma-buf functions used for userspace mmap. */ +/** + * struct dma_buf_sync - Synchronize with CPU access. + * + * When a DMA buffer is accessed from the CPU via mmap, it is not always + * possible to guarantee coherency between the CPU-visible map and underlying + * memory. To manage coherency, DMA_BUF_IOCTL_SYNC must be used to bracket + * any CPU access to give the kernel the chance to shuffle memory around if + * needed. + * + * Prior to accessing the map, the client must call DMA_BUF_IOCTL_SYNC + * with DMA_BUF_SYNC_START and the appropriate read/write flags. Once the + * access is complete, the client should call DMA_BUF_IOCTL_SYNC with + * DMA_BUF_SYNC_END and the same read/write flags. 
+ * + * The synchronization provided via DMA_BUF_IOCTL_SYNC only provides cache + * coherency. It does not prevent other processes or devices from + * accessing the memory at the same time. If synchronization with a GPU or + * other device driver is required, it is the client's responsibility to + * wait for the buffer to be ready for reading or writing before calling this + * ioctl with DMA_BUF_SYNC_START. Likewise, the client must ensure that + * follow-up work is not submitted to GPU or other device driver until + * after this ioctl has been called with DMA_BUF_SYNC_END. + * + * If the driver or API with which the client is interacting uses implicit + * synchronization, waiting for prior work to complete can be done via + * poll() on the DMA buffer file descriptor. If the driver or API requires + * explicit synchronization, the client may have to wait on a sync_file or + * other synchronization primitive outside the scope of the DMA buffer API. + */ struct dma_buf_sync { + /** + * @flags: Set of access flags + * + * DMA_BUF_SYNC_START: + * Indicates the start of a map access session. + * + * DMA_BUF_SYNC_END: + * Indicates the end of a map access session. + * + * DMA_BUF_SYNC_READ: + * Indicates that the mapped DMA buffer will be read by the + * client via the CPU map. + * + * DMA_BUF_SYNC_WRITE: + * Indicates that the mapped DMA buffer will be written by the + * client via the CPU map. + * + * DMA_BUF_SYNC_RW: + * An alias for DMA_BUF_SYNC_READ | DMA_BUF_SYNC_WRITE. + */ __u64 flags; }; diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 3cb5b5dd9f77..af96af174dc4 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -31,9 +31,10 @@ * - 1.3 - Add SMI events support * - 1.4 - Indicate new SRAM EDC bit in device properties * - 1.5 - Add SVM API + * - 1.6 - Query clear flags in SVM get_attr API */ #define KFD_IOCTL_MAJOR_VERSION 1 -#define KFD_IOCTL_MINOR_VERSION 5 +#define KFD_IOCTL_MINOR_VERSION 6 struct kfd_ioctl_get_version_args { __u32 major_version; /* from KFD */ @@ -575,18 +576,19 @@ struct kfd_ioctl_svm_attribute { * @KFD_IOCTL_SVM_ATTR_PREFERRED_LOC or * @KFD_IOCTL_SVM_ATTR_PREFETCH_LOC respectively. For * @KFD_IOCTL_SVM_ATTR_SET_FLAGS, flags of all pages will be - * aggregated by bitwise AND. The minimum migration granularity - * throughout the range will be returned for - * @KFD_IOCTL_SVM_ATTR_GRANULARITY. + * aggregated by bitwise AND. That means a flag will be set in the + * output if that flag is set for all pages in the range. For + * @KFD_IOCTL_SVM_ATTR_CLR_FLAGS, flags of all pages will be + * aggregated by bitwise NOR. That means a flag will be set in the + * output if that flag is clear for all pages in the range. + * The minimum migration granularity throughout the range will be + * returned for @KFD_IOCTL_SVM_ATTR_GRANULARITY. * * Querying of accessibility attributes works by initializing the * attribute type to @KFD_IOCTL_SVM_ATTR_ACCESS and the value to the * GPUID being queried. Multiple attributes can be given to allow * querying multiple GPUIDs. The ioctl function overwrites the * attribute type to indicate the access for the specified GPU. - * - * @KFD_IOCTL_SVM_ATTR_CLR_FLAGS is invalid for - * @KFD_IOCTL_SVM_OP_GET_ATTR. */ struct kfd_ioctl_svm_args { __u64 start_addr;
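The AFRC macros added to drm_fourcc.h above compose into a single format modifier: pick a coding-unit size per plane group with AFRC_FORMAT_MOD_CU_SIZE_P0/P12 and OR in AFRC_FORMAT_MOD_LAYOUT_SCAN when the scanline-optimised layout is wanted. A minimal sketch for a single-plane RGBA buffer (the variable name is illustrative, and the included header must already carry the definitions from this series):

#include <stdint.h>
#include <drm/drm_fourcc.h>

/* 32-byte coding units, scanline-optimised layout; CU_SIZE_P12 is left at
 * zero, as required for single-plane formats. */
static const uint64_t afrc_rgba_scan_modifier =
        DRM_FORMAT_MOD_ARM_AFRC(AFRC_FORMAT_MOD_CU_SIZE_P0(AFRC_FORMAT_MOD_CU_SIZE_32) |
                                AFRC_FORMAT_MOD_LAYOUT_SCAN);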
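The drm_mode_get_plane kernel-doc above describes a two-pass query. Userspace would drive DRM_IOCTL_MODE_GETPLANE roughly as in the sketch below; the helper name and the already-open drm_fd are illustrative, the <drm/drm.h> include path assumes installed kernel uapi headers, and error handling is trimmed.

#include <stdint.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <drm/drm.h>

static uint32_t *plane_formats(int drm_fd, uint32_t plane_id, uint32_t *count)
{
        struct drm_mode_get_plane plane = { .plane_id = plane_id };
        uint32_t *formats;

        /* Pass 1: count_format_types is zero, the kernel fills in the count. */
        if (ioctl(drm_fd, DRM_IOCTL_MODE_GETPLANE, &plane))
                return NULL;

        formats = calloc(plane.count_format_types, sizeof(*formats));
        plane.format_type_ptr = (uintptr_t)formats;

        /* Pass 2: count_format_types still holds the value returned above. */
        if (ioctl(drm_fd, DRM_IOCTL_MODE_GETPLANE, &plane)) {
                free(formats);
                return NULL;
        }

        *count = plane.count_format_types;
        return formats;
}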
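The new I915_USERPTR_PROBE flag pairs with I915_PARAM_HAS_USERPTR_PROBE along these lines; a sketch only, assuming a page-aligned ptr and size and an open i915 fd, with the helper name made up for illustration.

#include <stdint.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

static uint32_t userptr_create(int i915_fd, void *ptr, uint64_t size)
{
        int has_probe = 0;
        struct drm_i915_getparam gp = {
                .param = I915_PARAM_HAS_USERPTR_PROBE,
                .value = &has_probe,
        };
        struct drm_i915_gem_userptr arg = {
                .user_ptr = (uintptr_t)ptr, /* page aligned */
                .user_size = size,          /* page aligned, at least PAGE_SIZE */
        };

        /* Ask the kernel to validate the range up front when it supports it. */
        ioctl(i915_fd, DRM_IOCTL_I915_GETPARAM, &gp);
        if (has_probe)
                arg.flags |= I915_USERPTR_PROBE;

        if (ioctl(i915_fd, DRM_IOCTL_I915_GEM_USERPTR, &arg))
                return 0;

        return arg.handle; /* object handles are nonzero */
}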
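The V3D perfmon ioctls are meant to be used as create, attach via the new perfmon_id fields in the CL/CSD submit structs, then read back and destroy once the jobs are known to have finished. A rough sketch under those assumptions (single counter, no concurrent users of the perfmon, job submission and waiting elided):

#include <stdint.h>
#include <sys/ioctl.h>
#include <drm/v3d_drm.h>

static int read_cycle_count(int v3d_fd, uint64_t *cycles)
{
        struct drm_v3d_perfmon_create create = {
                .ncounters = 1,
                .counters = { V3D_PERFCNT_CYCLE_COUNT },
        };
        struct drm_v3d_perfmon_get_values get = { 0 };
        struct drm_v3d_perfmon_destroy destroy = { 0 };

        if (ioctl(v3d_fd, DRM_IOCTL_V3D_PERFMON_CREATE, &create))
                return -1;

        /* ... submit jobs with submit_cl.perfmon_id = create.id and wait for
         * their out_sync objects to signal before reading the values ... */

        get.id = create.id;
        get.values_ptr = (uintptr_t)cycles;
        if (ioctl(v3d_fd, DRM_IOCTL_V3D_PERFMON_GET_VALUES, &get))
                return -1;

        destroy.id = create.id;
        return ioctl(v3d_fd, DRM_IOCTL_V3D_PERFMON_DESTROY, &destroy);
}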
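Finally, the DMA_BUF_IOCTL_SYNC bracketing documented in the dma-buf.h hunk looks roughly like this from userspace; the helper is illustrative and assumes the exporter allows CPU mmap and that any device access has already been synchronized separately.

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <linux/dma-buf.h>

static int cpu_fill(int dmabuf_fd, size_t size, uint8_t value)
{
        struct dma_buf_sync sync = { .flags = DMA_BUF_SYNC_START | DMA_BUF_SYNC_WRITE };
        void *map = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, dmabuf_fd, 0);
        int ret;

        if (map == MAP_FAILED)
                return -1;

        /* Open the access session before touching the CPU map... */
        ret = ioctl(dmabuf_fd, DMA_BUF_IOCTL_SYNC, &sync);
        if (!ret) {
                memset(map, value, size);

                /* ...and close it with the same read/write flags afterwards. */
                sync.flags = DMA_BUF_SYNC_END | DMA_BUF_SYNC_WRITE;
                ret = ioctl(dmabuf_fd, DMA_BUF_IOCTL_SYNC, &sync);
        }

        munmap(map, size);
        return ret;
}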